Example #1
0
 def sample_data(self, indices, encoder=False):
     ''' sample data from replay buffers to construct a training meta-batch

     Draws one random batch per task index and stacks the results along a
     new leading task dimension.

     :param indices: iterable of task indices to sample from
     :param encoder: if True, sample from the encoder replay buffer
         (sequences when ``self.recurrent``) instead of the RL replay buffer
     :return: list ``[obs, actions, rewards, next_obs, terms]``; each entry
         is a tensor stacked across tasks along dim 0
     '''
     if encoder:
         batches = [
             ptu.np_to_pytorch_batch(
                 self.enc_replay_buffer.random_batch(
                     idx,
                     batch_size=self.embedding_batch_size,
                     sequence=self.recurrent)) for idx in indices
         ]
     else:
         batches = [
             ptu.np_to_pytorch_batch(
                 self.replay_buffer.random_batch(
                     idx, batch_size=self.batch_size)) for idx in indices
         ]
     # reuse the shared unpack helper (as sample_context / sample_sac do)
     # instead of duplicating the [None, ...] unpacking inline;
     # in sparse reward settings, only the encoder is trained with sparse rewards
     unpacked = [
         self.unpack_batch(batch,
                           sparse_reward=encoder and self.sparse_rewards)
         for batch in batches
     ]
     # group like elements together across tasks
     unpacked = [[x[i] for x in unpacked] for i in range(len(unpacked[0]))]
     # stack along the new task dimension
     return [torch.cat(x, dim=0) for x in unpacked]
Example #2
0
 def sample_context(self, indices):
     ''' sample batch of context from a list of tasks from the replay buffer '''
     # accept a bare task index as well as an iterable of indices
     if not hasattr(indices, '__iter__'):
         indices = [indices]
     # one encoder batch per task (full sequences when recurrent)
     task_batches = [
         ptu.np_to_pytorch_batch(
             self.enc_replay_buffer.random_batch(
                 idx,
                 batch_size=self.embedding_batch_size,
                 sequence=self.recurrent)) for idx in indices
     ]
     # each entry is [obs, act, rewards, next_obs, terms] for one task
     unpacked = [
         self.unpack_batch(b, sparse_reward=self.sparse_rewards)
         for b in task_batches
     ]
     # transpose so like elements line up, then stack across tasks
     grouped = zip(*unpacked)
     stacked = [torch.cat(group, dim=0) for group in grouped]
     # full context consists of [obs, act, rewards, next_obs, terms];
     # terminals are never part of the context, and next_obs is included
     # only when dynamics may change across tasks
     keep = -1 if self.use_next_obs_in_context else -2
     return torch.cat(stacked[:keep], dim=2)
Example #3
0
 def prepare_context(self, idx):
     ''' sample context from replay buffer and prepare it '''
     raw = self.enc_replay_buffer.random_batch(
         idx,
         batch_size=self.embedding_batch_size,
         sequence=self.recurrent)
     batch = ptu.np_to_pytorch_batch(raw)
     # add a leading singleton (task) dimension to each element
     obs = batch['observations'][None, ...]
     act = batch['actions'][None, ...]
     rewards = batch['rewards'][None, ...]
     return self.prepare_encoder_data(obs, act, rewards)
Example #4
0
 def get_samples(self):
     '''Draw one random batch of transitions from the replay buffer.

     Returns the tuple (obs, actions, rewards, next_obs, terminals),
     each with a leading singleton dimension added.
     '''
     batch = ptu.np_to_pytorch_batch(
         self.replay_buffer.random_batch(self.batch_size))
     keys = ('observations', 'actions', 'rewards',
             'next_observations', 'terminals')
     o, a, r, no, t = (batch[k][None, ...] for k in keys)
     return o, a, r, no, t
Example #5
0
 def sample_sac(self, indices):
     ''' sample batch of training data from a list of tasks for training the actor-critic '''
     # transitions are drawn randomly from the RL replay buffer;
     # rewards are always dense here
     per_task = [
         self.unpack_batch(
             ptu.np_to_pytorch_batch(
                 self.replay_buffer.random_batch(
                     idx, batch_size=self.batch_size)))
         for idx in indices
     ]
     # transpose so like elements are grouped, then stack along the task dim
     return [torch.cat(group, dim=0) for group in zip(*per_task)]
Example #6
0
 def sample_low_level(self):
     '''Sample batch of low level interactions
     In the form of ([state,goal], primitive action,parameterized reward, next state)'''
     batch = ptu.np_to_pytorch_batch(
         self.low_buffer.random_batch(batch_size=self.low_batch_size))
     # unpack into the format [o, a, r, s, d]
     fields = self.unpack_batch(batch)
     # drop the leading singleton dimension from each field; the cat of a
     # one-element list keeps the result a fresh tensor of the same values
     return [torch.cat([field[0]], dim=0) for field in fields]
Example #7
0
 def sample_high_level(self, indices):
     '''Sample batch of high level interactions
     In the form of (Original state,Goal given,Reward received,State achieved)'''
     per_task = []
     for idx in indices:
         raw = self.high_buffer.random_batch(
             idx, batch_size=self.high_batch_size)
         per_task.append(self.unpack_batch(ptu.np_to_pytorch_batch(raw)))
     # transpose so like elements line up, then stack across tasks
     return [torch.cat(group, dim=0) for group in zip(*per_task)]