コード例 #1
0
 def sample(self):
     '''Draw a batch from memory and attach a `next_actions` entry.

     `next_actions[i]` is set to `actions[i + 1]`; the last position keeps
     its zero fill. The batch is then passed through `util.to_torch_batch`
     with the net's device and the memory's `is_episodic` flag.
     '''
     sampled = self.body.memory.sample()
     taken = sampled['actions']
     # Shift the actions left by one step. Leaving a placeholder at a done
     # step is safe: the act_next_q_preds calculated from it will be
     # multiplied by (1 - batch['dones']).
     shifted = np.zeros_like(taken)
     shifted[:-1] = taken[1:]
     sampled['next_actions'] = shifted
     return util.to_torch_batch(sampled, self.net.device,
                                self.body.memory.is_episodic)
コード例 #2
0
ファイル: base.py プロジェクト: sherlock1987/Dp-without-Adv
 def space_sample(self):
     '''Sample once per body in the agent and merge the results into one batch.

     `self.body` is pointed at each entry of `self.agent.nanflat_body_a` in
     turn so that `self.sample()` reads from that body; afterwards it is
     reset to the first entry.
     '''
     per_body = []
     for active_body in self.agent.nanflat_body_a:
         # self.sample() reads self.body, so rebind it before each call
         self.body = active_body
         per_body.append(self.sample())
     # restore the default body reference
     self.body = self.agent.nanflat_body_a[0]
     merged = util.concat_batches(per_body)
     return util.to_torch_batch(merged, self.net.device, self.body.memory.is_episodic)
コード例 #3
0
 def sample(self):
     '''Sample the on-policy memory and also copy each step into replay memory.

     Every value of the sampled batch is concatenated with `np.concatenate`,
     then each index up to `len(batch['dones'])` is added to
     `self.body.replay_memory` using the fields named by its `data_keys`.
     The concatenated batch is returned via `util.to_torch_batch`.
     '''
     episodes = self.body.memory.sample()
     # concat episodic memory: one array per key
     flat = {}
     for key, chunks in episodes.items():
         flat[key] = np.concatenate(chunks)
     replay = self.body.replay_memory
     for step in range(len(flat['dones'])):
         # one experience = the step-th element of every replay data key
         replay.add_experience(*(flat[key][step] for key in replay.data_keys))
     return util.to_torch_batch(flat, self.net.device, replay.is_episodic)
コード例 #4
0
 def warmup_sample(self):
     '''Draw a batch from the body's warm-up memory.

     The sampled batch is passed through `util.to_torch_batch` with the
     net's device and the warm-up memory's `is_episodic` flag.
     '''
     drawn = self.body.warmup_memory.sample()
     return util.to_torch_batch(
         drawn, self.net.device, self.body.warmup_memory.is_episodic)
コード例 #5
0
 def sample(self):
     '''Draw a batch from the body's memory.

     Per the original note, the batch size is self.memory_spec['batch_size'];
     that is decided inside the memory's own `sample()`, not here. The batch
     is passed through `util.to_torch_batch` with the net's device and the
     memory's `is_episodic` flag.
     '''
     drawn = self.body.memory.sample()
     return util.to_torch_batch(
         drawn, self.net.device, self.body.memory.is_episodic)
コード例 #6
0
 def replay_sample(self):
     '''Draw a batch from the body's replay memory.

     The sampled batch is passed through `util.to_torch_batch` with the
     net's device and the replay memory's `is_episodic` flag.
     '''
     drawn = self.body.replay_memory.sample()
     return util.to_torch_batch(
         drawn, self.net.device, self.body.replay_memory.is_episodic)