def sample(self):
    '''
    Sample a batch from memory and augment it with next_actions.

    next_actions is actions shifted left by one step, with a zero entry
    in the final slot.  This is safe at episode boundaries: the
    act_next_q_preds computed from next_actions get multiplied by
    (1 - batch['dones']) downstream, zeroing out the terminal step.
    '''
    raw_batch = self.body.memory.sample()
    shifted = np.zeros_like(raw_batch['actions'])
    shifted[:-1] = raw_batch['actions'][1:]
    raw_batch['next_actions'] = shifted
    return util.to_torch_batch(raw_batch, self.net.device, self.body.memory.is_episodic)
def space_sample(self):
    '''
    Sample one batch per body in the multi-body (space) agent and
    concatenate them into a single batch.

    Temporarily rebinds self.body so that self.sample() draws from each
    body's memory in turn, then restores the default body reference.
    NOTE(review): self.sample() already calls util.to_torch_batch, and it
    is applied again after concat_batches — confirm concat_batches
    returns a numpy batch, otherwise this double-converts.
    '''
    body_batches = []
    for flat_body in self.agent.nanflat_body_a:
        self.body = flat_body
        body_batches.append(self.sample())
    # restore the default body reference
    self.body = self.agent.nanflat_body_a[0]
    combined = util.concat_batches(body_batches)
    return util.to_torch_batch(combined, self.net.device, self.body.memory.is_episodic)
def sample(self):
    '''
    On-policy sample that also appends every experience to the replay memory.

    Episodic sub-lists are flattened via np.concatenate before the
    per-step tuples are copied into replay memory.
    '''
    replay_memory = self.body.replay_memory
    # flatten episodic memory into contiguous arrays per key
    batch = {key: np.concatenate(vals) for key, vals in self.body.memory.sample().items()}
    for idx in range(len(batch['dones'])):
        replay_memory.add_experience(*(batch[key][idx] for key in replay_memory.data_keys))
    return util.to_torch_batch(batch, self.net.device, replay_memory.is_episodic)
def warmup_sample(self):
    '''Sample a batch from the warm-up memory, converted to torch tensors.'''
    warmup_memory = self.body.warmup_memory
    return util.to_torch_batch(warmup_memory.sample(), self.net.device, warmup_memory.is_episodic)
def sample(self):
    '''Sample a batch of size self.memory_spec['batch_size'] from memory, converted to torch tensors.'''
    memory = self.body.memory
    return util.to_torch_batch(memory.sample(), self.net.device, memory.is_episodic)
def replay_sample(self):
    '''Sample a batch from the replay memory, converted to torch tensors.'''
    replay_memory = self.body.replay_memory
    return util.to_torch_batch(replay_memory.sample(), self.net.device, replay_memory.is_episodic)