def space_sample(self):
    '''
    Samples a batch from memory.
    Note that multitask's bodies are parallelized copies with similar envs, just to get more batch sizes
    '''
    body_batches = []
    for body in self.agent.nanflat_body_a:
        sub_batch = body.memory.sample()
        if body.is_discrete:
            # encode discrete actions as one-hot vectors for the q_targets computation
            sub_batch['actions'] = util.to_one_hot(sub_batch['actions'], body.action_space.high)
        if self.normalize_state:
            sub_batch = policy_util.normalize_states_and_next_states(body, sub_batch)
        sub_batch = util.to_torch_batch(sub_batch, self.net.device, body.memory.is_episodic)
        body_batches.append(sub_batch)
    # feedforward input: concatenate the per-body states along dim=1
    batch = {
        'states': torch.cat([b['states'] for b in body_batches], dim=1),
        'next_states': torch.cat([b['next_states'] for b in body_batches], dim=1),
        # retain the per-body batches for body-wise q_targets calculation
        'body_batches': body_batches,
    }
    return batch
def sample(self):
    '''Samples a batch from memory of size self.memory_spec['batch_size']'''
    raw_batch = self.body.memory.sample()
    if self.normalize_state:
        raw_batch = policy_util.normalize_states_and_next_states(self.body, raw_batch)
    # convert to torch tensors on the net's device before returning
    return util.to_torch_batch(raw_batch, self.net.device, self.body.memory.is_episodic)
def replay_sample(self):
    '''Samples a batch from memory'''
    replay_memory = self.body.replay_memory
    batch = replay_memory.sample()
    if self.normalize_state:
        batch = policy_util.normalize_states_and_next_states(
            self.body, batch, episodic_flag=replay_memory.is_episodic)
    batch = util.to_torch_batch(batch, self.net.device, replay_memory.is_episodic)
    # guard: no NaN states may leak out of memory/normalization
    assert not torch.isnan(batch['states']).any(), batch['states']
    return batch
def sample(self):
    '''Samples a batch from memory of size self.memory_spec['batch_size']'''
    body = self.body
    batch = body.memory.sample()
    if body.is_discrete:
        # one-hot actions to calc q_targets
        batch['actions'] = util.to_one_hot(batch['actions'], body.action_space.high)
    if self.normalize_state:
        batch = policy_util.normalize_states_and_next_states(body, batch)
    return util.to_torch_batch(batch, self.net.device, body.memory.is_episodic)
def space_sample(self):
    '''Samples a batch per body, which may experience different environment'''
    combined = {key: [] for key in self.body.memory.data_keys}
    for body in self.agent.nanflat_body_a:
        sub_batch = body.memory.sample()
        if self.normalize_state:
            sub_batch = policy_util.normalize_states_and_next_states(body, sub_batch)
        sub_batch = util.to_torch_batch(sub_batch, self.net.device, body.memory.is_episodic)
        # group each body's tensors under the shared data keys
        for key in combined:
            combined[key].append(sub_batch[key])
    return combined
def sample(self):
    '''Modify the onpolicy sample to also append to replay'''
    batch = self.body.memory.sample()
    # flatten the episodic memory into contiguous arrays per key
    batch = {key: np.concatenate(val) for key, val in batch.items()}
    batch['rets'] = math_util.calc_returns(batch, self.gamma)
    replay_memory = self.body.replay_memory
    # mirror every experience tuple into the replay memory
    for idx in range(len(batch['dones'])):
        experience = [batch[key][idx] for key in replay_memory.data_keys]
        replay_memory.add_experience(*experience)
    if self.normalize_state:
        batch = policy_util.normalize_states_and_next_states(self.body, batch)
    return util.to_torch_batch(batch, self.net.device, replay_memory.is_episodic)
def sample(self):
    '''Samples a batch from memory'''
    batch = self.body.memory.sample()
    # derive next_actions by shifting actions one step forward; the trailing zero
    # row is safe at done since act_next_q_preds is multiplied by (1 - batch['dones'])
    shifted_actions = np.zeros_like(batch['actions'])
    shifted_actions[:-1] = batch['actions'][1:]
    batch['next_actions'] = shifted_actions
    if self.normalize_state:
        batch = policy_util.normalize_states_and_next_states(self.body, batch)
    return util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
def space_sample(self):
    '''Samples a batch per body, which may experience different environment'''
    per_body = []
    for body in self.agent.nanflat_body_a:
        sub_batch = body.memory.sample()
        if body.is_discrete:
            # one-hot actions to calc q_targets
            sub_batch['actions'] = util.to_one_hot(sub_batch['actions'], body.action_space.high)
        if self.normalize_state:
            sub_batch = policy_util.normalize_states_and_next_states(body, sub_batch)
        sub_batch = util.to_torch_batch(sub_batch, self.net.device, body.memory.is_episodic)
        per_body.append(sub_batch)
    # collect per-body states for feedforward to the hydra heads, and retain the
    # full body-batches for body-wise q_targets calculation
    return {
        'states': [b['states'] for b in per_body],
        'next_states': [b['next_states'] for b in per_body],
        'body_batches': per_body,
    }