def sample(self):
    '''Samples a batch from memory of size self.batch_size'''
    batches = [body.memory.sample(self.batch_size) for body in self.agent.nanflat_body_a]
    batch = util.concat_dict(batches)
    util.to_torch_batch(batch)
    return batch

def sample(self):
    '''Samples a batch from memory'''
    batches = [body.memory.sample() for body in self.agent.nanflat_body_a]
    batch = util.concat_dict(batches)
    if self.is_episodic:
        util.to_torch_nested_batch(batch, self.gpu)
    else:
        util.to_torch_batch(batch, self.gpu)
    return batch

def sample(self):
    '''Samples one batch per environment'''
    batches = [body.memory.sample(self.batch_size) for body in self.agent.nanflat_body_a]
    # Package data into pytorch variables
    for batch_b in batches:
        util.to_torch_batch(batch_b)
    batch = {'states': [], 'next_states': []}
    for b in batches:
        batch['states'].append(b['states'])
        batch['next_states'].append(b['next_states'])
    batch['batches'] = batches
    return batch

def sample(self):
    '''
    Samples a batch from memory.
    Note that multitask's bodies are parallelized copies with similar envs, just to get more batch sizes
    '''
    batches = []
    for body in self.agent.nanflat_body_a:
        body_batch = body.memory.sample()
        # one-hot actions to calc q_targets
        if body.is_discrete:
            body_batch['actions'] = util.to_one_hot(body_batch['actions'], body.action_space.high)
        body_batch = util.to_torch_batch(body_batch, self.net.gpu)
        batches.append(body_batch)
    # Concat states at dim=1 for feedforward
    batch = {
        'states': torch.cat([body_batch['states'] for body_batch in batches], dim=1),
        'next_states': torch.cat([body_batch['next_states'] for body_batch in batches], dim=1),
    }
    # retain body-batches for body-wise q_targets calc
    batch['body_batches'] = batches
    return batch

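# A minimal shape sketch of the dim=1 concat above (illustrative only; the
# batch size of 32 and per-body state dim of 4 are assumptions, not from the
# source). Two bodies each yield states of shape (batch, state_dim);
# torch.cat(..., dim=1) stacks them feature-wise, so the feedforward net
# sees one wide state vector per sample.
import torch

states_a = torch.zeros(32, 4)  # body A states: (batch, state_dim)
states_b = torch.zeros(32, 4)  # body B states: (batch, state_dim)
combined = torch.cat([states_a, states_b], dim=1)
assert combined.shape == (32, 8)  # feature dims are joined, batch dim is kept
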
def sample(self):
    '''Samples a batch from memory of size self.memory_spec['batch_size']'''
    batch = self.body.memory.sample()
    if self.normalize_state:
        batch = policy_util.normalize_states_and_next_states(self.body, batch)
    batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
    return batch

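# Hedged sketch of what policy_util.normalize_states_and_next_states above
# plausibly does; the actual library implementation may differ. The idea is
# standard running-mean/std normalization applied to both state keys. The
# mean/std arguments here are assumed stand-ins for statistics tracked on
# the body.
import numpy as np

def normalize_states_sketch(batch, mean, std, eps=1e-8):
    '''Return a copy of batch with states and next_states standardized.'''
    out = dict(batch)
    out['states'] = (batch['states'] - mean) / (std + eps)
    out['next_states'] = (batch['next_states'] - mean) / (std + eps)
    return out
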
def replay_sample(self):
    '''Samples a batch from memory'''
    batches = [body.replay_memory.sample() for body in self.agent.nanflat_body_a]
    batch = util.concat_batches(batches)
    batch = util.to_torch_batch(batch, self.net.gpu)
    assert not torch.isnan(batch['states']).any()
    return batch

def sample(self):
    '''Samples a batch from memory'''
    batch = self.body.memory.sample()
    # this is safe for next_action at done since the calculated act_next_q_preds will be multiplied by (1 - batch['dones'])
    batch['next_actions'] = np.zeros_like(batch['actions'])
    batch['next_actions'][:-1] = batch['actions'][1:]
    batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
    return batch

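# A worked sketch of the next_actions shift above, using made-up values.
# next_actions is actions shifted back by one step; the last slot stays 0.
# That zero is harmless because the Q target at a terminal step multiplies
# act_next_q_preds by (1 - done), masking the bogus next action anyway.
import numpy as np

actions = np.array([2, 0, 1, 3])
next_actions = np.zeros_like(actions)
next_actions[:-1] = actions[1:]
# next_actions == [0, 1, 3, 0]; the trailing 0 is masked by (1 - dones[-1])
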
def sample(self):
    # NOTE the purpose of multi-body is to parallelize and get more batch_sizes
    batches = [body.memory.sample(self.batch_size) for body in self.agent.nanflat_body_a]
    # Package data into pytorch variables
    for batch_b in batches:
        util.to_torch_batch(batch_b)
    # Concat state
    combined_states = torch.cat([batch_b['states'] for batch_b in batches], dim=1)
    combined_next_states = torch.cat([batch_b['next_states'] for batch_b in batches], dim=1)
    batch = {'states': combined_states, 'next_states': combined_next_states}
    # use recursive packaging to carry sub data
    batch['batches'] = batches
    return batch

def replay_sample(self):
    '''Samples a batch from memory'''
    batch = self.body.replay_memory.sample()
    if self.normalize_state:
        batch = policy_util.normalize_states_and_next_states(self.body, batch, episodic_flag=self.body.replay_memory.is_episodic)
    batch = util.to_torch_batch(batch, self.net.device, self.body.replay_memory.is_episodic)
    assert not torch.isnan(batch['states']).any(), batch['states']
    return batch

def sample(self):
    '''Samples a batch from memory of size self.memory_spec['batch_size']'''
    batch = self.body.memory.sample()
    # one-hot actions to calc q_targets
    if self.body.is_discrete:
        batch['actions'] = util.to_one_hot(batch['actions'], self.body.action_space.high)
    if self.normalize_state:
        batch = policy_util.normalize_states_and_next_states(self.body, batch)
    batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
    return batch

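# Hedged sketch of the one-hot conversion used above. The real util.to_one_hot
# is not reproduced here; this stands in for the same idea, mapping integer
# actions to rows of an identity matrix sized by the discrete action count
# (body.action_space.high in the calls above).
import numpy as np

def to_one_hot_sketch(actions, num_actions):
    '''Return a (len(actions), num_actions) one-hot array.'''
    return np.eye(num_actions)[np.asarray(actions, dtype=int)]

# to_one_hot_sketch([0, 2, 1], 3) -> [[1,0,0], [0,0,1], [0,1,0]]
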
def sample(self):
    '''Modify the onpolicy sample to also append to replay'''
    batch = self.body.memory.sample()
    if self.body.memory.is_episodic:
        batch = {k: np.concatenate(v) for k, v in batch.items()}  # concat episodic memory
    for idx in range(len(batch['dones'])):
        tuples = [batch[k][idx] for k in self.body.replay_memory.data_keys]
        self.body.replay_memory.add_experience(*tuples)
    batch = util.to_torch_batch(batch, self.net.device, self.body.replay_memory.is_episodic)
    return batch

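# Toy sketch of the episodic flattening above (data values are made up).
# An episodic memory returns a list of per-episode arrays under each key;
# np.concatenate flattens them so experiences can be appended to replay
# one by one in the loop that follows.
import numpy as np

batch = {'rewards': [np.array([1.0, 0.0]), np.array([0.5])]}  # 2 episodes
flat = {k: np.concatenate(v) for k, v in batch.items()}
# flat['rewards'] == array([1.0, 0.0, 0.5])
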
def sample(self):
    '''Modify the onpolicy sample to also append to replay'''
    batches = [body.memory.sample() for body in self.agent.nanflat_body_a]
    batch = util.concat_batches(batches)
    data_keys = self.body.replay_memory.data_keys
    for idx in range(len(batch['dones'])):
        tuples = [batch[k][idx] for k in data_keys]
        self.body.replay_memory.add_experience(*tuples)
    batch = util.to_torch_batch(batch, self.net.gpu)
    return batch

def space_sample(self):
    '''Samples a batch from memory'''
    batches = []
    for body in self.agent.nanflat_body_a:
        self.body = body
        batches.append(self.sample())
    # set body reference back to default
    self.body = self.agent.nanflat_body_a[0]
    batch = util.concat_batches(batches)
    batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
    return batch

def space_sample(self):
    '''Samples a batch per body, where each body may experience a different environment'''
    batch = {k: [] for k in self.body.memory.data_keys}
    for body in self.agent.nanflat_body_a:
        body_batch = body.memory.sample()
        if self.normalize_state:
            body_batch = policy_util.normalize_states_and_next_states(body, body_batch)
        body_batch = util.to_torch_batch(body_batch, self.net.device, body.memory.is_episodic)
        for k, arr in batch.items():
            arr.append(body_batch[k])
    return batch

def sample(self):
    '''Samples a batch from memory of size self.memory_spec['batch_size']'''
    batches = []
    for body in self.agent.nanflat_body_a:
        body_batch = body.memory.sample()
        # one-hot actions to calc q_targets
        if body.is_discrete:
            body_batch['actions'] = util.to_one_hot(body_batch['actions'], body.action_space.high)
        batches.append(body_batch)
    batch = util.concat_batches(batches)
    batch = util.to_torch_batch(batch, self.net.gpu)
    return batch

def sample(self):
    '''Modify the onpolicy sample to also append to replay'''
    batch = self.body.memory.sample()
    batch = {k: np.concatenate(v) for k, v in batch.items()}  # concat episodic memory
    batch['rets'] = math_util.calc_returns(batch, self.gamma)
    for idx in range(len(batch['dones'])):
        tuples = [batch[k][idx] for k in self.body.replay_memory.data_keys]
        self.body.replay_memory.add_experience(*tuples)
    if self.normalize_state:
        batch = policy_util.normalize_states_and_next_states(self.body, batch)
    batch = util.to_torch_batch(batch, self.net.device, self.body.replay_memory.is_episodic)
    return batch

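# Hedged sketch of the discounted-return computation that math_util.calc_returns
# above presumably performs (the library's exact signature and edge-case
# handling may differ): ret_t = r_t + gamma * ret_{t+1}, resetting the running
# return at episode boundaries marked by dones.
import numpy as np

def calc_returns_sketch(rewards, dones, gamma):
    '''Compute discounted returns backward over a flat trajectory.'''
    rets = np.zeros_like(rewards, dtype=float)
    future_ret = 0.0
    for t in reversed(range(len(rewards))):
        future_ret = rewards[t] + gamma * future_ret * (1 - dones[t])
        rets[t] = future_ret
    return rets

# calc_returns_sketch([1, 1, 1], [0, 0, 1], 0.9) -> [2.71, 1.9, 1.0]
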
def sample(self):
    '''Samples a batch from memory'''
    batches = [body.memory.sample() for body in self.agent.nanflat_body_a]
    batch = util.concat_dict(batches)
    if self.is_episodic:
        util.to_torch_nested_batch(batch, self.gpu)
        # Add next actions to the batch
        batch['actions_onehot'] = []
        batch['next_actions'] = []
        for acts in batch['actions']:
            # The next actions are the actions shifted by one time step.
            # For episodic training it does not matter that the action in the last state is set to zero, since there is no corresponding next state. The Q target is just the reward received in the terminal state.
            next_acts = torch.zeros_like(acts)
            next_acts[:-1] = acts[1:]
            # Convert actions to one-hot (both representations are needed for SARSA)
            acts_onehot = util.convert_to_one_hot(acts, self.action_dim, self.gpu)
            batch['actions_onehot'].append(acts_onehot)
            batch['next_actions'].append(next_acts)
        # Flatten the batch to train all at once
        batch = util.concat_episodes(batch)
    else:
        util.to_torch_batch(batch, self.gpu)
        # A batch is only useful for training if it has more than one element.
        # The train function checks for this and skips training if the batch is too small.
        if batch['states'].size(0) > 1:
            batch['next_actions'] = torch.zeros_like(batch['actions'])
            batch['next_actions'][:-1] = batch['actions'][1:]
            batch['actions_onehot'] = util.convert_to_one_hot(batch['actions'], self.action_dim, self.gpu)
            batch_elems = ['states', 'actions', 'actions_onehot', 'rewards', 'dones', 'next_states', 'next_actions']
            for k in batch_elems:
                if batch[k].dim() == 1:
                    batch[k].unsqueeze_(1)
            # If the last experience in the batch is not terminal, the batch has to be shortened by one element, since the algorithm does not yet have access to the next action taken after the final experience
            if batch['dones'].data[-1].int().eq_(0).cpu().numpy()[0]:
                logger.debug('Popping last element')
                for k in batch_elems:
                    batch[k] = batch[k][:-1]
    return batch

def sample(self):
    '''Samples a batch from memory'''
    batches = []
    for body in self.agent.nanflat_body_a:
        body_batch = body.memory.sample()
        # one-hot actions to calc q_targets
        if body.is_discrete:
            body_batch['one_hot_actions'] = util.to_one_hot(body_batch['actions'], body.action_space.high)
        batches.append(body_batch)
    batch = util.concat_batches(batches)
    # this is safe for next_action at done since the calculated act_next_q_preds will be multiplied by (1 - batch['dones'])
    batch['next_actions'] = np.zeros_like(batch['actions'])
    batch['next_actions'][:-1] = batch['actions'][1:]
    batch = util.to_torch_batch(batch, self.net.gpu)
    return batch

def sample(self):
    '''Samples a batch from memory'''
    batch = self.body.memory.sample()
    # one-hot actions to calc q_targets
    if self.body.is_discrete:
        batch['one_hot_actions'] = util.to_one_hot(batch['actions'], self.body.action_space.high)
    # this is safe for next_action at done since the calculated act_next_q_preds will be multiplied by (1 - batch['dones'])
    batch['next_actions'] = np.zeros_like(batch['actions'])
    batch['next_actions'][:-1] = batch['actions'][1:]
    if self.normalize_state:
        batch = policy_util.normalize_states_and_next_states(self.body, batch)
    batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
    return batch

def sample(self):
    '''Samples a batch per body, where each body may experience a different environment'''
    batches = []
    for body in self.agent.nanflat_body_a:
        body_batch = body.memory.sample()
        # one-hot actions to calc q_targets
        if body.is_discrete:
            body_batch['actions'] = util.to_one_hot(body_batch['actions'], body.action_space.high)
        body_batch = util.to_torch_batch(body_batch, self.net.gpu)
        batches.append(body_batch)
    # collect per body for feedforward to hydra heads
    batch = {
        'states': [body_batch['states'] for body_batch in batches],
        'next_states': [body_batch['next_states'] for body_batch in batches],
    }
    # retain body-batches for body-wise q_targets calc
    batch['body_batches'] = batches
    return batch

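# Hedged sketch of the hydra-style forward implied above: unlike the multitask
# dim=1 concat, each head consumes its own state tensor, so states stay a list
# rather than being merged. The module structure and shapes here are
# assumptions for illustration, not the library's actual network.
import torch
import torch.nn as nn

heads = nn.ModuleList([nn.Linear(4, 2), nn.Linear(6, 3)])  # one head per body
states = [torch.zeros(32, 4), torch.zeros(32, 6)]  # one state tensor per body
q_preds = [head(s) for head, s in zip(heads, states)]  # per-body q-values
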
def sample(self):
    '''Samples a batch from memory'''
    batches = [body.memory.sample() for body in self.agent.nanflat_body_a]
    batch = util.concat_batches(batches)
    batch = util.to_torch_batch(batch, self.net.gpu)
    return batch

def replay_sample(self):
    '''Samples a batch from memory'''
    batch = self.body.replay_memory.sample()
    batch = util.to_torch_batch(batch, self.net.device, self.body.replay_memory.is_episodic)
    return batch

def sample(self):
    '''Samples a batch from memory of size self.memory_spec['batch_size']'''
    batch = self.body.memory.sample()
    batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
    return batch