Code example #1
File: dqn.py Project: tigerneil/SLM-Lab
 def sample(self):
     '''Samples a batch from memory of size self.batch_size'''
     batches = [body.memory.sample(self.batch_size)
                for body in self.agent.nanflat_body_a]
     batch = util.concat_dict(batches)
     util.to_torch_batch(batch)
     return batch
Code example #2
File: actor_critic.py Project: zimoqingfeng/SLM-Lab
 def sample(self):
     '''Samples a batch from memory'''
     batches = [body.memory.sample() for body in self.agent.nanflat_body_a]
     batch = util.concat_dict(batches)
     if self.is_episodic:
         util.to_torch_nested_batch(batch, self.gpu)
     else:
         util.to_torch_batch(batch, self.gpu)
     return batch
Code example #3
File: dqn.py Project: tigerneil/SLM-Lab
 def sample(self):
     '''Samples one batch per environment'''
     batches = [body.memory.sample(self.batch_size)
                for body in self.agent.nanflat_body_a]
     # Package data into pytorch variables
     for batch_b in batches:
         util.to_torch_batch(batch_b)
     batch = {'states': [], 'next_states': []}
     for b in batches:
         batch['states'].append(b['states'])
         batch['next_states'].append(b['next_states'])
     batch['batches'] = batches
     return batch
Code example #4
File: dqn.py Project: kevin83919/SLM-Lab
 def sample(self):
     '''
     Samples a batch from memory.
     Note that multitask's bodies are parallelized copies with similar envs, just to get more batch sizes
     '''
     batches = []
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         # one-hot actions to calc q_targets
         if body.is_discrete:
             body_batch['actions'] = util.to_one_hot(
                 body_batch['actions'], body.action_space.high)
         body_batch = util.to_torch_batch(body_batch, self.net.gpu)
         batches.append(body_batch)
     # Concat states at dim=1 for feedforward
     batch = {
         'states':
         torch.cat([body_batch['states'] for body_batch in batches], dim=1),
         'next_states':
         torch.cat([body_batch['next_states'] for body_batch in batches],
                   dim=1),
     }
     # retain body-batches for body-wise q_targets calc
     batch['body_batches'] = batches
     return batch
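The concatenation at dim=1 above merges the per-body state tensors side by side so a single feedforward pass can consume all bodies at once. A minimal standalone illustration of that pattern (the shapes here are hypothetical, not taken from SLM-Lab):

 import torch

 # two hypothetical bodies, each contributing a (batch_size, state_dim) tensor
 states_a = torch.randn(32, 4)
 states_b = torch.randn(32, 4)

 # concatenating at dim=1 keeps the batch dimension and stacks the feature
 # dimensions, giving a (32, 8) input for one feedforward network
 combined = torch.cat([states_a, states_b], dim=1)
 assert combined.shape == (32, 8)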
Code example #5
 def sample(self):
     '''Samples a batch from memory of size self.memory_spec['batch_size']'''
     batch = self.body.memory.sample()
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(self.body, batch)
     batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
     return batch
Code example #6
File: sil.py Project: ronald-xie/SLM-Lab
 def replay_sample(self):
     '''Samples a batch from memory'''
     batches = [body.replay_memory.sample() for body in self.agent.nanflat_body_a]
     batch = util.concat_batches(batches)
     batch = util.to_torch_batch(batch, self.net.gpu)
     assert not torch.isnan(batch['states']).any()
     return batch
Code example #7
File: sarsa.py Project: c-w-m/slm-lab
 def sample(self):
     '''Samples a batch from memory'''
     batch = self.body.memory.sample()
     # this is safe for next_action at done since the calculated act_next_q_preds will be multiplied by (1 - batch['dones'])
     batch['next_actions'] = np.zeros_like(batch['actions'])
     batch['next_actions'][:-1] = batch['actions'][1:]
     batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
     return batch
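The comment above points out that zero-filling the final next_action is safe because the corresponding next-step Q-value is multiplied by (1 - batch['dones']). A minimal sketch of that shift-by-one trick in plain NumPy (independent of SLM-Lab's helpers, with made-up values):

 import numpy as np

 # hypothetical mini-batch of discrete actions and done flags
 actions = np.array([2, 0, 1, 3])
 dones = np.array([0.0, 0.0, 0.0, 1.0])

 # next_actions are the actions shifted back by one step; the last entry
 # stays 0, which is harmless because the matching Q-value is masked out
 # via (1 - dones) in the target, e.g.
 # q_targets = rewards + gamma * (1 - dones) * act_next_q_preds
 next_actions = np.zeros_like(actions)
 next_actions[:-1] = actions[1:]
 print(next_actions)  # [0 1 3 0]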
Code example #8
File: dqn.py Project: tigerneil/SLM-Lab
 def sample(self):
     # NOTE the purpose of multi-body is to parallelize and get more batch_sizes
     batches = [body.memory.sample(self.batch_size)
                for body in self.agent.nanflat_body_a]
     # Package data into pytorch variables
     for batch_b in batches:
         util.to_torch_batch(batch_b)
     # Concat state
     combined_states = torch.cat(
         [batch_b['states'] for batch_b in batches], dim=1)
     combined_next_states = torch.cat(
         [batch_b['next_states'] for batch_b in batches], dim=1)
     batch = {'states': combined_states,
              'next_states': combined_next_states}
     # use recursive packaging to carry sub data
     batch['batches'] = batches
     return batch
Code example #9
File: sil.py Project: shlpu/SLM-Lab
 def replay_sample(self):
     '''Samples a batch from memory'''
     batches = [
         body.replay_memory.sample() for body in self.agent.nanflat_body_a
     ]
     batch = util.concat_batches(batches)
     batch = util.to_torch_batch(batch, self.net.gpu)
     assert not torch.isnan(batch['states']).any()
     return batch
Code example #10
File: sil.py Project: wilson1yan/SLM-Lab
 def replay_sample(self):
     '''Samples a batch from memory'''
     batch = self.body.replay_memory.sample()
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(
             self.body, batch, episodic_flag=self.body.replay_memory.is_episodic)
     batch = util.to_torch_batch(batch, self.net.device, self.body.replay_memory.is_episodic)
     assert not torch.isnan(batch['states']).any(), batch['states']
     return batch
Code example #11
File: dqn.py Project: xiangshengcn/SLM-Lab
 def sample(self):
     '''Samples a batch from memory of size self.memory_spec['batch_size']'''
     batch = self.body.memory.sample()
     # one-hot actions to calc q_targets
     if self.body.is_discrete:
         batch['actions'] = util.to_one_hot(batch['actions'], self.body.action_space.high)
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(self.body, batch)
     batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
     return batch
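Several of the DQN variants one-hot encode discrete actions before computing q_targets. util.to_one_hot is SLM-Lab's own helper; an equivalent NumPy sketch (the function name below is illustrative, not the library's) could look like this:

 import numpy as np

 def to_one_hot_sketch(actions, num_actions):
     '''Illustrative stand-in for util.to_one_hot: map integer actions to
     one-hot rows so per-action Q-values can be selected by multiplication.'''
     one_hot = np.zeros((len(actions), num_actions), dtype=np.float32)
     one_hot[np.arange(len(actions)), actions] = 1.0
     return one_hot

 # e.g. to_one_hot_sketch(np.array([0, 2, 1]), 3)
 # -> [[1. 0. 0.], [0. 0. 1.], [0. 1. 0.]]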
Code example #12
File: sil.py Project: kengz/SLM-Lab
 def sample(self):
     '''Modify the onpolicy sample to also append to replay'''
     batch = self.body.memory.sample()
     if self.body.memory.is_episodic:
         batch = {k: np.concatenate(v) for k, v in batch.items()}  # concat episodic memory
     for idx in range(len(batch['dones'])):
         tuples = [batch[k][idx] for k in self.body.replay_memory.data_keys]
         self.body.replay_memory.add_experience(*tuples)
     batch = util.to_torch_batch(batch, self.net.device, self.body.replay_memory.is_episodic)
     return batch
Code example #13
File: sil.py Project: shlpu/SLM-Lab
 def sample(self):
     '''Modify the onpolicy sample to also append to replay'''
     batches = [body.memory.sample() for body in self.agent.nanflat_body_a]
     batch = util.concat_batches(batches)
     data_keys = self.body.replay_memory.data_keys
     for idx in range(len(batch['dones'])):
         tuples = [batch[k][idx] for k in data_keys]
         self.body.replay_memory.add_experience(*tuples)
     batch = util.to_torch_batch(batch, self.net.gpu)
     return batch
Code example #14
File: sil.py Project: ronald-xie/SLM-Lab
 def sample(self):
     '''Modify the onpolicy sample to also append to replay'''
     batches = [body.memory.sample() for body in self.agent.nanflat_body_a]
     batch = util.concat_batches(batches)
     data_keys = self.body.replay_memory.data_keys
     for idx in range(len(batch['dones'])):
         tuples = [batch[k][idx] for k in data_keys]
         self.body.replay_memory.add_experience(*tuples)
     batch = util.to_torch_batch(batch, self.net.gpu)
     return batch
Code example #15
 def space_sample(self):
     '''Samples a batch from memory'''
     batches = []
     for body in self.agent.nanflat_body_a:
         self.body = body
         batches.append(self.sample())
     # set body reference back to default
     self.body = self.agent.nanflat_body_a[0]
     batch = util.concat_batches(batches)
     batch = util.to_torch_batch(batch, self.net.device, self.body.memory.is_episodic)
     return batch
Code example #16
 def space_sample(self):
     '''Samples a batch per body, which may experience a different environment'''
     batch = {k: [] for k in self.body.memory.data_keys}
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         if self.normalize_state:
             body_batch = policy_util.normalize_states_and_next_states(body, body_batch)
         body_batch = util.to_torch_batch(body_batch, self.net.device, body.memory.is_episodic)
         for k, arr in batch.items():
             arr.append(body_batch[k])
     return batch
Code example #17
 def sample(self):
     '''Samples a batch from memory of size self.memory_spec['batch_size']'''
     batches = []
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         # one-hot actions to calc q_targets
         if body.is_discrete:
             body_batch['actions'] = util.to_one_hot(body_batch['actions'], body.action_space.high)
         batches.append(body_batch)
     batch = util.concat_batches(batches)
     batch = util.to_torch_batch(batch, self.net.gpu)
     return batch
Code example #18
File: sil.py Project: wilson1yan/SLM-Lab
 def sample(self):
     '''Modify the onpolicy sample to also append to replay'''
     batch = self.body.memory.sample()
     batch = {k: np.concatenate(v) for k, v in batch.items()}  # concat episodic memory
     batch['rets'] = math_util.calc_returns(batch, self.gamma)
     for idx in range(len(batch['dones'])):
         tuples = [batch[k][idx] for k in self.body.replay_memory.data_keys]
         self.body.replay_memory.add_experience(*tuples)
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(self.body, batch)
     batch = util.to_torch_batch(batch, self.net.device, self.body.replay_memory.is_episodic)
     return batch
Code example #19
 def sample(self):
     '''Samples a batch from memory'''
     batches = [body.memory.sample()
                for body in self.agent.nanflat_body_a]
     batch = util.concat_dict(batches)
     if self.is_episodic:
         util.to_torch_nested_batch(batch, self.gpu)
         # Add next action to batch
         batch['actions_onehot'] = []
         batch['next_actions'] = []
         for acts in batch['actions']:
             # The next actions are the actions shifted by one time step
              # For episodic training it does not matter that the action in the last state is set to zero, since there is no corresponding next state. The Q target is just the reward received in the terminal state.
             next_acts = torch.zeros_like(acts)
             next_acts[:-1] = acts[1:]
             # Convert actions to one hot (both representations are needed for SARSA)
             acts_onehot = util.convert_to_one_hot(acts, self.action_dim, self.gpu)
             batch['actions_onehot'].append(acts_onehot)
             batch['next_actions'].append(next_acts)
         # Flatten the batch to train all at once
         batch = util.concat_episodes(batch)
     else:
         util.to_torch_batch(batch, self.gpu)
         # Batch only useful to train with if it has more than one element
         # Train function checks for this and skips training if batch is too small
         if batch['states'].size(0) > 1:
             batch['next_actions'] = torch.zeros_like(batch['actions'])
             batch['next_actions'][:-1] = batch['actions'][1:]
             batch['actions_onehot'] = util.convert_to_one_hot(batch['actions'], self.action_dim, self.gpu)
             batch_elems = ['states', 'actions', 'actions_onehot', 'rewards', 'dones', 'next_states', 'next_actions']
             for k in batch_elems:
                 if batch[k].dim() == 1:
                     batch[k].unsqueeze_(1)
             # If the last experience in the batch is not terminal the batch has to be shortened by one element since the algorithm does not yet have access to the next action taken for the final experience
             if batch['dones'].data[-1].int().eq_(0).cpu().numpy()[0]:
                 logger.debug(f'Popping last element')
                 for k in batch_elems:
                     batch[k] = batch[k][:-1]
     return batch
Code example #20
File: sarsa.py Project: ronald-xie/SLM-Lab
 def sample(self):
     '''Samples a batch from memory'''
     batches = []
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         # one-hot actions to calc q_targets
         if body.is_discrete:
             body_batch['one_hot_actions'] = util.to_one_hot(body_batch['actions'], body.action_space.high)
         batches.append(body_batch)
     batch = util.concat_batches(batches)
     # this is safe for next_action at done since the calculated act_next_q_preds will be multiplied by (1 - batch['dones'])
     batch['next_actions'] = np.zeros_like(batch['actions'])
     batch['next_actions'][:-1] = batch['actions'][1:]
     batch = util.to_torch_batch(batch, self.net.gpu)
     return batch
Code example #21
File: sarsa.py Project: dantodor/SLM-Lab
 def sample(self):
     '''Samples a batch from memory'''
     batches = []
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         # one-hot actions to calc q_targets
         if body.is_discrete:
             body_batch['one_hot_actions'] = util.to_one_hot(body_batch['actions'], body.action_space.high)
         batches.append(body_batch)
     batch = util.concat_batches(batches)
     # this is safe for next_action at done since the calculated act_next_q_preds will be multiplied by (1 - batch['dones'])
     batch['next_actions'] = np.zeros_like(batch['actions'])
     batch['next_actions'][:-1] = batch['actions'][1:]
     batch = util.to_torch_batch(batch, self.net.gpu)
     return batch
Code example #22
 def sample(self):
     '''Samples a batch from memory'''
     batch = self.body.memory.sample()
     # one-hot actions to calc q_targets
     if self.body.is_discrete:
         batch['one_hot_actions'] = util.to_one_hot(
             batch['actions'], self.body.action_space.high)
     # this is safe for next_action at done since the calculated act_next_q_preds will be multiplied by (1 - batch['dones'])
     batch['next_actions'] = np.zeros_like(batch['actions'])
     batch['next_actions'][:-1] = batch['actions'][1:]
     if self.normalize_state:
         batch = policy_util.normalize_states_and_next_states(
             self.body, batch)
     batch = util.to_torch_batch(batch, self.net.device,
                                 self.body.memory.is_episodic)
     return batch
Code example #23
 def sample(self):
     '''Samples a batch per body, which may experience a different environment'''
     batches = []
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         # one-hot actions to calc q_targets
         if body.is_discrete:
             body_batch['actions'] = util.to_one_hot(body_batch['actions'], body.action_space.high)
         body_batch = util.to_torch_batch(body_batch, self.net.gpu)
         batches.append(body_batch)
     # collect per body for feedforward to hydra heads
     batch = {
         'states': [body_batch['states'] for body_batch in batches],
         'next_states': [body_batch['next_states'] for body_batch in batches],
     }
     # retain body-batches for body-wise q_targets calc
     batch['body_batches'] = batches
     return batch
Code example #24
File: dqn.py Project: ronald-xie/SLM-Lab
 def sample(self):
     '''
     Samples a batch from memory.
     Note that multitask's bodies are parallelized copies with similar envs, just to get more batch sizes
     '''
     batches = []
     for body in self.agent.nanflat_body_a:
         body_batch = body.memory.sample()
         # one-hot actions to calc q_targets
         if body.is_discrete:
             body_batch['actions'] = util.to_one_hot(body_batch['actions'], body.action_space.high)
         body_batch = util.to_torch_batch(body_batch, self.net.gpu)
         batches.append(body_batch)
     # Concat states at dim=1 for feedforward
     batch = {
         'states': torch.cat([body_batch['states'] for body_batch in batches], dim=1),
         'next_states': torch.cat([body_batch['next_states'] for body_batch in batches], dim=1),
     }
     # retain body-batches for body-wise q_targets calc
     batch['body_batches'] = batches
     return batch
Code example #25
File: actor_critic.py Project: ronald-xie/SLM-Lab
 def sample(self):
     '''Samples a batch from memory'''
     batches = [body.memory.sample() for body in self.agent.nanflat_body_a]
     batch = util.concat_batches(batches)
     batch = util.to_torch_batch(batch, self.net.gpu)
     return batch
Code example #26
File: reinforce.py Project: kevin83919/SLM-Lab
 def sample(self):
     '''Samples a batch from memory'''
     batches = [body.memory.sample() for body in self.agent.nanflat_body_a]
     batch = util.concat_batches(batches)
     batch = util.to_torch_batch(batch, self.net.gpu)
     return batch
Code example #27
 def replay_sample(self):
     '''Samples a batch from memory'''
     batch = self.body.replay_memory.sample()
     batch = util.to_torch_batch(batch, self.net.device,
                                 self.body.replay_memory.is_episodic)
     return batch
Code example #28
 def sample(self):
     '''Samples a batch from memory of size self.memory_spec['batch_size']'''
     batch = self.body.memory.sample()
     batch = util.to_torch_batch(batch, self.net.device,
                                 self.body.memory.is_episodic)
     return batch
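Every variant above ends by handing the batch to util.to_torch_batch, whose exact behaviour (episodic nesting, per-key dtypes, device placement) lives in SLM-Lab's util module. As a rough mental model only, the non-episodic case amounts to a dict-wide numpy-to-tensor conversion, assumed to be along these lines:

 import numpy as np
 import torch

 def to_torch_batch_sketch(batch, device):
     '''Rough approximation of util.to_torch_batch for the non-episodic case:
     convert each numpy array in the batch dict to a float tensor on device.
     The real helper also handles episodic (nested) batches and dtypes.'''
     return {k: torch.from_numpy(np.asarray(v)).float().to(device)
             for k, v in batch.items()}

 # usage sketch with made-up shapes:
 # batch = {'states': np.zeros((32, 4)), 'actions': np.zeros(32), 'rewards': np.zeros(32)}
 # batch = to_torch_batch_sketch(batch, torch.device('cpu'))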