Esempio n. 1
0
 def get_data(self, idxs, horizon=9):
     """
     Collects windowed sequences at the requested window indices.

     Every one of the 'observs', 'rews', 'dones' and 'actions' entries
     of self.data is passed through rolling_window with a window
     argument of horizon+1, indexed with idxs and converted to a
     float32 array.

     Returns a dict with the keys 'obs_seq', 'rew_seq', 'done_seq'
     and 'action_seq'.
     """
     window = horizon + 1
     # (output key, self.data key) pairs, in the order the result is filled
     key_pairs = (
         ('obs_seq', 'observs'),
         ('rew_seq', 'rews'),
         ('done_seq', 'dones'),
         ('action_seq', 'actions'),
     )
     return {
         out_key: np.asarray(
             rolling_window(self.data[data_key], window)[idxs]
         ).astype(np.float32)
         for out_key, data_key in key_pairs
     }
Esempio n. 2
0
 def sample(self, batch_size=256, horizon=9):
     """
     Draws a random batch of windowed sequences.

     batch_size start indices are drawn with np.random.randint from
     [0, len(self.data['dones']) - (horizon+1)). The 'observs', 'rews',
     'dones' and 'actions' entries of self.data are then each passed
     through rolling_window with a window argument of horizon+1,
     indexed with those indices and converted to float32.

     Returns a dict with the keys 'obs_seq', 'rew_seq', 'done_seq'
     and 'action_seq'.
     """
     window = horizon + 1
     upper = len(self.data['dones']) - window
     starts = np.random.randint(0, upper, size=(batch_size,))

     batch = {}
     # Fill the batch one field at a time: roll, pick the drawn windows, cast
     for out_key, data_key in (
         ('obs_seq', 'observs'),
         ('rew_seq', 'rews'),
         ('done_seq', 'dones'),
         ('action_seq', 'actions'),
     ):
         rolled = rolling_window(self.data[data_key], window)
         batch[out_key] = np.asarray(rolled[starts]).astype(np.float32)
     return batch
Esempio n. 3
0
 def priority_sample(self, batch_size=256, horizon=9):
     """
     Samples windowed sequences selected by non-zero reward entries.

     A boolean mask marks the entries of self.data['rews'] that are
     not 0. The windows produced by rolling_window (window argument
     horizon+1) are first filtered with that mask, then at most
     batch_size of the remaining windows are picked in random order.
     The returned batch can therefore hold fewer than batch_size
     sequences.

     Returns a dict with the keys 'obs_seq', 'rew_seq', 'done_seq'
     and 'action_seq', each a float32 array.
     """
     window = horizon + 1
     has_reward = self.data['rews'] != 0
     # Random subset (without repetition) of the masked windows
     picks = np.random.permutation(has_reward.sum())[:batch_size]

     def _gather(data_key):
         # NOTE(review): the mask has the length of self.data['rews'] and
         # is applied to the output of rolling_window - assumes both agree
         # along the first axis; TODO confirm against rolling_window.
         rolled = rolling_window(self.data[data_key], window)
         return np.asarray(rolled[has_reward][picks]).astype(np.float32)

     batch = {}
     batch['obs_seq'] = _gather('observs')
     batch['rew_seq'] = _gather('rews')
     batch['done_seq'] = _gather('dones')
     batch['action_seq'] = _gather('actions')
     return batch