def _get_trajectories(self, start_indices):
    """Return the parent's trajectory batch plus target numbers and times.

    The parent implementation is called first; its result is then extended
    in place with two extra entries, each produced by ``subsequences`` from
    the matching buffer on ``self``.

    Args:
        start_indices: Start positions, forwarded unchanged to the parent
            and to every ``subsequences`` call.

    Returns:
        The object returned by the parent ``_get_trajectories``, with the
        keys ``'target_numbers'`` and ``'times'`` added.
    """
    batch = super()._get_trajectories(start_indices)
    # Output key -> name of the attribute holding the corresponding buffer.
    extra_fields = (
        ('target_numbers', '_target_numbers'),
        ('times', '_times'),
    )
    for key, attr_name in extra_fields:
        batch[key] = subsequences(
            getattr(self, attr_name),
            start_indices,
            self._subtraj_length,
        )
    return batch
def _get_trajectories(self, start_indices):
    """Assemble a batch of sub-trajectories, including memory entries.

    Every entry comes from ``subsequences`` applied to one of the buffers on
    ``self`` with ``start_indices`` and ``self._subtraj_length``. The
    "next" entries and ``dloss_dwrites`` additionally pass
    ``start_offset=1``.

    Args:
        start_indices: Start positions forwarded to every ``subsequences``
            call.

    Returns:
        A dict with keys ``env_obs``, ``env_actions``, ``next_env_obs``,
        ``memories``, ``writes``, ``next_memories``, ``rewards``,
        ``terminals`` and ``dloss_dwrites``. ``writes`` and
        ``next_memories`` refer to the very same object.
    """

    def _window(buffer, **options):
        # Shared call shape: only the buffer and optional offset vary.
        return subsequences(
            buffer, start_indices, self._subtraj_length, **options
        )

    # Computed once, up front, and exposed under two keys below.
    shifted_memories = _window(self._memories, start_offset=1)
    return {
        'env_obs': _window(self._env_obs),
        'env_actions': _window(self._env_actions),
        'next_env_obs': _window(self._env_obs, start_offset=1),
        'memories': _window(self._memories),
        'writes': shifted_memories,
        'next_memories': shifted_memories,
        'rewards': _window(self._rewards),
        'terminals': _window(self._terminals),
        'dloss_dwrites': _window(self._dloss_dmemories, start_offset=1),
    }
def _get_trajectories(self, start_indices):
    """Assemble a batch of sub-trajectories from the stored buffers.

    Every entry comes from ``subsequences`` applied to one of the buffers on
    ``self`` with ``start_indices`` and ``self._subtraj_length``.
    ``next_observations`` reads the same buffer as ``observations`` but
    passes ``start_offset=1``.

    Args:
        start_indices: Start positions forwarded to every ``subsequences``
            call.

    Returns:
        A dict with keys ``observations``, ``actions``,
        ``next_observations``, ``rewards`` and ``terminals``.
    """

    def _window(buffer, **options):
        # Shared call shape: only the buffer and optional offset vary.
        return subsequences(
            buffer, start_indices, self._subtraj_length, **options
        )

    return {
        'observations': _window(self._observations),
        'actions': _window(self._actions),
        'next_observations': _window(self._observations, start_offset=1),
        'rewards': _window(self._rewards),
        'terminals': _window(self._terminals),
    }
def test_subsequences(self):
    """Check ``np_util.subsequences`` on a 4x2 matrix with windows of 2 rows.

    Start indices 0, 1 and 2 are expected to yield three overlapping
    two-row windows stacked along a new leading axis.
    """
    # Rows are [0, 1], [2, 3], [4, 5], [6, 7].
    matrix = np.arange(8).reshape(4, 2)
    starts = [0, 1, 2]
    window = 2

    result = np_util.subsequences(matrix, starts, window)

    want = np.array([
        [[0, 1], [2, 3]],
        [[2, 3], [4, 5]],
        [[4, 5], [6, 7]],
    ])
    self.assertNpEqual(result, want)