Ejemplo n.º 1
0
 def _get_trajectories(self, start_indices):
     """Return the parent's trajectories plus target-number and time entries.

     The mapping produced by the parent implementation is extended in place
     with two extra keys, ``'target_numbers'`` and ``'times'``, each obtained
     by calling ``subsequences`` on the matching private attribute with
     ``start_indices`` and ``self._subtraj_length``.

     Args:
         start_indices: Forwarded unchanged to the parent implementation and
             to every ``subsequences`` call.

     Returns:
         The object returned by the parent implementation, with the two
         additional entries set.
     """
     def _window(sequence):
         # Every extra entry uses the same indices and sub-trajectory length.
         return subsequences(sequence, start_indices, self._subtraj_length)

     trajectories = super()._get_trajectories(start_indices)
     trajectories['target_numbers'] = _window(self._target_numbers)
     trajectories['times'] = _window(self._times)
     return trajectories
Ejemplo n.º 2
0
 def _get_trajectories(self, start_indices):
     """Assemble the sub-trajectory batch for the given start indices.

     Every entry is produced by ``subsequences`` with ``start_indices`` and
     ``self._subtraj_length``. Entries requested with ``start_offset=1`` are
     presumably shifted one step forward relative to the others -- confirm
     against the ``subsequences`` implementation.

     Args:
         start_indices: Forwarded unchanged to every ``subsequences`` call.

     Returns:
         A dict with the keys ``env_obs``, ``env_actions``, ``next_env_obs``,
         ``memories``, ``writes``, ``next_memories``, ``rewards``,
         ``terminals`` and ``dloss_dwrites``.
     """
     def _window(sequence, **kwargs):
         # Shared call shape; kwargs carries the optional start_offset.
         return subsequences(sequence, start_indices,
                             self._subtraj_length, **kwargs)

     # Computed once: 'writes' and 'next_memories' refer to the same object.
     shifted_memories = _window(self._memories, start_offset=1)
     return {
         'env_obs': _window(self._env_obs),
         'env_actions': _window(self._env_actions),
         'next_env_obs': _window(self._env_obs, start_offset=1),
         'memories': _window(self._memories),
         'writes': shifted_memories,
         'next_memories': shifted_memories,
         'rewards': _window(self._rewards),
         'terminals': _window(self._terminals),
         'dloss_dwrites': _window(self._dloss_dmemories, start_offset=1),
     }
 def _get_trajectories(self, start_indices):
     """Assemble the sub-trajectory batch for the given start indices.

     Every entry is produced by ``subsequences`` with ``start_indices`` and
     ``self._subtraj_length``. ``next_observations`` is read from the same
     buffer as ``observations`` but with ``start_offset=1``, presumably a
     one-step shift -- confirm against the ``subsequences`` implementation.

     Args:
         start_indices: Forwarded unchanged to every ``subsequences`` call.

     Returns:
         A dict with the keys ``observations``, ``actions``,
         ``next_observations``, ``rewards`` and ``terminals``.
     """
     def _window(sequence, **kwargs):
         # Shared call shape; kwargs carries the optional start_offset.
         return subsequences(sequence, start_indices,
                             self._subtraj_length, **kwargs)

     return {
         'observations': _window(self._observations),
         'actions': _window(self._actions),
         'next_observations': _window(self._observations, start_offset=1),
         'rewards': _window(self._rewards),
         'terminals': _window(self._terminals),
     }
Ejemplo n.º 4
0
 def test_subsequences(self):
     """Check ``np_util.subsequences`` on a 4x2 matrix.

     With start indices 0, 1 and 2 and a length of 2, the expected result
     stacks three overlapping two-row windows of the input.
     """
     matrix = np.array([[0, 1], [2, 3], [4, 5], [6, 7]])
     starts = [0, 1, 2]
     window_length = 2

     actual = np_util.subsequences(matrix, starts, window_length)

     # One two-row window per start index, in the order of the indices.
     expected = np.array([
         [[0, 1], [2, 3]],
         [[2, 3], [4, 5]],
         [[4, 5], [6, 7]],
     ])
     self.assertNpEqual(actual, expected)