Beispiel #1
0
 def reset(self, **kwargs):
     """Reset the wrapped ``venv`` and begin accumulating fresh trajectories.

     Args:
         **kwargs: Forwarded unchanged to ``self.venv.reset``.

     Returns:
         The observations returned by ``self.venv.reset``.

     Raises:
         RuntimeError: If a reset has already happened,
             ``error_on_premature_reset`` is set, and ``n_transitions``
             is positive.
     """
     unread_samples = (
         self._init_reset
         and self.error_on_premature_reset
         and self.n_transitions > 0
     )
     if unread_samples:
         raise RuntimeError(
             "BufferingWrapper reset() before samples were accessed")

     self._init_reset = True
     self.n_transitions = 0
     initial_obs = self.venv.reset(**kwargs)

     # Seed a new accumulator with one opening step per observation, keyed
     # by the observation's position in the returned sequence.
     accumulator = rollout.TrajectoryAccumulator()
     self._traj_accum = accumulator
     for env_idx, first_ob in enumerate(initial_obs):
         accumulator.add_step({"obs": first_ob}, key=env_idx)
     return initial_obs
Beispiel #2
0
    def reset(self) -> np.ndarray:
        """Reset the wrapped environment and start recording a new trajectory.

        A fresh trajectory accumulator replaces the previous one, and the
        observation returned by ``self.env.reset()`` is stored as its first
        step.

        Returns:
            The observation returned by ``self.env.reset()``.
        """
        # Replace the accumulator before touching the environment so the
        # previous trajectory's steps are dropped.
        self.traj_accum = rollout.TrajectoryAccumulator()

        self._last_obs = initial_obs = self.env.reset()
        self.traj_accum.add_step({"obs": initial_obs})

        # Flag that a reset has happened and no "done" has been seen since.
        self._done_before, self._is_reset = False, True
        return initial_obs