def reset(self, **kwargs):
    """Reset the wrapped ``venv`` and start a fresh trajectory accumulator.

    Args:
        **kwargs: Forwarded unchanged to ``self.venv.reset``.

    Returns:
        Whatever ``self.venv.reset(**kwargs)`` returned.

    Raises:
        RuntimeError: If a reset already happened, premature resets are
            configured to be errors, and ``n_transitions`` is still
            positive.
    """
    # Evaluated left to right with short-circuiting, so the later
    # attributes are only read when the earlier ones are truthy.
    premature = (
        self._init_reset
        and self.error_on_premature_reset
        and self.n_transitions > 0
    )
    if premature:
        raise RuntimeError(
            "BufferingWrapper reset() before samples were accessed"
        )

    self._init_reset = True
    self.n_transitions = 0

    initial_obs = self.venv.reset(**kwargs)

    # The accumulator is only replaced once the underlying reset succeeded.
    accumulator = rollout.TrajectoryAccumulator()
    self._traj_accum = accumulator
    # Seed one partial trajectory per observation, keyed by its position.
    for env_idx, env_ob in enumerate(initial_obs):
        accumulator.add_step({"obs": env_ob}, key=env_idx)

    return initial_obs
def reset(self) -> np.ndarray:
    """Reset the wrapped environment and begin recording a new trajectory.

    A new trajectory accumulator is installed and seeded with the
    observation produced by ``self.env.reset()``. That observation is
    also stored as ``_last_obs``.

    Returns:
        The observation returned by ``self.env.reset()``.
    """
    # The accumulator is replaced before the wrapped env is reset.
    accumulator = rollout.TrajectoryAccumulator()
    self.traj_accum = accumulator

    first_obs = self.env.reset()
    self._last_obs = first_obs
    accumulator.add_step({"obs": first_obs})

    # Bookkeeping flags are only updated once the first step is recorded.
    self._done_before, self._is_reset = False, True
    return first_obs