def step(self, action):
    """Step the env, closing it once the episode budget has been used up.

    Args:
        action: Action forwarded unchanged to ``super().step``.

    Returns:
        The ``(obs, reward, done, info)`` tuple produced by ``super().step``.
        In the vectorized case every entry of ``done`` is overwritten with
        True on the step that closes the envs.

    Raises:
        ClosedEnvironmentError: If the env is already closed. The message
            mentions the episode limit when that limit has been reached.
    """
    if self.is_closed():
        if self._episode_counter >= self._max_episodes:
            raise ClosedEnvironmentError(
                f"Env reached max number of episodes ({self._max_episodes})"
            )
        raise ClosedEnvironmentError("Can't step through closed env.")

    obs, reward, done, info = super().step(action)

    # Both branches compare against the same private attribute with `>=`, so
    # that overshooting the budget still closes the env.
    limit_reached = self._episode_counter >= self._max_episodes

    if self.is_vectorized:
        # BUG: This can be reached while in the last 'send' (which uses
        # self.send) of the previous epoch while iterating.
        if any(done) and limit_reached:
            logger.info(
                "Closing the envs since we reached the max number of episodes."
            )
            self.close()
            # Mark every sub-env as finished, since all of them were closed.
            done[:] = True
    elif done and limit_reached:
        logger.info(
            "Closing the env since we reached the max number of episodes."
        )
        self.close()
    return obs, reward, done, info
def step(self, action):
    """Step the wrapped env and count the observation(s) it returns.

    Args:
        action: Action forwarded unchanged to ``self.env.step``.

    Returns:
        The ``(obs, reward, done, info)`` tuple from the wrapped env.

    Raises:
        ClosedEnvironmentError: If this env has already been closed. The
            message mentions the observation limit when it has been reached.
    """
    if self._is_closed:
        if self._obs_counter >= self._max_obs:
            reason = f"Env reached max number of observations ({self._max_obs})"
        else:
            reason = "Can't step through closed env."
        raise ClosedEnvironmentError(reason)

    observation, rew, is_done, extra = self.env.step(action)

    # A vectorized env yields one observation per sub-env on every step.
    n_new_observations = self.env.num_envs if self.is_vectorized else 1
    self._obs_counter += n_new_observations
    logger.debug(f"(observation {self._obs_counter}/{self._max_obs})")

    # BUG: If we dont use >=, then iteration with EnvDataset doesn't work.
    budget_exhausted = self._obs_counter >= self._max_obs
    if budget_exhausted:
        self.close()
    return observation, rew, is_done, extra
def reset(self):
    """Reset the wrapped env, counting the observation that reset returns.

    Returns:
        The observation returned by ``self.env.reset()``.

    Raises:
        ClosedEnvironmentError: If this env has already been closed. The
            message mentions the observation limit when it has been reached.
    """
    if self._is_closed:
        if self._obs_counter >= self._max_obs:
            raise ClosedEnvironmentError(
                f"Env reached max number of observations ({self._max_obs})"
            )
        # This is `reset`, not `step`: say so in the error message.
        raise ClosedEnvironmentError("Can't reset closed env.")

    # Resetting actually gives you an observation, so we count it here.
    # A vectorized env counts one observation per sub-env.
    self._obs_counter += self.env.num_envs if self.is_vectorized else 1
    logger.debug(f"(observation {self._obs_counter}/{self._max_obs})")

    obs = self.env.reset()

    # Close right away if this reset used up the last of the budget.
    if self._obs_counter >= self._max_obs:
        self.close()
    return obs
def step(self, action):
    """Step the env, refusing once the action budget has been used up.

    Args:
        action: Action forwarded unchanged to ``super().step``.

    Returns:
        The ``(obs, reward, done, info)`` tuple produced by ``super().step``.

    Raises:
        ClosedEnvironmentError: If ``_action_counter`` has already reached
            ``_max_steps`` when this method is called.
    """
    # Compare against `_max_steps`, the same limit that the error message
    # and the post-step check below use.
    if self._action_counter >= self._max_steps:
        raise ClosedEnvironmentError(
            f"Env reached max number of actions ({self._max_steps})"
        )

    # NOTE(review): `_action_counter` is not incremented in this method;
    # presumably `super().step()` updates it -- confirm against the parent.
    obs, reward, done, info = super().step(action)
    logger.debug(f"(step {self._action_counter}/{self._max_steps})")

    # BUG: If we dont use >=, then iteration with EnvDataset doesn't work.
    if self._action_counter >= self._max_steps:
        self.close()
    return obs, reward, done, info
def reset(self):
    """Start a new episode, unless the episode budget is already exhausted.

    Returns:
        The observation returned by ``super().reset()``.

    Raises:
        ClosedEnvironmentError: If ``_episode_counter`` has already reached
            ``_max_episodes`` when this method is called.
    """
    # Check the budget *before* touching the underlying env, so that a
    # rejected reset does not reset the env and throw away its observation.
    if self._episode_counter >= self._max_episodes:
        raise ClosedEnvironmentError(
            f"Env reached max number of episodes ({self._max_episodes})")

    obs = super().reset()

    if not self.is_vectorized:
        # Non-vectorized env: every reset starts exactly one new episode.
        self._episode_counter += 1
        return obs

    if not self._initial_reset:
        # The very first reset of a vectorized env only initializes the count.
        self._initial_reset = True
        self._episode_counter = 0
    else:
        # Resetting all envs: each env that was not done yet is counted as
        # one more episode. `== False` is kept on purpose: it is an
        # element-wise comparison on the `_done` array, not a truthiness test.
        n_unfinished_envs: int = int((self._done == False).sum())  # noqa: E712
        self._episode_counter += n_unfinished_envs
        self._done[:] = False
    return obs
def _assert_is_running(self):
    """Raise if this object has been closed; otherwise do nothing.

    Raises:
        ClosedEnvironmentError: If ``self.closed`` is truthy. The message
            includes the name of the concrete class.
    """
    if not self.closed:
        return
    template = ("Trying to operate on `{0}`, after a "
                "call to `close()`.")
    class_name = type(self).__name__
    raise ClosedEnvironmentError(template.format(class_name))
def _assert_is_running(self):
    """Guard helper: fail when called on an object that is already closed.

    Raises:
        ClosedEnvironmentError: If ``self.closed`` is truthy. The message
            includes the name of the concrete class.
    """
    still_open = not self.closed
    if still_open:
        return
    raise ClosedEnvironmentError(
        f"Trying to operate on `{type(self).__name__}`, after a call to `close()`."
    )