Exemple #1
0
    def step(self, action):
        """Step the env and close it once the episode budget is exhausted.

        The episode counter is only read here; it is maintained outside of
        this method.

        Args:
            action: Action forwarded unchanged to ``super().step``.

        Returns:
            The ``(obs, reward, done, info)`` tuple from ``super().step``. In
            the vectorized case every entry of ``done`` is set to True when
            the env gets closed by this call.

        Raises:
            ClosedEnvironmentError: If the env is already closed, with a
                dedicated message when the episode limit is the reason.
        """
        if self.is_closed():
            if self._episode_counter >= self._max_episodes:
                raise ClosedEnvironmentError(
                    f"Env reached max number of episodes ({self._max_episodes})"
                )
            raise ClosedEnvironmentError("Can't step through closed env.")

        obs, reward, done, info = super().step(action)

        if self.is_vectorized:
            # BUG: This can be reached while in the last 'send' (which uses self.send)
            # of the previous epoch while iterating
            if any(done) and self._episode_counter >= self._max_episodes:
                logger.info(
                    "Closing the envs since we reached the max number of episodes."
                )
                self.close()
                # The env is closed, so mark every sub-env as finished.
                done[:] = True
        elif done and self._episode_counter >= self._max_episodes:
            # `>=` (not `==`) so the env still closes if the counter ever
            # overshoots the limit, matching the guard at the top.
            logger.info(
                "Closing the env since we reached the max number of episodes."
            )
            self.close()

        return obs, reward, done, info
Exemple #2
0
    def step(self, action):
        """Step the wrapped env while enforcing the observation budget.

        Each call counts as one new observation, or ``self.env.num_envs``
        observations when the env is vectorized. The env is closed as soon as
        the running total reaches ``self._max_obs``.

        Args:
            action: Action forwarded unchanged to ``self.env.step``.

        Returns:
            The ``(obs, reward, done, info)`` tuple from the wrapped env.

        Raises:
            ClosedEnvironmentError: If the env is already closed.
        """
        if self._is_closed:
            # Give a more specific reason when the budget is what closed us.
            reason = (
                f"Env reached max number of observations ({self._max_obs})"
                if self._obs_counter >= self._max_obs
                else "Can't step through closed env."
            )
            raise ClosedEnvironmentError(reason)

        obs, reward, done, info = self.env.step(action)

        n_new_observations = self.env.num_envs if self.is_vectorized else 1
        self._obs_counter += n_new_observations
        logger.debug(f"(observation {self._obs_counter}/{self._max_obs})")

        # BUG: If we dont use >=, then iteration with EnvDataset doesn't work.
        budget_exhausted = self._obs_counter >= self._max_obs
        if budget_exhausted:
            self.close()

        return obs, reward, done, info
Exemple #3
0
    def reset(self):
        """Reset the wrapped env, counting the returned observation.

        A reset produces an observation, so it is charged against the
        observation budget (``self.env.num_envs`` observations when
        vectorized, otherwise one). The env is closed once the running total
        reaches ``self._max_obs``.

        Returns:
            The observation returned by ``self.env.reset()``.

        Raises:
            ClosedEnvironmentError: If the env is already closed.
        """
        if self._is_closed:
            if self._obs_counter >= self._max_obs:
                raise ClosedEnvironmentError(f"Env reached max number of observations ({self._max_obs})")
            # This is `reset`, not `step`: name the operation that failed.
            raise ClosedEnvironmentError("Can't reset closed env.")

        # Resetting actually gives you an observation, so we count it here.
        self._obs_counter += self.env.num_envs if self.is_vectorized else 1
        logger.debug(f"(observation {self._obs_counter}/{self._max_obs})")

        obs = self.env.reset()

        # Same `>=` check as in `step`, so both entry points close the env
        # under the same condition.
        if self._obs_counter >= self._max_obs:
            self.close()

        return obs
    def step(self, action):
        """Step the env while enforcing the action (step) budget.

        ``self._action_counter`` is only read here; it is maintained outside
        of this method (presumably by ``super().step`` - confirm against the
        parent class).

        Args:
            action: Action forwarded unchanged to ``super().step``.

        Returns:
            The ``(obs, reward, done, info)`` tuple from ``super().step``.

        Raises:
            ClosedEnvironmentError: If the action budget was already used up
                before this call.
        """
        # Compare against `_max_steps`, the same limit that the message, the
        # debug log and the closing check below use.
        if self._action_counter >= self._max_steps:
            raise ClosedEnvironmentError(f"Env reached max number of actions ({self._max_steps})")

        obs, reward, done, info = super().step(action)
        logger.debug(f"(step {self._action_counter}/{self._max_steps})")

        # BUG: If we dont use >=, then iteration with EnvDataset doesn't work.
        if self._action_counter >= self._max_steps:
            self.close()

        return obs, reward, done, info
Exemple #5
0
    def reset(self):
        """Reset the env and update the episode counter.

        The parent ``reset`` is invoked first; the episode limit is checked
        afterwards, against the counter value from before this reset is
        accounted for.

        Returns:
            The observation returned by ``super().reset()``.

        Raises:
            ClosedEnvironmentError: If the episode counter has already
                reached ``self._max_episodes``.
        """
        obs = super().reset()

        if self._episode_counter >= self._max_episodes:
            raise ClosedEnvironmentError(
                f"Env reached max number of episodes ({self._max_episodes})")

        if not self.is_vectorized:
            # Single env: every reset starts exactly one new episode.
            self._episode_counter += 1
            return obs

        if not self._initial_reset:
            # Very first reset of a vectorized env: start counting from zero.
            self._initial_reset = True
            self._episode_counter = 0
            return obs

        # Later full reset of a vectorized env: each sub-env whose `_done`
        # flag is still False adds one to the counter, then all flags are
        # cleared.
        # Elementwise `== False` is kept on purpose.
        still_running = (self._done == False).sum()
        self._episode_counter += still_running
        self._done[:] = False

        return obs
 def _assert_is_running(self):
     """Raise ``ClosedEnvironmentError`` if ``self.closed`` is truthy.

     Returns ``None`` without side effects otherwise.
     """
     if not self.closed:
         return
     message = "Trying to operate on `{0}`, after a call to `close()`.".format(
         type(self).__name__
     )
     raise ClosedEnvironmentError(message)
Exemple #7
0
 def _assert_is_running(self):
     """Guard against use after ``close()``.

     Raises:
         ClosedEnvironmentError: If ``self.closed`` is truthy; the message
             names the concrete class of this instance.
     """
     if not self.closed:
         return
     owner = type(self).__name__
     raise ClosedEnvironmentError(
         f"Trying to operate on `{owner}`, after a call to `close()`."
     )