Beispiel #1
0
        # We are done as per our max-episode-len.
        if self.max_episode_len is not None and \
                self.steps >= self.max_episode_len:
            done = True
        # Max not reached yet -> Sample done via p_done.
        elif self.p_done > 0.0:
            done = bool(
                np.random.choice([True, False],
                                 p=[self.p_done, 1.0 - self.p_done]))

        return self.observation_space.sample(), \
            float(self.reward_space.sample()), done, {}


# Multi-agent variant of RandomEnv, built through `make_multi_agent`.
# The creator callable receives the env config and returns a RandomEnv.
RandomMultiAgentEnv = make_multi_agent(
    lambda env_config: RandomEnv(env_config))


# Large observation space "pre-compiled" random env (for testing).
class RandomLargeObsSpaceEnv(RandomEnv):
    """RandomEnv preset with a fixed `Box(-1.0, 1.0, (5000, ))` obs space.

    Args:
        config: Optional env config mapping forwarded to `RandomEnv`. Any
            "observation_space" entry it carries is overridden. The
            caller's mapping itself is never modified.
    """

    def __init__(self, config=None):
        import copy

        # Work on a shallow copy so the forced override below does not leak
        # into the caller's config, which may be shared by several env
        # instances. `copy.copy` preserves the mapping's concrete type.
        config = copy.copy(config) if config else {}
        config["observation_space"] = gym.spaces.Box(-1.0, 1.0, (5000, ))
        super().__init__(config=config)


# Large observation space + cont. actions "pre-compiled" random env
# (for testing).
class RandomLargeObsSpaceEnvContActions(RandomEnv):
    def __init__(self, config=None):
Beispiel #2
0
    @override(CartPoleEnv)
    def reset(self):
        """Begin a new episode, possibly raising a simulated crash instead.

        Returns:
            Whatever the parent class' `reset()` returns.

        Raises:
            EnvError: If the random draw falls below `self.p_crash_reset`,
                or if `self.crash_after_n_steps` equals 0.
        """
        # A new episode starts counting its timesteps from zero.
        self.timesteps = 0

        # Draw the random number first (as on every call); only look at the
        # deterministic trigger when the random one did not fire.
        crash_now = np.random.random() < self.p_crash_reset
        if not crash_now:
            # `None == 0` is False, so an unset limit never triggers here.
            crash_now = self.crash_after_n_steps == 0

        if crash_now:
            raise EnvError(
                "Simulated env crash in `reset()`! Feel free to use any "
                "other exception type here instead.")

        # No crash -> regular reset of the underlying env.
        return super().reset()

    @override(CartPoleEnv)
    def step(self, action):
        """Advance the episode by one step, possibly raising a simulated crash.

        Args:
            action: The action, passed through unchanged to the parent
                class' `step()`.

        Returns:
            Whatever the parent class' `step()` returns.

        Raises:
            EnvError: If the random draw falls below `self.p_crash`, or if
                `self.crash_after_n_steps` is truthy and equals the number
                of steps taken in the ongoing episode.
        """
        # Count this step for the ongoing episode.
        self.timesteps += 1

        # Draw the random number first (as on every call); only look at the
        # deterministic trigger when the random one did not fire.
        crash_now = np.random.random() < self.p_crash
        if not crash_now:
            step_limit = self.crash_after_n_steps
            # A limit of None or 0 is falsy and thus never triggers here.
            crash_now = bool(step_limit) and step_limit == self.timesteps

        if crash_now:
            raise EnvError(
                "Simulated env crash in `step()`! Feel free to use any "
                "other exception type here instead.")

        # No crash -> regular step of the underlying env.
        return super().step(action)


# Multi-agent variant of CartPoleCrashing, built through `make_multi_agent`.
# The creator callable receives the env config and returns a CartPoleCrashing.
MultiAgentCartPoleCrashing = make_multi_agent(
    lambda env_config: CartPoleCrashing(env_config))