Example #1
import gym
import numpy as np
from gym.wrappers import AtariPreprocessing, GrayScaleObservation


def test_gray_scale_observation(env_id, keep_dim):
    gray_env = AtariPreprocessing(gym.make(env_id),
                                  screen_size=84,
                                  grayscale_obs=True)
    rgb_env = AtariPreprocessing(gym.make(env_id),
                                 screen_size=84,
                                 grayscale_obs=False)
    wrapped_env = GrayScaleObservation(rgb_env, keep_dim=keep_dim)
    assert rgb_env.observation_space.shape[-1] == 3

    # Seed both environments identically (old-style Gym seeding API)
    seed = 0
    gray_env.seed(seed)
    wrapped_env.seed(seed)

    gray_obs = gray_env.reset()
    wrapped_obs = wrapped_env.reset()

    if keep_dim:
        assert wrapped_env.observation_space.shape[-1] == 1
        assert len(wrapped_obs.shape) == 3
        wrapped_obs = wrapped_obs.squeeze(-1)
    else:
        assert len(wrapped_env.observation_space.shape) == 2
        assert len(wrapped_obs.shape) == 2

    # ALE gray scale is slightly different, but no more than by one shade
    assert np.allclose(gray_obs.astype("int32"),
                       wrapped_obs.astype("int32"),
                       atol=1)
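The test takes `env_id` and `keep_dim` as parameters, so it is presumably driven by pytest parametrization upstream. A minimal sketch of such a driver; the env id and the decorators are assumptions, not part of the original:

import pytest

@pytest.mark.parametrize("env_id", ["PongNoFrameskip-v4"])  # assumed env id
@pytest.mark.parametrize("keep_dim", [True, False])
def test_gray_scale_observation(env_id, keep_dim):
    ...  # body as in Example #1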
Example #2
File: breakout.py Project: LukeWood/MuZero
import gym
from typing import List
from gym.wrappers import GrayScaleObservation, ResizeObservation

# `AbstractGame` and `Action` are defined elsewhere in the MuZero project.
class Breakout(AbstractGame):
    """The Gym Breakout environment."""
    def __init__(self, discount: float):
        super().__init__(discount)
        self.env = gym.make('BreakoutDeterministic-v4')
        self.env = ResizeObservation(self.env, shape=(84, 84))
        self.env = GrayScaleObservation(self.env, keep_dim=True)
        self.actions = [Action(i) for i in range(self.env.action_space.n)]
        self.observations = [self.env.reset()]
        self.done = False

    @property
    def action_space_size(self) -> int:
        """Return the size of the action space."""
        return len(self.actions)

    def step(self, action) -> int:
        """Execute one step of the game conditioned on the given action."""

        observation, reward, done, _ = self.env.step(action.index)
        self.observations += [observation]
        self.done = done
        return reward

    def terminal(self) -> bool:
        """Is the game finished?"""
        return self.done

    def legal_actions(self) -> List[Action]:
        """Return the legal actions available at this instant."""
        return self.actions

    def make_image(self, state_index: int):
        """Compute the state of the game."""
        return self.observations[state_index]
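A short sketch of how this class might be driven with a random policy; the discount value is an assumption, and `Action`/`AbstractGame` come from the LukeWood/MuZero project:

import random

game = Breakout(discount=0.997)  # assumed discount value
while not game.terminal():
    action = random.choice(game.legal_actions())
    reward = game.step(action)
print(f"episode length: {len(game.observations) - 1}")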
Example #3
    # Excerpt from the middle of a script: `env`, `agent`, `Memory`, and
    # `epsilon_start` are created earlier in the file, and numpy and
    # gym.wrappers are imported at the top.
    outdir = '/tmp/random-agent-results'
    env = wrappers.Monitor(env, directory=outdir, force=True)
    env.seed(0)

    mem = Memory()

    episode_count = 50
    reward = 0
    done = False

    rewardPerEp = numpy.array([])
    ep = numpy.array([])
    interaction = numpy.array([])

    for i in range(episode_count):
        env.reset()
        # Take one random step after the reset to obtain an initial state
        actual_state, reward, done, info = env.step(env.action_space.sample())

        nbInteraction = 0
        sumReward = 0

        while True:
            nbInteraction += 1
            # Ask the agent for an action (epsilon_start controls exploration)
            action = agent.act(actual_state, epsilon_start)

            # Step the environment: next state, reward, and whether the episode ended
            next_state, reward, done, _ = env.step(action)

            sumReward += reward
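            # The excerpt cuts off here. A hypothetical continuation (a sketch,
            # not from the original): store the transition, advance the state,
            # and record episode statistics on termination. `mem.push` is an
            # assumed Memory API, not confirmed by the source.
            mem.push(actual_state, action, reward, next_state, done)
            actual_state = next_state

            if done:
                rewardPerEp = numpy.append(rewardPerEp, sumReward)
                ep = numpy.append(ep, i)
                interaction = numpy.append(interaction, nbInteraction)
                break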