def test_gray_scale_observation(env_id, keep_dim):
    """Verify GrayScaleObservation matches ALE's native grayscale output.

    Builds one environment that converts to grayscale inside ALE and one
    RGB environment wrapped with GrayScaleObservation, resets both from
    the same seed, and compares the two first observations pixel-wise.
    """
    ale_gray = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=True)
    ale_rgb = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=False)
    wrapped = GrayScaleObservation(ale_rgb, keep_dim=keep_dim)

    # Sanity check: the unwrapped source really is 3-channel RGB.
    assert ale_rgb.observation_space.shape[-1] == 3

    seed = 0
    ale_gray.seed(seed)
    wrapped.seed(seed)
    obs_gray = ale_gray.reset()
    obs_wrapped = wrapped.reset()

    if keep_dim:
        # keep_dim retains a trailing singleton channel axis (H, W, 1).
        assert wrapped.observation_space.shape[-1] == 1
        assert len(obs_wrapped.shape) == 3
        obs_wrapped = obs_wrapped.squeeze(-1)
    else:
        # Without keep_dim the observation is a plain 2-D (H, W) image.
        assert len(wrapped.observation_space.shape) == 2
        assert len(obs_wrapped.shape) == 2

    # ALE gray scale is slightly different, but no more than by one shade
    assert np.allclose(obs_gray.astype("int32"), obs_wrapped.astype("int32"), atol=1)
class Breakout(AbstractGame):
    """The Gym Breakout environment (BreakoutDeterministic-v4), preprocessed
    to 84x84 grayscale observations with an explicit channel axis."""

    def __init__(self, discount: float):
        super().__init__(discount)
        self.env = gym.make('BreakoutDeterministic-v4')
        self.env = ResizeObservation(self.env, shape=(84, 84))
        self.env = GrayScaleObservation(self.env, keep_dim=True)
        # One Action per discrete action index of the underlying environment.
        self.actions = [Action(i) for i in range(self.env.action_space.n)]
        # Full observation history; make_image() indexes into this list.
        self.observations = [self.env.reset()]
        self.done = False

    @property
    def action_space_size(self) -> int:
        """Return the size of the action space."""
        return len(self.actions)

    def step(self, action) -> int:
        """Execute one step of the game conditioned by the given action.

        Returns the reward for the transition.
        NOTE(review): annotated ``int`` but gym rewards are typically
        floats — confirm what callers expect before tightening.
        """
        observation, reward, done, _ = self.env.step(action.index)
        self.observations.append(observation)
        self.done = done
        return reward

    def terminal(self) -> bool:
        """Is the game finished?"""
        return self.done

    def legal_actions(self) -> List[Action]:
        """Return the legal actions available at this instant."""
        return self.actions

    def make_image(self, state_index: int):
        """Compute the state of the game."""
        return self.observations[state_index]
# Record episodes to disk and seed the environment for reproducibility.
outdir = '/tmp/random-agent-results'
env = wrappers.Monitor(env, directory=outdir, force=True)
env.seed(0)

mem = Memory()  # replay/trace storage — semantics defined elsewhere in this file
episode_count = 50
reward = 0
done = False

# Per-episode bookkeeping (reward totals, episode ids, interaction counts);
# presumably appended to after each episode — the loop continues past this chunk.
rewardPerEp = numpy.array([])
ep = numpy.array([])
interaction = numpy.array([])

for i in range(episode_count):
    env.reset()
    # NOTE(review): the first transition uses a random action and its reward
    # is not added to sumReward — confirm this warm-up step is intentional.
    actual_state, reward, done, info = env.step(env.action_space.sample())
    nbInteraction = 0
    sumReward = 0
    while True:
        nbInteraction += 1
        # Choose the action to do
        action = agent.act(actual_state, epsilon_start)
        # Calculate next state, reward and if the episode is finished or not
        next_state, reward, done, _ = env.step(action)
        sumReward += reward