Example #1
0
def make_gym_atari_env(game, seed=1):
    """Build a Gym Atari environment with a random number of initial no-ops.

    Args:
      game: Name of the Atari game to construct.
      seed: Integer seed forwarded to both the base environment and the
        no-op wrapper.

    Returns:
      The environment wrapped in a `RandomNoopsEnvironmentWrapper` that
      performs between 1 and 30 no-op steps at each reset.
    """
    base_environment = gym_atari.GymAtari(game, seed=seed)
    return gym_atari.RandomNoopsEnvironmentWrapper(
        base_environment, min_noop_steps=1, max_noop_steps=30, seed=seed)
Example #2
0
 def environment_builder():
   """Build an Atari environment with random no-op starts.

   The base environment and the no-op wrapper are seeded independently
   from the shared `random_state`.
   """
   base = gym_atari.GymAtari(
       FLAGS.environment_name, seed=random_state.randint(1, 2**32))
   wrapped = gym_atari.RandomNoopsEnvironmentWrapper(
       base,
       min_noop_steps=1,
       max_noop_steps=30,
       seed=random_state.randint(1, 2**32),
   )
   return wrapped
Example #3
0
 def num_noops_sequence(seed, num_episodes):
     """Return, per episode, how many calls reset() made on the base env.

     Each entry counts the entries recorded on the dummy environment's
     tape during one wrapped `reset()` (the tape is cleared beforehand),
     which equals one reset plus the random number of no-op steps.
     """
     tape = []
     base_environment = test_utils.DummyEnvironment(tape, episode_length=10)
     wrapped = gym_atari.RandomNoopsEnvironmentWrapper(
         base_environment, max_noop_steps=8, seed=seed)

     def _count_one_reset():
         # Wipe the tape so only this reset's activity is counted.
         tape.clear()
         wrapped.reset()
         return len(tape)

     return [_count_one_reset() for _ in range(num_episodes)]
Example #4
0
 def test_specs(self):
     """The wrapper must expose the same specs as the wrapped environment."""
     base_environment = test_utils.DummyEnvironment([], episode_length=10)
     wrapped = gym_atari.RandomNoopsEnvironmentWrapper(
         base_environment, max_noop_steps=5)
     # Each spec accessor on the wrapper should pass straight through.
     for spec_name in ('observation_spec', 'action_spec', 'reward_spec',
                       'discount_spec'):
         self.assertEqual(getattr(base_environment, spec_name)(),
                          getattr(wrapped, spec_name)())
Example #5
0
    def test_basic(self, min_noop_steps, max_noop_steps):
        """Checks no-op insertion on reset and action pass-through afterwards.

        Parameterized over (min_noop_steps, max_noop_steps). Verifies that:
        * Every new episode starts with a random number of no-op actions in
          [min_noop_steps, max_noop_steps], using the configured noop_action.
        * The no-op count is not constant across episodes (when the range
          allows more than one value).
        * Ordinary actions after the no-op phase pass through unchanged.
        """
        noop_action = 3
        # `tape` records every call the dummy environment receives, as strings
        # like 'Environment reset' / 'Environment step (<action>)'.
        tape = []
        environment = test_utils.DummyEnvironment(tape, episode_length=10)
        wrapped_environment = gym_atari.RandomNoopsEnvironmentWrapper(
            environment,
            min_noop_steps=min_noop_steps,
            max_noop_steps=max_noop_steps,
            noop_action=noop_action,
            seed=42)

        # Make sure noops are applied appropriate number of times (in min/max range
        # and not always the same number), with correct action.
        num_noop_steps = set()
        for i in range(20):
            # Switch between different ways of starting a new episode.
            if i % 4 == 0:
                # Path 1: explicit reset(). Expect one base reset followed by
                # (num_steps - 1) no-op steps on the tape.
                tape.clear()
                wrapped_environment.reset()
                num_steps = len(tape)
                expected_tape = (['Environment reset'] +
                                 ['Environment step (%s)' % noop_action] *
                                 (num_steps - 1))
            else:
                # Path 2: run the episode to completion, then step(); the
                # wrapper should start a new episode itself. The tape is
                # cleared after the episode so only the restart is recorded,
                # which consists solely of no-op steps.
                timestep = wrapped_environment.reset()
                while not timestep.last():
                    timestep = wrapped_environment.step(0)
                tape.clear()
                wrapped_environment.step(noop_action)
                num_steps = len(tape)
                expected_tape = (['Environment step (%s)' % noop_action] *
                                 num_steps)

            self.assertEqual(expected_tape, tape)
            # +1 because of the extra initial reset() / step().
            self.assertBetween(num_steps, min_noop_steps + 1,
                               max_noop_steps + 1)
            num_noop_steps.add(num_steps)

            # Do some regular steps & check pass-through of actions.
            wrapped_environment.step(6)
            wrapped_environment.step(7)
            self.assertLen(tape, num_steps + 2)
            self.assertEqual(['Environment step (6)', 'Environment step (7)'],
                             tape[-2:])

        # Check it's not always the same number of random noop steps.
        if max_noop_steps > min_noop_steps:
            self.assertGreater(len(num_noop_steps), 1)
Example #6
0
 def environment_builder():
     """Build a pixel-based Key-Door environment with random no-op starts.

     Environment options come from command-line FLAGS; the no-op wrapper
     is seeded from the shared `random_state`.
     """
     key_door_env = gym_key_door.GymKeyDoor(
         env_args={
             constants.MAP_ASCII_PATH: FLAGS.map_ascii_path,
             constants.MAP_YAML_PATH: FLAGS.map_yaml_path,
             constants.REPRESENTATION: constants.PIXEL,
             constants.SCALING: FLAGS.env_scaling,
             constants.EPISODE_TIMEOUT: FLAGS.max_frames_per_episode,
             constants.GRAYSCALE: False,
             constants.BATCH_DIMENSION: False,
             constants.TORCH_AXES: False,
         },
         env_shape=FLAGS.env_shape,
     )
     # Apply 1-30 random no-op steps at the start of every episode.
     wrapped = gym_atari.RandomNoopsEnvironmentWrapper(
         key_door_env,
         min_noop_steps=1,
         max_noop_steps=30,
         seed=random_state.randint(1, 2**32),
     )
     return wrapped
Example #7
0
 def test_episode_end_during_noop_steps(self):
     """reset() must raise when the episode ends before the no-ops finish."""
     # Episode length (5) is shorter than the minimum no-op count (10), so
     # the wrapper can never complete its no-op phase.
     short_environment = test_utils.DummyEnvironment([], episode_length=5)
     wrapped = gym_atari.RandomNoopsEnvironmentWrapper(
         short_environment, min_noop_steps=10, max_noop_steps=20)
     with self.assertRaisesRegex(RuntimeError, 'Episode ended'):
         wrapped.reset()