def make_gym_atari_env(game, seed=1, min_noop_steps=1, max_noop_steps=30):
  """Creates an Atari environment wrapped with random no-op starts.

  Args:
    game: Name of the Atari game to load.
    seed: Random seed passed to both the environment and the no-op wrapper.
    min_noop_steps: Minimum number of no-op steps applied after each reset.
      Defaults to 1, matching the previous hard-coded value.
    max_noop_steps: Maximum number of no-op steps applied after each reset.
      Defaults to 30, matching the previous hard-coded value.

  Returns:
    The environment wrapped in a `RandomNoopsEnvironmentWrapper`.
  """
  env = gym_atari.GymAtari(game, seed=seed)
  # Random no-ops at episode start randomize initial state (standard Atari
  # evaluation protocol).
  return gym_atari.RandomNoopsEnvironmentWrapper(
      env,
      min_noop_steps=min_noop_steps,
      max_noop_steps=max_noop_steps,
      seed=seed)
def environment_builder():
  """Creates Atari environment."""
  atari_env = gym_atari.GymAtari(
      FLAGS.environment_name, seed=random_state.randint(1, 2**32))
  # Wrap with random no-ops so each episode starts from a randomized state.
  wrapped_env = gym_atari.RandomNoopsEnvironmentWrapper(
      atari_env,
      min_noop_steps=1,
      max_noop_steps=30,
      seed=random_state.randint(1, 2**32),
  )
  return wrapped_env
def num_noops_sequence(seed, num_episodes):
  """Returns the per-episode call-tape lengths produced by reset() for a seed."""
  recorded_calls = []
  base_env = test_utils.DummyEnvironment(recorded_calls, episode_length=10)
  noops_env = gym_atari.RandomNoopsEnvironmentWrapper(
      base_env, max_noop_steps=8, seed=seed)
  lengths = []
  for _ in range(num_episodes):
    # Empty the tape in place (the environment holds a reference to it).
    del recorded_calls[:]
    noops_env.reset()
    lengths.append(len(recorded_calls))
  return lengths
def test_specs(self):
  """Wrapper must expose exactly the same specs as the wrapped environment."""
  base = test_utils.DummyEnvironment([], episode_length=10)
  wrapped = gym_atari.RandomNoopsEnvironmentWrapper(base, max_noop_steps=5)
  for spec_name in ('observation_spec', 'action_spec', 'reward_spec',
                    'discount_spec'):
    self.assertEqual(
        getattr(base, spec_name)(), getattr(wrapped, spec_name)())
def test_basic(self, min_noop_steps, max_noop_steps):
  """Checks no-ops use the right action, fall in range, and vary in count."""
  noop_action = 3
  tape = []
  environment = test_utils.DummyEnvironment(tape, episode_length=10)
  wrapped = gym_atari.RandomNoopsEnvironmentWrapper(
      environment,
      min_noop_steps=min_noop_steps,
      max_noop_steps=max_noop_steps,
      noop_action=noop_action,
      seed=42)
  noop_entry = 'Environment step (%s)' % noop_action
  observed_noop_counts = set()
  for episode in range(20):
    # Alternate between the two ways a new episode can start: an explicit
    # reset(), or stepping past the end of the previous episode.
    if episode % 4 == 0:
      tape.clear()
      wrapped.reset()
      num_steps = len(tape)
      expected_tape = ['Environment reset'] + [noop_entry] * (num_steps - 1)
    else:
      timestep = wrapped.reset()
      while not timestep.last():
        timestep = wrapped.step(0)
      tape.clear()
      wrapped.step(noop_action)
      num_steps = len(tape)
      expected_tape = [noop_entry] * num_steps
    self.assertEqual(expected_tape, tape)
    # +1 because of the extra initial reset() / step().
    self.assertBetween(num_steps, min_noop_steps + 1, max_noop_steps + 1)
    observed_noop_counts.add(num_steps)
    # Regular actions must pass straight through to the wrapped environment.
    wrapped.step(6)
    wrapped.step(7)
    self.assertLen(tape, num_steps + 2)
    self.assertEqual(['Environment step (6)', 'Environment step (7)'],
                     tape[-2:])
  # Check it's not always the same number of random noop steps.
  if max_noop_steps > min_noop_steps:
    self.assertGreater(len(observed_noop_counts), 1)
def environment_builder():
  """Creates Key-Door environment."""
  key_door_env = gym_key_door.GymKeyDoor(
      env_args={
          constants.MAP_ASCII_PATH: FLAGS.map_ascii_path,
          constants.MAP_YAML_PATH: FLAGS.map_yaml_path,
          constants.REPRESENTATION: constants.PIXEL,
          constants.SCALING: FLAGS.env_scaling,
          constants.EPISODE_TIMEOUT: FLAGS.max_frames_per_episode,
          constants.GRAYSCALE: False,
          constants.BATCH_DIMENSION: False,
          constants.TORCH_AXES: False,
      },
      env_shape=FLAGS.env_shape,
  )
  # Same random no-op wrapping as the Atari builder, for consistent starts.
  wrapped_env = gym_atari.RandomNoopsEnvironmentWrapper(
      key_door_env,
      min_noop_steps=1,
      max_noop_steps=30,
      seed=random_state.randint(1, 2**32),
  )
  return wrapped_env
def test_episode_end_during_noop_steps(self):
  """reset() must raise when the episode ends before the no-ops finish."""
  # Episode length (5) is shorter than the minimum number of no-ops (10),
  # so the wrapper cannot complete its no-op sequence.
  short_env = test_utils.DummyEnvironment([], episode_length=5)
  wrapped = gym_atari.RandomNoopsEnvironmentWrapper(
      short_env, min_noop_steps=10, max_noop_steps=20)
  with self.assertRaisesRegex(RuntimeError, 'Episode ended'):
    wrapped.reset()