def initialize_environments(self, batch_size=1, max_episode_steps=-1,
                            max_and_skip_env=False):
  """Initializes the environments and trajectories.

  Subclasses can override this if they don't want a default implementation
  which initializes `batch_size` environments, but must take care to
  initialize self._trajectories (this is checked in __init__ anyway).

  Args:
    batch_size: (int) Number of `self.base_env_name` envs to initialize.
    max_episode_steps: (int) Passed on to `gym_utils.make_gym_env`.
    max_and_skip_env: (boolean) Passed on to `gym_utils.make_gym_env`.
  """
  assert batch_size >= 1
  self._batch_size = batch_size

  # pylint: disable=g-complex-comprehension
  self._envs = [
      gym_utils.make_gym_env(self.base_env_name,
                             rl_env_max_episode_steps=max_episode_steps,
                             maxskip_env=max_and_skip_env)
      for _ in range(batch_size)
  ]

  # If self.observation_space and self.action_space aren't None, this is a
  # re-initialization of this class; make sure it matches the previous
  # behaviour.
  if self._observation_space:
    assert str(self._observation_space) == str(
        self._envs[0].observation_space)
  else:
    # This means that we are initializing this class for the first time.
    #
    # We set this equal to the first env's observation space; later on we'll
    # verify that all envs have the same observation space.
    self._observation_space = self._envs[0].observation_space

  # Similarly for action_space.
  if self._action_space:
    assert str(self._action_space) == str(self._envs[0].action_space)
  else:
    self._action_space = self._envs[0].action_space

  self._verify_same_spaces()

  # If self.reward_range is None, take the reward range of the env.
  if self.reward_range is None:
    self._reward_range = self._envs[0].reward_range

  # This data structure stores the history of each env.
  #
  # NOTE: Even if the env is an NN and can step in all batches concurrently,
  # it is still valuable to store the trajectories separately.
  self._trajectories = trajectory.BatchTrajectory(batch_size=batch_size)
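# Per the docstring, a subclass may replace this default implementation but
# must still populate self._trajectories, since __init__ checks for it. The
# sketch below shows the minimum such an override has to do. Assumptions not
# from the source: the base class name `EnvProblem`, the subclass name
# `MySimulatedEnvProblem`, and the `model_observation_space` /
# `model_action_space` attributes are all hypothetical.
class MySimulatedEnvProblem(EnvProblem):  # hypothetical subclass

  def initialize_environments(self, batch_size=1, max_episode_steps=-1,
                              max_and_skip_env=False):
    # No per-env gym instances here; a learned model steps the whole batch.
    self._batch_size = batch_size
    # Spaces must still be set so callers of step()/reset() can rely on them.
    self._observation_space = self.model_observation_space  # hypothetical
    self._action_space = self.model_action_space            # hypothetical
    # Required: __init__ verifies that trajectories were initialized.
    self._trajectories = trajectory.BatchTrajectory(batch_size=batch_size)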
def __init__(self, base_env_name=None, batch_size=1, grayscale=False,
             resize_height_factor=2, resize_width_factor=2,
             rl_env_max_episode_steps=-1, max_num_noops=0,
             maxskip_envs=False, **kwargs):
  if base_env_name is None:
    base_env_name = self.base_env_name
  self._base_env_name = base_env_name
  super(T2TGymEnv, self).__init__(batch_size, **kwargs)
  self.grayscale = grayscale
  self.resize_height_factor = resize_height_factor
  self.resize_width_factor = resize_width_factor
  if not self.name:
    # Set problem name if not registered.
    self.name = "Gym%s" % base_env_name

  self._envs = [
      gym_utils.make_gym_env(
          base_env_name,
          rl_env_max_episode_steps=rl_env_max_episode_steps,
          maxskip_env=maxskip_envs)
      for _ in range(self.batch_size)]

  # max_num_noops works only with Atari envs.
  if max_num_noops > 0:
    assert self._envs[0].unwrapped.get_action_meanings()[
        self.noop_action
    ] == "NOOP"
  self.max_num_noops = max_num_noops

  orig_observ_space = self._envs[0].observation_space
  if not all(env.observation_space == orig_observ_space
             for env in self._envs):
    raise ValueError("All environments must use the same observation space.")

  self.observation_space = self._derive_observation_space(orig_observ_space)

  self.action_space = self._envs[0].action_space
  if not all(env.action_space == self.action_space for env in self._envs):
    raise ValueError("All environments must use the same action space.")

  with self._tf_graph.obj.as_default():
    self._resize = dict()
    orig_height, orig_width = orig_observ_space.shape[:2]
    self._img_batch_t = _Noncopyable(tf.placeholder(
        dtype=tf.uint8, shape=(None, orig_height, orig_width, 3)))
    height, width = self.observation_space.shape[:2]
    resized = tf.image.resize_images(self._img_batch_t.obj,
                                     [height, width],
                                     tf.image.ResizeMethod.AREA)
    resized = tf.cast(resized, tf.as_dtype(self.observation_space.dtype))
    if self.grayscale:
      resized = tf.image.rgb_to_grayscale(resized)
    self._resized_img_batch_t = _Noncopyable(resized)
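# The constructor above only builds the resize graph; a session run is still
# needed per observation batch. A minimal sketch of that evaluation follows.
# The `self._session` attribute (a _Noncopyable tf.Session created on
# self._tf_graph) and the method name are assumptions, not confirmed by the
# source.
def _preprocess_observations_sketch(self, raw_obs_batch):
  """Sketch: resize (and optionally grayscale) a uint8 frame batch."""
  # raw_obs_batch: np.uint8 array of shape (batch, orig_height, orig_width, 3).
  return self._session.obj.run(
      self._resized_img_batch_t.obj,
      feed_dict={self._img_batch_t.obj: raw_obs_batch})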
def test_resize(self):
  env_name = TEST_ENV_NAME
  orig_env = make_gym_env(env_name)
  resize_height_factor = 2
  resize_width_factor = 3
  orig_height, orig_width = orig_env.observation_space.shape[:2]
  env, obs, _, _ = self.init_batch_and_play(
      env_name, steps_per_epoch=1,
      resize_height_factor=resize_height_factor,
      resize_width_factor=resize_width_factor)

  for obs_batch in obs:
    ob = obs_batch[0]
    self.assertEqual(ob.shape, env.observation_space.shape)
    height, width = ob.shape[:2]
    self.assertEqual(height, orig_height // resize_height_factor)
    self.assertEqual(width, orig_width // resize_width_factor)
def test_unlimited_env(self):
  env = gym_utils.make_gym_env("CartPole-v0", rl_env_max_episode_steps=None)
  self.assertIsInstance(env, gym.Env)
  self.assertNotIsInstance(env, gym.wrappers.TimeLimit)
def test_making_timewrapped_env(self):
  env = gym_utils.make_gym_env("CartPole-v0", rl_env_max_episode_steps=1000)
  self.assertIsInstance(env, gym.Env)
  self.assertIsInstance(env, gym.wrappers.TimeLimit)
  self.assertEqual(1000, env._max_episode_steps)
def test_making_simple_env(self):
  env = gym_utils.make_gym_env("CartPole-v0")
  self.assertIsInstance(env, gym.Env)
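# Taken together, the three tests above pin down the TimeLimit contract of
# gym_utils.make_gym_env: rl_env_max_episode_steps=None yields an env with no
# TimeLimit wrapper, a positive value wraps with exactly that step limit, and
# the default (-1) keeps whatever gym.make returns. The sketch below is
# inferred from those assertions, not copied from the real implementation.
import gym

def make_gym_env_sketch(name, rl_env_max_episode_steps=-1):
  """Sketch of the TimeLimit behavior the tests above assert."""
  env = gym.make(name)
  if rl_env_max_episode_steps is None:
    # Unlimited: strip any TimeLimit wrapper added by gym's registry.
    while isinstance(env, gym.wrappers.TimeLimit):
      env = env.env
  elif rl_env_max_episode_steps >= 0:
    # Explicit limit: re-wrap with the requested number of steps.
    while isinstance(env, gym.wrappers.TimeLimit):
      env = env.env
    env = gym.wrappers.TimeLimit(
        env, max_episode_steps=rl_env_max_episode_steps)
  # Default (-1): keep the registry's own time limit, if any.
  return env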
def __init__(self, base_env_name=None, batch_size=1, grayscale=False,
             resize_height_factor=2, resize_width_factor=2,
             rl_env_max_episode_steps=-1, max_num_noops=0,
             maxskip_envs=False, sticky_actions=False,
             should_derive_observation_space=True, **kwargs):
  if base_env_name is None:
    base_env_name = self.base_env_name
  self._base_env_name = base_env_name
  super(T2TGymEnv, self).__init__(batch_size, **kwargs)

  # TODO(afrozm): Find a proper way of doing this. Refactor or cleanup.
  self.should_derive_observation_space = should_derive_observation_space

  self.grayscale = grayscale
  self.resize_height_factor = resize_height_factor
  self.resize_width_factor = resize_width_factor
  self.rl_env_max_episode_steps = rl_env_max_episode_steps
  self.maxskip_envs = maxskip_envs
  self.sticky_actions = sticky_actions
  self._initial_state = None
  self._initial_frames = None
  if not self.name:
    # Set problem name if not registered.
    self.name = "Gym%s" % base_env_name

  self._envs = [
      gym_utils.make_gym_env(
          base_env_name,
          rl_env_max_episode_steps=rl_env_max_episode_steps,
          maxskip_env=maxskip_envs,
          sticky_actions=sticky_actions)
      for _ in range(self.batch_size)
  ]

  # max_num_noops works only with Atari envs.
  # NOTE: The NOOP sanity check below is disabled; it calls
  # get_action_meanings(), which only Atari envs expose.
  # if max_num_noops > 0:
  #   assert self._envs[0].unwrapped.get_action_meanings()[
  #       self.noop_action
  #   ] == "NOOP"
  self.max_num_noops = max_num_noops

  orig_observ_space = self._envs[0].observation_space
  if not all(env.observation_space == orig_observ_space
             for env in self._envs):
    raise ValueError("All environments must use the same observation space.")

  self.observation_space = orig_observ_space
  if self.should_derive_observation_space:
    self.observation_space = self._derive_observation_space(
        orig_observ_space)

  self.action_space = self._envs[0].action_space
  if not all(env.action_space == self.action_space for env in self._envs):
    raise ValueError("All environments must use the same action space.")

  if self.should_derive_observation_space:
    with self._tf_graph.obj.as_default():
      self._resize = {}
      orig_height, orig_width = orig_observ_space.shape[:2]
      self._img_batch_t = _Noncopyable(
          tf.placeholder(dtype=tf.uint8,
                         shape=(None, orig_height, orig_width, 3)))
      height, width = self.observation_space.shape[:2]
      resized = tf.image.resize_images(self._img_batch_t.obj,
                                       [height, width],
                                       tf.image.ResizeMethod.AREA)
      resized = tf.cast(resized, tf.as_dtype(self.observation_space.dtype))
      if self.grayscale:
        resized = tf.image.rgb_to_grayscale(resized)
      self._resized_img_batch_t = _Noncopyable(resized)
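# A usage sketch for this variant: a batch of sticky-action envs that keeps
# the raw observation space, so the TF resize graph is never built. The env
# name and argument values are examples only; any extra kwargs the base
# class requires are omitted here.
env = T2TGymEnv(
    base_env_name="PongNoFrameskip-v4",  # example Atari env
    batch_size=4,
    sticky_actions=True,
    max_num_noops=8,  # no NOOP assertion here; see the disabled check above
    should_derive_observation_space=False)
assert env.observation_space == env._envs[0].observation_space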