def initialize_environments(self,
                                batch_size=1,
                                max_episode_steps=-1,
                                max_and_skip_env=False):
        """Initializes the environments and trajectories.

    Subclasses can override this if they don't want the default implementation,
    which initializes `batch_size` environments, but they must take care to
    initialize self._trajectories (this is checked in __init__ anyway).

    Args:
      batch_size: (int) Number of `self.base_env_name` envs to initialize.
      max_episode_steps: (int) Passed on to `gym_utils.make_gym_env`.
      max_and_skip_env: (boolean) Passed on to `gym_utils.make_gym_env`.
    """

        assert batch_size >= 1
        self._batch_size = batch_size

        # pylint: disable=g-complex-comprehension
        self._envs = [
            gym_utils.make_gym_env(self.base_env_name,
                                   rl_env_max_episode_steps=max_episode_steps,
                                   maxskip_env=max_and_skip_env)
            for _ in range(batch_size)
        ]

        # If self.observation_space and self.action_space aren't None, this is a
        # re-initialization of this class; in that case make sure the spaces match
        # the ones from the previous initialization.
        if self._observation_space:
            assert str(self._observation_space) == str(
                self._envs[0].observation_space)
        else:
            # This means that we are initializing this class for the first time.
            #
            # We set this equal to the first env's observation space, later on we'll
            # verify that all envs have the same observation space.
            self._observation_space = self._envs[0].observation_space

        # Similarly for action_space
        if self._action_space:
            assert str(self._action_space) == str(self._envs[0].action_space)
        else:
            self._action_space = self._envs[0].action_space

        self._verify_same_spaces()

        # If self.reward_range is None, take the reward range of the env.
        if self.reward_range is None:
            self._reward_range = self._envs[0].reward_range

        # This data structure stores the history of each env.
        #
        # NOTE: Even if the env is a neural network and can step the whole batch
        # concurrently, it is still valuable to store the trajectories separately.
        self._trajectories = trajectory.BatchTrajectory(batch_size=batch_size)
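
The `_verify_same_spaces` call above is not defined in this listing. A minimal sketch of what such a check could look like, assuming it only needs to confirm that every env in the batch shares the stored observation and action spaces:

# Hypothetical sketch; the real _verify_same_spaces is not shown here, so treat
# this as an illustration only.
def _verify_same_spaces(self):
  # Compare every env against the stored spaces, mirroring the string
  # comparisons used earlier in initialize_environments.
  for env in self._envs:
    assert str(env.observation_space) == str(self._observation_space)
    assert str(env.action_space) == str(self._action_space)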
Example #2
  def __init__(self, base_env_name=None, batch_size=1, grayscale=False,
               resize_height_factor=2, resize_width_factor=2,
               rl_env_max_episode_steps=-1, max_num_noops=0,
               maxskip_envs=False, **kwargs):
    if base_env_name is None:
      base_env_name = self.base_env_name
    self._base_env_name = base_env_name
    super(T2TGymEnv, self).__init__(batch_size, **kwargs)
    self.grayscale = grayscale
    self.resize_height_factor = resize_height_factor
    self.resize_width_factor = resize_width_factor
    if not self.name:
      # Set problem name if not registered.
      self.name = "Gym%s" % base_env_name

    self._envs = [
        gym_utils.make_gym_env(
            base_env_name, rl_env_max_episode_steps=rl_env_max_episode_steps,
            maxskip_env=maxskip_envs)
        for _ in range(self.batch_size)]

    # max_num_noops works only with atari envs.
    if max_num_noops > 0:
      assert self._envs[0].unwrapped.get_action_meanings()[
          self.noop_action
      ] == "NOOP"
    self.max_num_noops = max_num_noops

    orig_observ_space = self._envs[0].observation_space
    if not all(env.observation_space == orig_observ_space
               for env in self._envs):
      raise ValueError("All environments must use the same observation space.")

    self.observation_space = self._derive_observation_space(orig_observ_space)

    self.action_space = self._envs[0].action_space
    if not all(env.action_space == self.action_space for env in self._envs):
      raise ValueError("All environments must use the same action space.")

    with self._tf_graph.obj.as_default():
      self._resize = dict()
      orig_height, orig_width = orig_observ_space.shape[:2]
      self._img_batch_t = _Noncopyable(tf.placeholder(
          dtype=tf.uint8, shape=(None, orig_height, orig_width, 3)))
      height, width = self.observation_space.shape[:2]
      resized = tf.image.resize_images(self._img_batch_t.obj,
                                       [height, width],
                                       tf.image.ResizeMethod.AREA)
      resized = tf.cast(resized, tf.as_dtype(self.observation_space.dtype))
      if self.grayscale:
        resized = tf.image.rgb_to_grayscale(resized)
      self._resized_img_batch_t = _Noncopyable(resized)
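
The resize graph built above only becomes useful once it is run in a session. A minimal sketch of feeding a batch of frames through it, assuming the class keeps a tf.Session in `self._session` (an attribute that does not appear in this snippet):

# Hypothetical helper; self._session is an assumption, wrapped in _Noncopyable
# like the graph above.
def _preprocess_frames(self, frames):
  # frames: uint8 array of shape (batch, orig_height, orig_width, 3).
  return self._session.obj.run(
      self._resized_img_batch_t.obj,
      feed_dict={self._img_batch_t.obj: frames})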
Example #3
def test_resize(self):
  env_name = TEST_ENV_NAME
  orig_env = make_gym_env(env_name)
  resize_height_factor = 2
  resize_width_factor = 3
  orig_height, orig_width = orig_env.observation_space.shape[:2]
  env, obs, _, _ = self.init_batch_and_play(
      env_name, steps_per_epoch=1,
      resize_height_factor=resize_height_factor,
      resize_width_factor=resize_width_factor)
  for obs_batch in obs:
    ob = obs_batch[0]
    self.assertEqual(ob.shape, env.observation_space.shape)
    height, width = ob.shape[:2]
    self.assertEqual(height, orig_height // resize_height_factor)
    self.assertEqual(width, orig_width // resize_width_factor)
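
`init_batch_and_play` is a test helper that is not included in this listing. A rough sketch of what it might do, assuming it builds a batched T2TGymEnv, plays random actions for the requested number of steps and returns the env together with the collected observations (names, signature and return values are assumptions):

# Hypothetical test helper; not the library's actual implementation.
def init_batch_and_play(self, env_name, steps_per_epoch=1, **env_kwargs):
  env = T2TGymEnv(env_name, batch_size=1, **env_kwargs)
  env.reset()
  obs, rewards, dones = [], [], None
  for _ in range(steps_per_epoch):
    actions = [env.action_space.sample() for _ in range(env.batch_size)]
    # Assumes a batched step() returning (observations, rewards, dones).
    obs_batch, reward_batch, dones = env.step(actions)
    obs.append(obs_batch)
    rewards.append(reward_batch)
  return env, obs, rewards, dones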
Example #4
def test_unlimited_env(self):
  env = gym_utils.make_gym_env("CartPole-v0",
                               rl_env_max_episode_steps=None)
  self.assertIsInstance(env, gym.Env)
  self.assertNotIsInstance(env, gym.wrappers.TimeLimit)
Example #5
def test_making_timewrapped_env(self):
  env = gym_utils.make_gym_env("CartPole-v0",
                               rl_env_max_episode_steps=1000)
  self.assertIsInstance(env, gym.Env)
  self.assertIsInstance(env, gym.wrappers.TimeLimit)
  self.assertEqual(1000, env._max_episode_steps)
Example #6
def test_making_simple_env(self):
  env = gym_utils.make_gym_env("CartPole-v0")
  self.assertIsInstance(env, gym.Env)
Example #7
    def __init__(self,
                 base_env_name=None,
                 batch_size=1,
                 grayscale=False,
                 resize_height_factor=2,
                 resize_width_factor=2,
                 rl_env_max_episode_steps=-1,
                 max_num_noops=0,
                 maxskip_envs=False,
                 sticky_actions=False,
                 should_derive_observation_space=True,
                 **kwargs):
        if base_env_name is None:
            base_env_name = self.base_env_name
        self._base_env_name = base_env_name
        super(T2TGymEnv, self).__init__(batch_size, **kwargs)
        # TODO(afrozm): Find a proper way of doing this. Refactor or cleanup.
        self.should_derive_observation_space = should_derive_observation_space
        self.grayscale = grayscale
        self.resize_height_factor = resize_height_factor
        self.resize_width_factor = resize_width_factor
        self.rl_env_max_episode_steps = rl_env_max_episode_steps
        self.maxskip_envs = maxskip_envs
        self.sticky_actions = sticky_actions
        self._initial_state = None
        self._initial_frames = None
        if not self.name:
            # Set problem name if not registered.
            self.name = "Gym%s" % base_env_name

        self._envs = [
            gym_utils.make_gym_env(
                base_env_name,
                rl_env_max_episode_steps=rl_env_max_episode_steps,
                maxskip_env=maxskip_envs,
                sticky_actions=sticky_actions) for _ in range(self.batch_size)
        ]
        # max_num_noops works only with atari envs.
        # NOTE: the NOOP sanity check below is intentionally disabled in this
        # variant.
        # if max_num_noops > 0:
        #   assert self._envs[0].unwrapped.get_action_meanings()[
        #       self.noop_action
        #   ] == "NOOP"
        self.max_num_noops = max_num_noops

        orig_observ_space = self._envs[0].observation_space
        if not all(env.observation_space == orig_observ_space
                   for env in self._envs):
            raise ValueError(
                "All environments must use the same observation space.")

        self.observation_space = orig_observ_space
        if self.should_derive_observation_space:
            self.observation_space = self._derive_observation_space(
                orig_observ_space)

        self.action_space = self._envs[0].action_space
        if not all(env.action_space == self.action_space
                   for env in self._envs):
            raise ValueError(
                "All environments must use the same action space.")

        if self.should_derive_observation_space:
            with self._tf_graph.obj.as_default():
                self._resize = {}
                orig_height, orig_width = orig_observ_space.shape[:2]
                self._img_batch_t = _Noncopyable(
                    tf.placeholder(dtype=tf.uint8,
                                   shape=(None, orig_height, orig_width, 3)))
                height, width = self.observation_space.shape[:2]
                resized = tf.image.resize_images(self._img_batch_t.obj,
                                                 [height, width],
                                                 tf.image.ResizeMethod.AREA)
                resized = tf.cast(resized,
                                  tf.as_dtype(self.observation_space.dtype))
                if self.grayscale:
                    resized = tf.image.rgb_to_grayscale(resized)
                self._resized_img_batch_t = _Noncopyable(resized)
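
`_derive_observation_space` is referenced by both `__init__` variants above but never shown. A minimal sketch of what it could compute, assuming it only shrinks the height and width by the resize factors and collapses the channel dimension when grayscale is set (the body below is an assumption, not the library's code):

import gym
import numpy as np

# Hypothetical sketch; the real _derive_observation_space is not part of this
# listing.
def _derive_observation_space(self, orig_observ_space):
  height, width = orig_observ_space.shape[:2]
  channels = 1 if self.grayscale else orig_observ_space.shape[2]
  new_shape = (height // self.resize_height_factor,
               width // self.resize_width_factor,
               channels)
  return gym.spaces.Box(low=0, high=255, shape=new_shape, dtype=np.uint8)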