Beispiel #1
0
    def __init__(self):
        """Create the Halite training environment and publish its specs.

        Builds the ``'halite'`` environment from ``GAME_CONFIG`` (with
        ``debug=True``), keeps the trainer returned by ``train(GAME_AGENTS)``
        and the environment configuration, and exposes ``action_space`` /
        ``observation_space`` as specs rather than raw gym spaces.
        """
        halite_env = make('halite', configuration=GAME_CONFIG, debug=True)
        self.env = halite_env.train(GAME_AGENTS)

        self.config = halite_env.configuration

        # MAX_SHIPS entries of N_SHIP_ACTIONS followed by MAX_YARDS entries of
        # N_YARD_ACTIONS; only the converted spec is stored on the instance.
        gym_action_space = spaces.MultiDiscrete(
            [N_SHIP_ACTIONS] * MAX_SHIPS + [N_YARD_ACTIONS] * MAX_YARDS)
        self.action_space = gym_wrapper.spec_from_gym_space(
            space=gym_action_space, name='action')

        # The previous code first derived a float32 spec from a
        # ``spaces.Box(low=0, high=1, ...)`` of the same shape and then
        # immediately overwrote it with this int32 spec, so only this
        # assignment was ever effective; the dead computation was removed.
        # NOTE(review): if observations are actually floats in [0, 1], the
        # dtype here should be np.float32 -- confirm against the code that
        # produces observations.
        self.observation_space = array_spec.BoundedArraySpec(
            shape=(self.config.size, self.config.size, N_FEATURES),
            dtype=np.int32,
            minimum=0,
            maximum=1,
            name='observation')

        self.reward_range = (REWARD_LOST, REWARD_WON)

        # Current and previous observation; nothing has been observed yet.
        self.obs = None
        self.last_obs = None

        self.spec = None
        self.metadata = None
    def __init__(self,
                 num_envs,
                 discount=1.0,
                 spec_dtype_map=None,
                 simplify_box_bounds=True,
                 flatten=False,
                 normalize_rewards=False,
                 **procgen_kwargs):
        """Wrap a natively vectorized ProcGen environment for TF-Agents.

        Uses native C++ environment vectorization, which reduces RAM usage.
        Apart from ``num_envs`` and ``**procgen_kwargs``, the arguments mirror
        those of the TF-Agents GymWrapper and ParallelPyEnvironment wrappers.

        Args:
          num_envs: Number of sub-environments. Forwarded to ``ProcgenEnv``
            and used to size the per-sub-environment "done" list.
          discount: Discount value stored on the wrapper as ``_discount``.
          spec_dtype_map: Passed to ``spec_from_gym_space`` when building the
            observation and action specs (a dict from spaces to dtypes).
          simplify_box_bounds: Passed to ``spec_from_gym_space``; whether to
            replace Box bounds that are arrays of identical values with one
            number and rely on broadcasting.
          flatten: Stored as ``_flatten``; whether to flatten actions and
            time_steps during communication to reduce overhead.
          normalize_rewards: Forwarded as ``normalize_ret`` to
            ``vector_wrap_environment``. Should be used for collect env only.
          **procgen_kwargs: Keyword arguments passed to the native ProcGen env.
        """
        super(TFAgentsParallelProcGenEnv, self).__init__()

        self._num_envs = num_envs

        # Build the wrapper stack from the inside out.
        native_env = procgen_env.ProcgenEnv(num_envs=num_envs,
                                            **procgen_kwargs)
        wrapped_env = vector_wrap_environment(native_env,
                                              normalize_obs=False,
                                              normalize_ret=normalize_rewards,
                                              monitor=False)
        # presumably rescales uint8 pixel observations to floats -- see
        # ObsToFloat for the exact behavior.
        self._parallel_env = ObsToFloat(wrapped_env, divisor=255.0)

        def _to_spec(space, name):
            # Same conversion settings for both observation and action spaces.
            return tf_agents_gym_wrapper.spec_from_gym_space(
                space, spec_dtype_map, simplify_box_bounds, name)

        self._observation_spec = _to_spec(
            self._parallel_env.observation_space, "observation")
        self._action_spec = _to_spec(
            self._parallel_env.action_space, "action")
        self._time_step_spec = ts.time_step_spec(self._observation_spec,
                                                 self.reward_spec())

        self._flatten = flatten
        self._discount = discount

        # One "done" flag per sub-environment, all initially True.
        self._dones = [True] * num_envs
Beispiel #3
0
def _observation_space_fixture(gym_space_bound, gym_space_shape):
    """Return a tensor spec named "observation" for a symmetric float32 Box.

    The Box spans ``[-gym_space_bound, gym_space_bound]`` with shape
    ``gym_space_shape``.
    """
    box = gym.spaces.Box(
        low=-gym_space_bound,
        high=gym_space_bound,
        shape=gym_space_shape,
        dtype=np.float32,
    )
    observation_spec = spec_from_gym_space(box, name="observation")
    return tensor_spec.from_spec(observation_spec)
Beispiel #4
0
    def test_spec_from_gym_space_dict(self):
        """A Dict space built from (key, space) pairs converts entry by entry.

        The test expects the resulting keys in the order the pairs were
        given: 'spec_2' first, then 'spec_1'.
        """
        box_entry = ('spec_2', gym.spaces.Box(-1.0, 1.0, (3, 4)))
        discrete_entry = ('spec_1', gym.spaces.Discrete(2))
        dict_space = gym.spaces.Dict([box_entry, discrete_entry])

        spec = gym_wrapper.spec_from_gym_space(dict_space)
        keys = list(spec.keys())

        # Discrete(2) entry: scalar int64 in [0, 1].
        self.assertEqual('spec_1', keys[1])
        self.assertEqual(2, len(spec))
        discrete_spec = spec['spec_1']
        self.assertEqual((), discrete_spec.shape)
        self.assertEqual(np.int64, discrete_spec.dtype)
        self.assertEqual(0, discrete_spec.minimum)
        self.assertEqual(1, discrete_spec.maximum)

        # Box entry: float32 (3, 4) with bounds of -1 and 1.
        self.assertEqual('spec_2', keys[0])
        box_spec = spec['spec_2']
        self.assertEqual((3, 4), box_spec.shape)
        self.assertEqual(np.float32, box_spec.dtype)
        expected_high = np.ones((3, 4))
        np.testing.assert_array_almost_equal(-expected_high, box_spec.minimum)
        np.testing.assert_array_almost_equal(expected_high, box_spec.maximum)
Beispiel #5
0
    def test_spec_from_gym_space_dtype_map(self):
        """``dtype_map`` decides the dtype of every leaf in a nested space.

        Discrete leaves are expected as uint8 and the dtype-less Box leaves
        as uint16, at every nesting level (Tuple and Dict).
        """
        class Box(gym.spaces.Box):
            """Box space without the dtype property."""
            def __init__(self, *args, **kwargs):
                super(Box, self).__init__(*args, **kwargs)
                # Drop the attribute set by the parent constructor.
                del self.dtype

        top_discrete = gym.spaces.Discrete(2)
        top_box = Box(0, 1, (3, 4))
        inner_tuple = gym.spaces.Tuple(
            (gym.spaces.Discrete(2), gym.spaces.Discrete(3)))
        inner_dict = gym.spaces.Dict({
            'spec_1': gym.spaces.Discrete(2),
            'spec_2': gym.spaces.Tuple((
                gym.spaces.Discrete(2),
                Box(0, 1, (3, 4)),
            )),
        })
        tuple_space = gym.spaces.Tuple(
            (top_discrete, top_box, inner_tuple, inner_dict))

        dtype_map = {gym.spaces.Discrete: np.uint8, gym.spaces.Box: np.uint16}
        spec = gym_wrapper.spec_from_gym_space(tuple_space,
                                               dtype_map=dtype_map)

        # Top-level leaves.
        self.assertEqual(np.uint8, spec[0].dtype)
        self.assertEqual(np.uint16, spec[1].dtype)
        # Leaves of the nested Tuple.
        nested_tuple_spec = spec[2]
        self.assertEqual(np.uint8, nested_tuple_spec[0].dtype)
        self.assertEqual(np.uint8, nested_tuple_spec[1].dtype)
        # Leaves of the nested Dict, including its inner Tuple.
        nested_dict_spec = spec[3]
        self.assertEqual(np.uint8, nested_dict_spec['spec_1'].dtype)
        self.assertEqual(np.uint8, nested_dict_spec['spec_2'][0].dtype)
        self.assertEqual(np.uint16, nested_dict_spec['spec_2'][1].dtype)
  def test_spec_from_gym_space_multi_binary(self):
    """MultiBinary(4) is expected as an int32 spec of shape (4,) in [0, 1]."""
    multi_binary_space = gym.spaces.MultiBinary(4)
    spec = gym_wrapper.spec_from_gym_space(multi_binary_space)

    self.assertEqual((4,), spec.shape)
    self.assertEqual(np.int32, spec.dtype)
    # `np.int` was a plain alias of the builtin `int`; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    np.testing.assert_array_equal(np.array([0], dtype=int), spec.minimum)
    np.testing.assert_array_equal(np.array([1], dtype=int), spec.maximum)
  def test_spec_from_gym_space_box_scalars_simplify_bounds(self):
    """With simplify_box_bounds=True, uniform Box bounds collapse to one value.

    The (3, 4) Box keeps its shape and float32 dtype, while minimum/maximum
    are compared against single-element arrays holding -1 and 1.
    """
    box_space = gym.spaces.Box(-1.0, 1.0, (3, 4))
    spec = gym_wrapper.spec_from_gym_space(box_space, simplify_box_bounds=True)

    self.assertEqual((3, 4), spec.shape)
    self.assertEqual(np.float32, spec.dtype)
    # `np.int` was a plain alias of the builtin `int`; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    np.testing.assert_array_equal(np.array([-1], dtype=int), spec.minimum)
    np.testing.assert_array_equal(np.array([1], dtype=int), spec.maximum)
  def _create_spec(self, policy_info_spec):
    """Assemble the ``EnvStep`` spec for the wrapped environment.

    Observation and action specs come from the wrapped env's gym spaces;
    step type, reward and discount come from the TF-Agents time-step spec
    built on the observation spec; the step counter is an int64 scalar
    bounded by ``[0, self._episode_step_limit]``.

    Args:
      policy_info_spec: Spec placed in the policy-info slot of the result.

    Returns:
      An ``EnvStep`` of specs; its last two fields are empty dicts.
    """
    env = self._env
    observation_spec = gym_wrapper.spec_from_gym_space(env.observation_space)
    action_spec = gym_wrapper.spec_from_gym_space(env.action_space)

    ts_spec = time_step.time_step_spec(observation_spec)

    step_num_array_spec = specs.BoundedArraySpec(
        [],
        dtype=np.int64,
        minimum=0,
        maximum=self._episode_step_limit,
        name='step_num')
    step_num_spec = specs.tensor_spec.from_spec(step_num_array_spec)

    return EnvStep(
        ts_spec.step_type,
        step_num_spec,
        observation_spec,
        action_spec,
        ts_spec.reward,
        ts_spec.discount,
        policy_info_spec,
        {},
        {})
Beispiel #9
0
    def test_spec_from_gym_space_discrete(self):
        """Discrete(3) is expected as a scalar int64 spec bounded by 0 and 2."""
        spec = gym_wrapper.spec_from_gym_space(gym.spaces.Discrete(3))

        self.assertEqual((), spec.shape)
        self.assertEqual(np.int64, spec.dtype)
        # Bounds are inclusive: n - 1 is the largest valid action.
        self.assertEqual(0, spec.minimum)
        self.assertEqual(2, spec.maximum)
Beispiel #10
0
    def test_spec_from_gym_space_tuple_mixed(self):
        """A Tuple mixing Discrete, Box, Tuple and Dict converts recursively.

        Every Discrete(n) leaf is expected as a scalar int64 spec in
        [0, n - 1]; the Box leaf keeps its shape, float32 dtype and bounds.
        """
        def check_discrete(leaf, expected_max):
            # Shared expectations for every Discrete leaf in the structure.
            self.assertEqual((), leaf.shape)
            self.assertEqual(np.int64, leaf.dtype)
            self.assertEqual(0, leaf.minimum)
            self.assertEqual(expected_max, leaf.maximum)

        discrete = gym.spaces.Discrete(2)
        box = gym.spaces.Box(-1.0, 1.0, (3, 4))
        nested_tuple = gym.spaces.Tuple(
            (gym.spaces.Discrete(2), gym.spaces.Discrete(3)))
        nested_dict = gym.spaces.Dict({
            'spec_1': gym.spaces.Discrete(2),
            'spec_2': gym.spaces.Tuple(
                (gym.spaces.Discrete(2), gym.spaces.Discrete(3))),
        })
        tuple_space = gym.spaces.Tuple(
            (discrete, box, nested_tuple, nested_dict))
        spec = gym_wrapper.spec_from_gym_space(tuple_space)

        self.assertEqual(4, len(spec))

        # Discrete at the top level.
        check_discrete(spec[0], 1)

        # Box at the top level.
        box_spec = spec[1]
        self.assertEqual((3, 4), box_spec.shape)
        self.assertEqual(np.float32, box_spec.dtype)
        np.testing.assert_array_almost_equal(-np.ones((3, 4)),
                                             box_spec.minimum)
        np.testing.assert_array_almost_equal(np.ones((3, 4)),
                                             box_spec.maximum)

        # Nested Tuple of two Discretes.
        tuple_spec = spec[2]
        self.assertEqual(2, len(tuple_spec))
        check_discrete(tuple_spec[0], 1)
        check_discrete(tuple_spec[1], 2)

        # Nested Dict: a Discrete entry ...
        check_discrete(spec[3]['spec_1'], 1)

        # ... and a Tuple entry.
        tuple_in_dict = spec[3]['spec_2']
        self.assertEqual(2, len(tuple_in_dict))
        check_discrete(tuple_in_dict[0], 1)
        check_discrete(tuple_in_dict[1], 2)
Beispiel #11
0
    def test_spec_from_gym_space_box_scalars(self):
        """A Box with scalar bounds keeps its dtype, shape and bounds.

        Checked for both float32 and float64 Boxes of shape (3, 4).
        """
        shape = (3, 4)
        for dtype in (np.float32, np.float64):
            spec = gym_wrapper.spec_from_gym_space(
                gym.spaces.Box(-1.0, 1.0, shape, dtype=dtype))

            self.assertEqual(shape, spec.shape)
            self.assertEqual(dtype, spec.dtype)
            np.testing.assert_array_equal(-np.ones(shape), spec.minimum)
            np.testing.assert_array_equal(np.ones(shape), spec.maximum)
Beispiel #12
0
def _action_space_latent_obs_fixture(gym_space_bound,
                                     gym_space_shape_latent_obs):
    """Return a tensor spec named "action" for a symmetric float32 Box.

    The Box spans ``[-gym_space_bound, gym_space_bound]`` with shape
    ``gym_space_shape_latent_obs``.
    """
    box = gym.spaces.Box(low=-gym_space_bound,
                         high=gym_space_bound,
                         shape=gym_space_shape_latent_obs,
                         dtype=np.float32)
    action_spec = spec_from_gym_space(box, name="action")
    return tensor_spec.from_spec(action_spec)
  def test_spec_from_gym_space_multi_discrete(self):
    """MultiDiscrete([1, 2, 3, 4]) is expected as an int32 spec of shape (4,).

    The minimum is compared against 0 and the maximum against each
    dimension's size minus one.
    """
    multi_discrete_space = gym.spaces.MultiDiscrete([1, 2, 3, 4])
    spec = gym_wrapper.spec_from_gym_space(multi_discrete_space)

    self.assertEqual((4,), spec.shape)
    self.assertEqual(np.int32, spec.dtype)
    # `np.int` was a plain alias of the builtin `int`; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    np.testing.assert_array_equal(np.array([0], dtype=int), spec.minimum)
    np.testing.assert_array_equal(
        np.array([0, 1, 2, 3], dtype=int), spec.maximum)
  def test_spec_from_gym_space_box_array(self):
    """A Box with per-element array bounds keeps those bounds in the spec.

    Checked for both float32 and float64.
    """
    low = np.array([-1.0, -2.0])
    high = np.array([2.0, 4.0])
    for dtype in (np.float32, np.float64):
      # Fresh bound arrays per iteration, as the Box takes them as input.
      box_space = gym.spaces.Box(
          np.array([-1.0, -2.0]), np.array([2.0, 4.0]), dtype=dtype)
      spec = gym_wrapper.spec_from_gym_space(box_space)

      self.assertEqual((2,), spec.shape)
      self.assertEqual(dtype, spec.dtype)
      np.testing.assert_array_equal(low, spec.minimum)
      np.testing.assert_array_equal(high, spec.maximum)
  def test_spec_from_gym_space_box_array_constant_min(self):
    """A Box whose lower bound is constant but upper bound varies.

    The test expects the minimum to compare equal to [-1, -1] and the
    maximum to [2, 4], for both float32 and float64.
    """
    for dtype in (np.float32, np.float64):
      low = np.array([-1.0, -1.0])
      high = np.array([2.0, 4.0])
      spec = gym_wrapper.spec_from_gym_space(
          gym.spaces.Box(low, high, dtype=dtype))

      self.assertEqual((2,), spec.shape)
      self.assertEqual(dtype, spec.dtype)
      self.assertAllEqual([-1., -1.], spec.minimum)
      self.assertAllEqual([2., 4.], spec.maximum)
  def __init__(self,
               gym_env,
               discount=1.0,
               spec_dtype_map=None,
               match_obs_space_dtype=True,
               auto_reset=False,
               simplify_box_bounds=True):
    """Initialize the wrapper and add specs for the adversary.

    All arguments are forwarded unchanged, positionally, to the parent
    constructor. The adversary's observation/action spaces are then read
    from the wrapped gym env and converted to specs.
    """
    # NOTE(review): another variant of this constructor in this file passes
    # False instead of `auto_reset` here, stating that the parent would call
    # the wrong reset function -- confirm whether this variant needs the same.
    super(AdversarialGymWrapper, self).__init__(
        gym_env, discount, spec_dtype_map, match_obs_space_dtype, auto_reset,
        simplify_box_bounds)

    wrapped_env = self._gym_env
    self.adversary_observation_spec = gym_wrapper.spec_from_gym_space(
        wrapped_env.adversary_observation_space, name='observation')
    self.adversary_action_spec = gym_wrapper.spec_from_gym_space(
        wrapped_env.adversary_action_space, name='action')

    adversary_obs_spec = self.adversary_observation_spec
    self.adversary_time_step_spec = ts_lib.time_step_spec(
        adversary_obs_spec, self.reward_spec())
    # Flattened form of the (possibly nested) adversary observation spec.
    self.adversary_flat_obs_spec = tf.nest.flatten(adversary_obs_spec)
    def __init__(self,
                 gym_env,
                 discount=1.0,
                 spec_dtype_map=None,
                 match_obs_space_dtype=True,
                 auto_reset=False,
                 simplify_box_bounds=True):
        """Initialize the wrapper and add specs for the adversary.

        The requested ``auto_reset`` is kept on the instance as
        ``_adversarial_auto_reset``; the parent constructor always receives
        ``False`` for it. The adversary's observation/action spaces are then
        read from the wrapped gym env and converted to specs.
        """
        # The parent class must not handle auto_reset itself: it would call
        # the wrong reset function. Remember the request here instead.
        self._adversarial_auto_reset = auto_reset
        parent_auto_reset = False
        super(AdversarialGymWrapper, self).__init__(
            gym_env, discount, spec_dtype_map, match_obs_space_dtype,
            parent_auto_reset, simplify_box_bounds)

        wrapped_env = self._gym_env
        self.adversary_observation_spec = gym_wrapper.spec_from_gym_space(
            wrapped_env.adversary_observation_space, name='observation')
        self.adversary_action_spec = gym_wrapper.spec_from_gym_space(
            wrapped_env.adversary_action_space, name='action')

        adversary_obs_spec = self.adversary_observation_spec
        self.adversary_time_step_spec = ts_lib.time_step_spec(
            adversary_obs_spec, self.reward_spec())
        # Flattened form of the (possibly nested) adversary observation spec.
        self.adversary_flat_obs_spec = tf.nest.flatten(adversary_obs_spec)
Beispiel #18
0
 def test_spec_name_nested(self):
     """Leaf spec names are expected to encode their path in the structure.

     Tuple positions appear as 'tuple_<index>' and Dict keys by their key,
     joined with '/' under the top-level name.
     """
     inner_dict = gym.spaces.Dict({
         'spec_1': gym.spaces.Discrete(2),
         'spec_2': gym.spaces.Discrete(2),
     })
     outer_dict = gym.spaces.Dict({'spec_0': inner_dict})
     dict_space = gym.spaces.Tuple((outer_dict, gym.spaces.Discrete(2)))

     spec = gym_wrapper.spec_from_gym_space(dict_space, name='observation')

     leaves = spec[0]['spec_0']
     self.assertEqual('observation/tuple_0/spec_0/spec_1',
                      leaves['spec_1'].name)
     self.assertEqual('observation/tuple_0/spec_0/spec_2',
                      leaves['spec_2'].name)
     self.assertEqual('observation/tuple_1', spec[1].name)
Beispiel #19
0
    def test_spec_from_gym_space_tuple(self):
        """A Tuple of Discrete(2) and Discrete(3) yields two scalar specs.

        Each is expected as int64 with bounds [0, n - 1].
        """
        first = gym.spaces.Discrete(2)
        second = gym.spaces.Discrete(3)
        spec = gym_wrapper.spec_from_gym_space(
            gym.spaces.Tuple((first, second)))

        self.assertEqual(2, len(spec))

        # Discrete(2) -> [0, 1]
        first_spec = spec[0]
        self.assertEqual((), first_spec.shape)
        self.assertEqual(np.int64, first_spec.dtype)
        self.assertEqual(0, first_spec.minimum)
        self.assertEqual(1, first_spec.maximum)

        # Discrete(3) -> [0, 2]
        second_spec = spec[1]
        self.assertEqual((), second_spec.shape)
        self.assertEqual(np.int64, second_spec.dtype)
        self.assertEqual(0, second_spec.minimum)
        self.assertEqual(2, second_spec.maximum)
Beispiel #20
0
    def test_spec_from_gym_space_when_simplify_box_bounds_false(self):
        """simplify_box_bounds=False must reach Box leaves nested in a Dict.

        Both Dict entries share one Box of shape (2,); each resulting spec is
        expected to be float32, named after its key, and to carry
        full-length bound arrays ([-1, -1] and [1, 1]).
        """
        # testing on gym.spaces.Dict which makes recursive calls to
        # _spec_from_gym_space
        box_space = gym.spaces.Box(-1.0, 1.0, (2, ))
        dict_space = gym.spaces.Dict({'box1': box_space, 'box2': box_space})
        spec = gym_wrapper.spec_from_gym_space(dict_space,
                                               simplify_box_bounds=False)

        # `np.int` was a plain alias of the builtin `int`; it was deprecated
        # in NumPy 1.20 and removed in 1.24, so the builtin is used directly.
        expected_minimum = np.array([-1, -1], dtype=int)
        expected_maximum = np.array([1, 1], dtype=int)

        for key in ('box1', 'box2'):
            box_spec = spec[key]
            self.assertEqual((2, ), box_spec.shape)
            self.assertEqual(np.float32, box_spec.dtype)
            self.assertEqual(key, box_spec.name)
            np.testing.assert_array_equal(expected_minimum, box_spec.minimum)
            np.testing.assert_array_equal(expected_maximum, box_spec.maximum)
Beispiel #21
0
 def test_spec_name(self):
     """The ``name`` argument is expected to become the spec's name."""
     low = np.array([-1.0, -2.0])
     high = np.array([2.0, 4.0])
     box_space = gym.spaces.Box(low, high, dtype=np.float32)

     spec = gym_wrapper.spec_from_gym_space(box_space, name='observation')

     self.assertEqual('observation', spec.name)