def __init__(self):
    """Create the Halite training environment and its action/observation specs.

    Builds the ``halite`` environment from ``GAME_CONFIG``, obtains a trainer
    against ``GAME_AGENTS`` and exposes the resulting configuration, specs and
    reward range as attributes.
    """
    halite_env = make('halite', configuration=GAME_CONFIG, debug=True)
    self.env = halite_env.train(GAME_AGENTS)
    self.config = halite_env.configuration

    # MAX_SHIPS entries with N_SHIP_ACTIONS choices each, followed by
    # MAX_YARDS entries with N_YARD_ACTIONS choices each.
    gym_action_space = spaces.MultiDiscrete(
        [N_SHIP_ACTIONS] * MAX_SHIPS + [N_YARD_ACTIONS] * MAX_YARDS)
    self.action_space = gym_wrapper.spec_from_gym_space(
        space=gym_action_space, name='action')

    # The observation spec is declared directly as a bounded int32 array of
    # shape (size, size, N_FEATURES) with values in [0, 1].
    board_size = self.config.size
    self.observation_space = array_spec.BoundedArraySpec(
        shape=(board_size, board_size, N_FEATURES),
        dtype=np.int32,
        minimum=0,
        maximum=1,
        name='observation')

    self.reward_range = (REWARD_LOST, REWARD_WON)
    self.obs = None
    self.last_obs = None
    self.spec = None
    self.metadata = None
def __init__(self, num_envs, discount=1.0, spec_dtype_map=None,
             simplify_box_bounds=True, flatten=False,
             normalize_rewards=False, **procgen_kwargs):
    """Uses Native C++ Environment Vectorization, which reduces RAM usage.

    Except the num_envs and **procgen_kwargs, all of the other __init__ args
    come from the original TF-Agents GymWrapper and ParallelPyEnvironment
    wrappers.

    Args:
      num_envs: Number of sub-environments; forwarded as ``num_envs`` to the
        native ProcGen env and used to size the per-env "done" list.
      discount: Discount rewards automatically (also done in algorithms).
      spec_dtype_map: A dict from spaces to dtypes to use as the default
        dtype.
      simplify_box_bounds: Whether to replace bounds of Box space that are
        arrays with identical values with one number and rely on
        broadcasting.
      flatten: Boolean, whether to use flatten action and time_steps during
        communication to reduce overhead.
      normalize_rewards: Use VecNormalize to normalize rewards. Should be
        used for collect env only.
      **procgen_kwargs: Keyword arguments passed into the native ProcGen env.
    """
    super(TFAgentsParallelProcGenEnv, self).__init__()
    self._num_envs = num_envs

    # Build the vectorised env, then wrap it: optional reward normalisation
    # first, followed by conversion of observations to floats (divisor 255).
    parallel_env = procgen_env.ProcgenEnv(num_envs=num_envs, **procgen_kwargs)
    parallel_env = vector_wrap_environment(
        parallel_env,
        normalize_obs=False,
        normalize_ret=normalize_rewards,
        monitor=False)
    parallel_env = ObsToFloat(parallel_env, divisor=255.0)
    self._parallel_env = parallel_env

    self._observation_spec = tf_agents_gym_wrapper.spec_from_gym_space(
        self._parallel_env.observation_space, spec_dtype_map,
        simplify_box_bounds, "observation")
    self._action_spec = tf_agents_gym_wrapper.spec_from_gym_space(
        self._parallel_env.action_space, spec_dtype_map,
        simplify_box_bounds, "action")
    self._time_step_spec = ts.time_step_spec(self._observation_spec,
                                             self.reward_spec())

    self._flatten = flatten
    self._discount = discount
    self._dones = [True] * num_envs  # Contains "done"s for all subenvs.
def _observation_space_fixture(gym_space_bound, gym_space_shape):
    """Return the tensor spec of a symmetric float32 Box observation space.

    The Box spans ``[-gym_space_bound, gym_space_bound]`` with shape
    ``gym_space_shape`` and is converted via ``spec_from_gym_space`` using the
    name ``"observation"``.
    """
    box = gym.spaces.Box(
        low=-gym_space_bound,
        high=gym_space_bound,
        shape=gym_space_shape,
        dtype=np.float32,
    )
    observation_array_spec = spec_from_gym_space(box, name="observation")
    return tensor_spec.from_spec(observation_array_spec)
def test_spec_from_gym_space_dict(self):
    """A Dict space converts to a two-entry spec keyed 'spec_2', 'spec_1'."""
    space = gym.spaces.Dict([
        ('spec_2', gym.spaces.Box(-1.0, 1.0, (3, 4))),
        ('spec_1', gym.spaces.Discrete(2)),
    ])

    spec = gym_wrapper.spec_from_gym_space(space)
    key_order = list(spec.keys())

    # Discrete entry.
    self.assertEqual('spec_1', key_order[1])
    self.assertEqual(2, len(spec))
    discrete_spec = spec['spec_1']
    self.assertEqual((), discrete_spec.shape)
    self.assertEqual(np.int64, discrete_spec.dtype)
    self.assertEqual(0, discrete_spec.minimum)
    self.assertEqual(1, discrete_spec.maximum)

    # Box entry.
    self.assertEqual('spec_2', key_order[0])
    box_spec = spec['spec_2']
    self.assertEqual((3, 4), box_spec.shape)
    self.assertEqual(np.float32, box_spec.dtype)
    expected_upper = np.ones((3, 4))
    np.testing.assert_array_almost_equal(-expected_upper, box_spec.minimum)
    np.testing.assert_array_almost_equal(expected_upper, box_spec.maximum)
def test_spec_from_gym_space_dtype_map(self):
    """`dtype_map` supplies dtypes for Discrete and dtype-less Box spaces."""

    class Box(gym.spaces.Box):
        """Box space without the dtype property."""

        def __init__(self, *args, **kwargs):
            super(Box, self).__init__(*args, **kwargs)
            del self.dtype

    nested_tuple = gym.spaces.Tuple(
        (gym.spaces.Discrete(2), gym.spaces.Discrete(3)))
    nested_dict = gym.spaces.Dict({
        'spec_1': gym.spaces.Discrete(2),
        'spec_2': gym.spaces.Tuple((
            gym.spaces.Discrete(2),
            Box(0, 1, (3, 4)),
        )),
    })
    tuple_space = gym.spaces.Tuple((
        gym.spaces.Discrete(2),
        Box(0, 1, (3, 4)),
        nested_tuple,
        nested_dict,
    ))

    dtype_map = {gym.spaces.Discrete: np.uint8, gym.spaces.Box: np.uint16}
    spec = gym_wrapper.spec_from_gym_space(tuple_space, dtype_map=dtype_map)

    # Top-level entries.
    self.assertEqual(np.uint8, spec[0].dtype)
    self.assertEqual(np.uint16, spec[1].dtype)

    # Nested tuple of two Discrete spaces.
    tuple_spec = spec[2]
    self.assertEqual(np.uint8, tuple_spec[0].dtype)
    self.assertEqual(np.uint8, tuple_spec[1].dtype)

    # Nested dict holding a Discrete and a (Discrete, Box) tuple.
    dict_spec = spec[3]
    self.assertEqual(np.uint8, dict_spec['spec_1'].dtype)
    self.assertEqual(np.uint8, dict_spec['spec_2'][0].dtype)
    self.assertEqual(np.uint16, dict_spec['spec_2'][1].dtype)
def test_spec_from_gym_space_multi_binary(self):
    """MultiBinary(4) converts to an int32 spec of shape (4,) bounded by 0/1."""
    multi_binary_space = gym.spaces.MultiBinary(4)
    spec = gym_wrapper.spec_from_gym_space(multi_binary_space)

    self.assertEqual((4,), spec.shape)
    self.assertEqual(np.int32, spec.dtype)
    # `np.int` was only an alias of the builtin `int` and has been removed
    # from NumPy (1.24+), so the builtin is used directly.
    np.testing.assert_array_equal(np.array([0], dtype=int), spec.minimum)
    np.testing.assert_array_equal(np.array([1], dtype=int), spec.maximum)
def test_spec_from_gym_space_box_scalars_simplify_bounds(self):
    """With `simplify_box_bounds=True` uniform Box bounds compare equal to [-1]/[1]."""
    box_space = gym.spaces.Box(-1.0, 1.0, (3, 4))
    spec = gym_wrapper.spec_from_gym_space(box_space, simplify_box_bounds=True)

    self.assertEqual((3, 4), spec.shape)
    self.assertEqual(np.float32, spec.dtype)
    # `np.int` was only an alias of the builtin `int` and has been removed
    # from NumPy (1.24+), so the builtin is used directly.
    np.testing.assert_array_equal(np.array([-1], dtype=int), spec.minimum)
    np.testing.assert_array_equal(np.array([1], dtype=int), spec.maximum)
def _create_spec(self, policy_info_spec):
    """Build the `EnvStep` spec for the wrapped environment.

    Args:
      policy_info_spec: Spec placed in the policy-info slot of the result.

    Returns:
      An `EnvStep` whose fields are the specs derived from the environment's
      observation/action spaces and the TF-Agents time-step spec, with two
      empty dicts as the trailing fields.
    """
    env = self._env
    observation_spec = gym_wrapper.spec_from_gym_space(env.observation_space)
    action_spec = gym_wrapper.spec_from_gym_space(env.action_space)
    base_time_step_spec = time_step.time_step_spec(observation_spec)

    # Scalar int64 step counter bounded by the episode step limit.
    step_num_array_spec = specs.BoundedArraySpec(
        [],
        dtype=np.int64,
        minimum=0,
        maximum=self._episode_step_limit,
        name='step_num')
    step_num_spec = specs.tensor_spec.from_spec(step_num_array_spec)

    return EnvStep(
        base_time_step_spec.step_type,
        step_num_spec,
        observation_spec,
        action_spec,
        base_time_step_spec.reward,
        base_time_step_spec.discount,
        policy_info_spec,
        {},
        {})
def test_spec_from_gym_space_discrete(self):
    """Discrete(3) converts to a scalar int64 spec bounded by [0, 2]."""
    spec = gym_wrapper.spec_from_gym_space(gym.spaces.Discrete(3))

    self.assertEqual((), spec.shape)
    self.assertEqual(np.int64, spec.dtype)
    self.assertEqual(0, spec.minimum)
    self.assertEqual(2, spec.maximum)
def test_spec_from_gym_space_tuple_mixed(self):
    """A Tuple mixing Discrete, Box, Tuple and Dict converts element-wise."""

    def check_discrete(discrete_spec, expected_maximum):
        # Every Discrete(n) here must become a scalar int64 spec in [0, n-1].
        self.assertEqual((), discrete_spec.shape)
        self.assertEqual(np.int64, discrete_spec.dtype)
        self.assertEqual(0, discrete_spec.minimum)
        self.assertEqual(expected_maximum, discrete_spec.maximum)

    inner_tuple = gym.spaces.Tuple(
        (gym.spaces.Discrete(2), gym.spaces.Discrete(3)))
    inner_dict = gym.spaces.Dict({
        'spec_1': gym.spaces.Discrete(2),
        'spec_2': gym.spaces.Tuple(
            (gym.spaces.Discrete(2), gym.spaces.Discrete(3))),
    })
    tuple_space = gym.spaces.Tuple((
        gym.spaces.Discrete(2),
        gym.spaces.Box(-1.0, 1.0, (3, 4)),
        inner_tuple,
        inner_dict,
    ))

    spec = gym_wrapper.spec_from_gym_space(tuple_space)
    self.assertEqual(4, len(spec))

    # Test Discrete
    check_discrete(spec[0], 1)

    # Test Box
    box_spec = spec[1]
    self.assertEqual((3, 4), box_spec.shape)
    self.assertEqual(np.float32, box_spec.dtype)
    np.testing.assert_array_almost_equal(-np.ones((3, 4)), box_spec.minimum)
    np.testing.assert_array_almost_equal(np.ones((3, 4)), box_spec.maximum)

    # Test Tuple
    tuple_spec = spec[2]
    self.assertEqual(2, len(tuple_spec))
    check_discrete(tuple_spec[0], 1)
    check_discrete(tuple_spec[1], 2)

    # Test Dict
    # Test Discrete in Dict
    check_discrete(spec[3]['spec_1'], 1)

    # Test Tuple in Dict
    tuple_in_dict = spec[3]['spec_2']
    self.assertEqual(2, len(tuple_in_dict))
    check_discrete(tuple_in_dict[0], 1)
    check_discrete(tuple_in_dict[1], 2)
def test_spec_from_gym_space_box_scalars(self):
    """Scalar-bounded Box spaces keep their dtype and yield full bound arrays."""
    shape = (3, 4)
    expected_upper = np.ones(shape)
    for dtype in (np.float32, np.float64):
        space = gym.spaces.Box(-1.0, 1.0, shape, dtype=dtype)
        spec = gym_wrapper.spec_from_gym_space(space)

        self.assertEqual(shape, spec.shape)
        self.assertEqual(dtype, spec.dtype)
        np.testing.assert_array_equal(-expected_upper, spec.minimum)
        np.testing.assert_array_equal(expected_upper, spec.maximum)
def _action_space_latent_obs_fixture(gym_space_bound, gym_space_shape_latent_obs):
    """Return the tensor spec of a symmetric float32 Box action space.

    The Box spans ``[-gym_space_bound, gym_space_bound]`` with shape
    ``gym_space_shape_latent_obs`` and is converted via
    ``spec_from_gym_space`` using the name ``"action"``.
    """
    box = gym.spaces.Box(
        low=-gym_space_bound,
        high=gym_space_bound,
        shape=gym_space_shape_latent_obs,
        dtype=np.float32,
    )
    action_array_spec = spec_from_gym_space(box, name="action")
    return tensor_spec.from_spec(action_array_spec)
def test_spec_from_gym_space_multi_discrete(self):
    """MultiDiscrete([1, 2, 3, 4]) converts to an int32 spec with max n - 1."""
    multi_discrete_space = gym.spaces.MultiDiscrete([1, 2, 3, 4])
    spec = gym_wrapper.spec_from_gym_space(multi_discrete_space)

    self.assertEqual((4,), spec.shape)
    self.assertEqual(np.int32, spec.dtype)
    # `np.int` was only an alias of the builtin `int` and has been removed
    # from NumPy (1.24+), so the builtin is used directly.
    np.testing.assert_array_equal(np.array([0], dtype=int), spec.minimum)
    np.testing.assert_array_equal(
        np.array([0, 1, 2, 3], dtype=int), spec.maximum)
def test_spec_from_gym_space_box_array(self):
    """Array-bounded Box spaces keep their dtype and per-element bounds."""
    for dtype in (np.float32, np.float64):
        lower = np.array([-1.0, -2.0])
        upper = np.array([2.0, 4.0])
        space = gym.spaces.Box(lower, upper, dtype=dtype)

        spec = gym_wrapper.spec_from_gym_space(space)

        self.assertEqual((2,), spec.shape)
        self.assertEqual(dtype, spec.dtype)
        np.testing.assert_array_equal(np.array([-1.0, -2.0]), spec.minimum)
        np.testing.assert_array_equal(np.array([2.0, 4.0]), spec.maximum)
def test_spec_from_gym_space_box_array_constant_min(self):
    """A Box whose lower bound is constant still reports [-1, -1] / [2, 4]."""
    for dtype in (np.float32, np.float64):
        lower = np.array([-1.0, -1.0])
        upper = np.array([2.0, 4.0])
        space = gym.spaces.Box(lower, upper, dtype=dtype)

        spec = gym_wrapper.spec_from_gym_space(space)

        self.assertEqual((2,), spec.shape)
        self.assertEqual(dtype, spec.dtype)
        self.assertAllEqual([-1., -1.], spec.minimum)
        self.assertAllEqual([2., 4.], spec.maximum)
def __init__(self, gym_env, discount=1.0, spec_dtype_map=None,
             match_obs_space_dtype=True, auto_reset=False,
             simplify_box_bounds=True):
    """Initialise the parent wrapper and derive the adversary's specs.

    All arguments are forwarded positionally to the parent constructor.
    Afterwards the adversary observation/action specs are built from the
    wrapped env's `adversary_observation_space` / `adversary_action_space`,
    together with the matching time-step spec and flattened observation spec.
    """
    super(AdversarialGymWrapper, self).__init__(
        gym_env,
        discount,
        spec_dtype_map,
        match_obs_space_dtype,
        auto_reset,
        simplify_box_bounds)

    wrapped_env = self._gym_env
    self.adversary_observation_spec = gym_wrapper.spec_from_gym_space(
        wrapped_env.adversary_observation_space, name='observation')
    self.adversary_action_spec = gym_wrapper.spec_from_gym_space(
        wrapped_env.adversary_action_space, name='action')

    self.adversary_time_step_spec = ts_lib.time_step_spec(
        self.adversary_observation_spec, self.reward_spec())
    self.adversary_flat_obs_spec = tf.nest.flatten(
        self.adversary_observation_spec)
def __init__(self, gym_env, discount=1.0, spec_dtype_map=None,
             match_obs_space_dtype=True, auto_reset=False,
             simplify_box_bounds=True):
    """Initialise the parent wrapper and derive the adversary's specs.

    The requested `auto_reset` value is kept on this instance as
    `_adversarial_auto_reset`; the parent constructor always receives `False`
    in that position. The adversary observation/action specs, time-step spec
    and flattened observation spec are then built from the wrapped env.
    """
    # Do not allow parent class to handle auto_reset because it will call the
    # wrong reset function
    self._adversarial_auto_reset = auto_reset
    parent_auto_reset = False
    super(AdversarialGymWrapper, self).__init__(
        gym_env,
        discount,
        spec_dtype_map,
        match_obs_space_dtype,
        parent_auto_reset,
        simplify_box_bounds)

    wrapped_env = self._gym_env
    self.adversary_observation_spec = gym_wrapper.spec_from_gym_space(
        wrapped_env.adversary_observation_space, name='observation')
    self.adversary_action_spec = gym_wrapper.spec_from_gym_space(
        wrapped_env.adversary_action_space, name='action')

    self.adversary_time_step_spec = ts_lib.time_step_spec(
        self.adversary_observation_spec, self.reward_spec())
    self.adversary_flat_obs_spec = tf.nest.flatten(
        self.adversary_observation_spec)
def test_spec_name_nested(self):
    """Nested spec names are slash-joined paths rooted at the given name."""
    innermost = gym.spaces.Dict({
        'spec_1': gym.spaces.Discrete(2),
        'spec_2': gym.spaces.Discrete(2),
    })
    outer_dict = gym.spaces.Dict({'spec_0': innermost})
    space = gym.spaces.Tuple((outer_dict, gym.spaces.Discrete(2)))

    spec = gym_wrapper.spec_from_gym_space(space, name='observation')

    nested_specs = spec[0]['spec_0']
    self.assertEqual('observation/tuple_0/spec_0/spec_1',
                     nested_specs['spec_1'].name)
    self.assertEqual('observation/tuple_0/spec_0/spec_2',
                     nested_specs['spec_2'].name)
    self.assertEqual('observation/tuple_1', spec[1].name)
def test_spec_from_gym_space_tuple(self):
    """A Tuple of Discrete(2), Discrete(3) converts to two scalar int64 specs."""
    space = gym.spaces.Tuple(
        (gym.spaces.Discrete(2), gym.spaces.Discrete(3)))
    spec = gym_wrapper.spec_from_gym_space(space)

    self.assertEqual(2, len(spec))
    # Discrete(n) maps to the closed range [0, n - 1].
    for index, expected_maximum in enumerate((1, 2)):
        element_spec = spec[index]
        self.assertEqual((), element_spec.shape)
        self.assertEqual(np.int64, element_spec.dtype)
        self.assertEqual(0, element_spec.minimum)
        self.assertEqual(expected_maximum, element_spec.maximum)
def test_spec_from_gym_space_when_simplify_box_bounds_false(self):
    """`simplify_box_bounds=False` keeps full bound arrays inside a Dict space."""
    # testing on gym.spaces.Dict which makes recursive calls to
    # _spec_from_gym_space
    box_space = gym.spaces.Box(-1.0, 1.0, (2,))
    dict_space = gym.spaces.Dict({'box1': box_space, 'box2': box_space})
    spec = gym_wrapper.spec_from_gym_space(dict_space,
                                           simplify_box_bounds=False)

    self.assertEqual((2,), spec['box1'].shape)
    self.assertEqual((2,), spec['box2'].shape)
    self.assertEqual(np.float32, spec['box1'].dtype)
    self.assertEqual(np.float32, spec['box2'].dtype)
    self.assertEqual('box1', spec['box1'].name)
    self.assertEqual('box2', spec['box2'].name)

    # `np.int` was only an alias of the builtin `int` and has been removed
    # from NumPy (1.24+), so the builtin is used directly.
    np.testing.assert_array_equal(np.array([-1, -1], dtype=int),
                                  spec['box1'].minimum)
    np.testing.assert_array_equal(np.array([1, 1], dtype=int),
                                  spec['box1'].maximum)
    np.testing.assert_array_equal(np.array([-1, -1], dtype=int),
                                  spec['box2'].minimum)
    np.testing.assert_array_equal(np.array([1, 1], dtype=int),
                                  spec['box2'].maximum)
def test_spec_name(self):
    """The `name` argument is carried onto the resulting spec."""
    lower = np.array([-1.0, -2.0])
    upper = np.array([2.0, 4.0])
    space = gym.spaces.Box(lower, upper, dtype=np.float32)

    spec = gym_wrapper.spec_from_gym_space(space, name='observation')

    self.assertEqual('observation', spec.name)