def test_reraise_exception_in_step(self):
    """Check that an exception raised while stepping reaches the caller.

    The mock environment is built with `crash_at_step=3`: two steps are taken
    outside of any exception check, and the third one is required to raise.
    """
    make_env = functools.partial(MockEnvironmentCrashInStep, crash_at_step=3)
    env = parallel_py_environment.ProcessPyEnvironment(make_env)
    env.start()
    env.reset()
    spec = env.action_spec()
    random_state = np.random.RandomState()

    # Two steps that must not raise.
    for _ in range(2):
        env.step(array_spec.sample_bounded_spec(spec, random_state))

    # The third step is the one expected to fail.
    with self.assertRaises(Exception):
        env.step(array_spec.sample_bounded_spec(spec, random_state))
def test_step(self):
    """Step a parallel environment twice and check how results are batched.

    Verifies that the leading dimension of both the observation and the
    action equals `num_envs`, that the remaining dimensions match the
    corresponding specs, and that a second step returns observations of the
    same shape as the first.
    """
    num_envs = 2
    env = self._make_parallel_py_environment(num_envs=num_envs)
    # Registered up front so the environment is closed even when one of the
    # assertions below fails.
    self.addCleanup(env.close)

    action_spec = env.action_spec()
    observation_spec = env.observation_spec()
    rng = np.random.RandomState()
    action = np.array([
        array_spec.sample_bounded_spec(action_spec, rng)
        for _ in range(num_envs)
    ])
    env.reset()

    # Take one step and assert observation is batched the right way.
    time_step = env.step(action)
    self.assertEqual(num_envs, time_step.observation.shape[0])
    self.assertAllEqual(observation_spec.shape,
                        time_step.observation.shape[1:])
    self.assertEqual(num_envs, action.shape[0])
    self.assertAllEqual(action_spec.shape, action.shape[1:])

    # Take another step and assert that observations have the same shape.
    time_step2 = env.step(action)
    self.assertAllEqual(time_step.observation.shape,
                        time_step2.observation.shape)
def test_batch_env(self):
    """Step and reset a batched environment wrapped for goal replay.

    After both `step` and `reset` the observation must be a dict whose keys
    equal the keys of the wrapped environment's observation spec.
    """
    batch_size = 4
    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)
    obs_spec = collections.OrderedDict([
        ('obs1', array_spec.ArraySpec((1,), np.int32)),
        ('obs2', array_spec.ArraySpec((2,), np.int32)),
    ])

    # Generate a random py environment with a batch size, then wrap it.
    env = MockGoalReplayEnvWrapper(
        random_py_environment.RandomPyEnvironment(
            obs_spec, action_spec=action_spec, batch_size=batch_size))

    rng = np.random.RandomState()
    random_action = array_spec.sample_bounded_spec(action_spec, rng)

    # Observation returned by `step`.
    stepped = env.step(random_action)
    self.assertIsInstance(stepped.observation, dict)
    step_observation = cast(Mapping[Text, Any], stepped.observation)
    observation_spec = cast(Mapping[Text, Any], env.observation_spec())
    self.assertEqual(step_observation.keys(), observation_spec.keys())

    # Observation returned by `reset`.
    restarted = env.reset()
    self.assertIsInstance(restarted.observation, dict)
    reset_observation = cast(Mapping[Text, Any], restarted.observation)
    self.assertEqual(reset_observation.keys(), observation_spec.keys())
def test_with_varying_observation_filters(self, observations_to_keep):
    """Flatten only a chosen subset of observations and check the result.

    Args:
      observations_to_keep: The observation key(s) to keep. The value is
        wrapped in an array and flattened before being handed to the wrapper
        as its allowlist.
    """
    allowlist = np.array([observations_to_keep]).flatten()

    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)
    obs_spec = collections.OrderedDict([
        ('obs1', array_spec.ArraySpec((1,), np.int32)),
        ('obs2', array_spec.ArraySpec((2,), np.int32)),
        ('obs3', array_spec.ArraySpec((3,), np.int32)),
    ])
    base_env = random_py_environment.RandomPyEnvironment(
        obs_spec, action_spec=action_spec)

    # Create the wrapper with the list of observations to keep before packing
    # them into one dimension.
    env = wrappers.FlattenObservationsWrapper(
        base_env, observations_allowlist=allowlist)

    random_action = array_spec.sample_bounded_spec(
        action_spec, np.random.RandomState())
    time_step = env.step(random_action)

    # The expected shape is the sum of observation lengths in the observation
    # spec after filtering it by the allowlist.
    expected_shape = self._get_expected_shape(obs_spec, allowlist)

    # Check the shape of the observation returned from stepping the
    # environment and, additionally, the environment's observation spec.
    self.assertEqual(time_step.observation.shape, expected_shape)
    expected_spec = array_spec.ArraySpec(
        shape=expected_shape, dtype=np.int32, name='packed_observations')
    self.assertEqual(env.observation_spec(), expected_spec)
def test_batch_env(self):
    """Step a batched environment through the flatten-observations wrapper.

    The stepped observation must have shape `(batch_size, flattened_length)`,
    while the wrapper's observation spec is compared against the expected
    shape without the batch dimension.
    """
    batch_size = 4
    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)
    obs_spec = collections.OrderedDict([
        ('obs1', array_spec.ArraySpec((1,), np.int32)),
        ('obs2', array_spec.ArraySpec((2,), np.int32)),
    ])

    # Generate a random py environment with a batch size, then wrap it.
    env = wrappers.FlattenObservationsWrapper(
        random_py_environment.RandomPyEnvironment(
            obs_spec, action_spec=action_spec, batch_size=batch_size))

    random_action = array_spec.sample_bounded_spec(
        action_spec, np.random.RandomState())
    time_step = env.step(random_action)

    expected_shape = self._get_expected_shape(obs_spec, obs_spec.keys())
    batched_shape = (batch_size, expected_shape[0])
    self.assertEqual(time_step.observation.shape, batched_shape)

    expected_spec = array_spec.ArraySpec(
        shape=expected_shape, dtype=np.int32, name='packed_observations')
    self.assertEqual(env.observation_spec(), expected_spec)
def test_with_varying_observation_specs(
        self, observation_keys, observation_shapes, observation_dtypes):
    """Vary the observation spec and step the goal-replay-wrapped environment.

    Args:
      observation_keys: Keys of the observation spec to build.
      observation_shapes: Shapes indexed in parallel with `observation_keys`.
      observation_dtypes: Dtype passed to every observation `ArraySpec`.
    """
    obs_spec = collections.OrderedDict(
        (key, array_spec.ArraySpec(observation_shapes[position],
                                   observation_dtypes))
        for position, key in enumerate(observation_keys))
    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)

    env = MockGoalReplayEnvWrapper(
        random_py_environment.RandomPyEnvironment(
            obs_spec, action_spec=action_spec))

    rng = np.random.RandomState()
    random_action = array_spec.sample_bounded_spec(action_spec, rng)

    # Observation returned by `step` must be a dict keyed like the spec.
    stepped = env.step(random_action)
    self.assertIsInstance(stepped.observation, dict)
    step_observation = cast(Mapping[Text, Any], stepped.observation)
    observation_spec = cast(Mapping[Text, Any], env.observation_spec())
    self.assertEqual(step_observation.keys(), observation_spec.keys())

    # The same must hold for the observation returned by `reset`.
    restarted = env.reset()
    self.assertIsInstance(restarted.observation, dict)
    reset_observation = cast(Mapping[Text, Any], restarted.observation)
    self.assertEqual(reset_observation.keys(), observation_spec.keys())
def test_close_no_hang_after_step(self):
    """Start a process environment, step it twice, then close it.

    The test has no assertions; it passes when `close` returns after the
    environment has been reset and stepped.
    """
    observation_spec = array_spec.ArraySpec((3, 3), np.float32)
    bounded_action_spec = array_spec.BoundedArraySpec(
        [1], np.float32, minimum=-1.0, maximum=1.0)
    make_env = functools.partial(
        random_py_environment.RandomPyEnvironment,
        observation_spec,
        bounded_action_spec,
        episode_end_probability=0,
        min_duration=5,
        max_duration=5)
    random_state = np.random.RandomState()

    env = parallel_py_environment.ProcessPyEnvironment(make_env)
    env.start()
    action_spec = env.action_spec()
    env.reset()
    for _ in range(2):
        env.step(array_spec.sample_bounded_spec(action_spec, random_state))
    env.close()
def test_with_varying_observation_specs(self, observation_keys,
                                        observation_shapes,
                                        observation_dtypes):
    """Vary the observation spec and step the flatten-wrapped environment.

    Args:
      observation_keys: Keys of the observation spec to build.
      observation_shapes: Shapes indexed in parallel with `observation_keys`.
      observation_dtypes: Dtype used for every observation `ArraySpec` and
        expected on the packed observation spec.
    """
    obs_spec = collections.OrderedDict(
        (key, array_spec.ArraySpec(observation_shapes[position],
                                   observation_dtypes))
        for position, key in enumerate(observation_keys))
    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)

    env = wrappers.FlattenObservationsWrapper(
        random_py_environment.RandomPyEnvironment(
            obs_spec, action_spec=action_spec))

    random_action = array_spec.sample_bounded_spec(
        action_spec, np.random.RandomState())
    time_step = env.step(random_action)

    # Check that all observations returned from the environment are packed
    # into one dimension.
    expected_shape = self._get_expected_shape(obs_spec, obs_spec.keys())
    self.assertEqual(time_step.observation.shape, expected_shape)

    expected_spec = array_spec.ArraySpec(
        shape=expected_shape,
        dtype=observation_dtypes,
        name='packed_observations')
    self.assertEqual(env.observation_spec(), expected_spec)
def testSampleFloat64FullRange(self):
    """Sample a float64 bounded spec over [0, 100] and check the coverage.

    The sample must have the spec's shape, contain no infinities, and put at
    least one value into each of 100 histogram bins over (0, 100).
    """
    shape = (100, 10, 10)
    bounded_spec = array_spec.BoundedArraySpec(
        shape, np.float64, minimum=0, maximum=100)
    random_state = np.random.RandomState()

    sample = array_spec.sample_bounded_spec(bounded_spec, random_state)

    self.assertTupleEqual((100, 10, 10), sample.shape)
    has_infinity = np.any(np.isinf(sample))
    self.assertFalse(has_infinity)

    counts, _ = np.histogram(sample, bins=100, range=(0, 100))
    every_bin_hit = np.all(counts > 0)
    self.assertTrue(every_bin_hit)
def testSampleUint64SmallRange(self):
    """Sample a uint64 bounded spec over [0, 100] and check the coverage.

    Currently skipped: the `skipTest` call comes first, so the checks below
    it are kept only for when the test is re-enabled.
    """
    self.skipTest("TODO(oars): Fix this test.")

    shape = (100, 10, 10)
    bounded_spec = array_spec.BoundedArraySpec(
        shape, np.uint64, minimum=0, maximum=100)
    random_state = np.random.RandomState()

    sample = array_spec.sample_bounded_spec(bounded_spec, random_state)

    self.assertTupleEqual((100, 10, 10), sample.shape)
    counts, _ = np.histogram(sample, bins=100, range=(0, 100))
    every_bin_hit = np.all(counts > 0)
    self.assertTrue(every_bin_hit)
def testSampleInt8(self):
    """Sample an int8 bounded spec over [-128, 127] and check the coverage.

    Currently skipped: the `skipTest` call comes first, so the checks below
    it are kept only for when the test is re-enabled.
    """
    self.skipTest("TODO(oars): Fix this test.")

    shape = (100, 10, 10)
    bounded_spec = array_spec.BoundedArraySpec(
        shape, np.int8, minimum=-128, maximum=127)
    random_state = np.random.RandomState()

    sample = array_spec.sample_bounded_spec(bounded_spec, random_state)

    self.assertTupleEqual((100, 10, 10), sample.shape)
    counts, _ = np.histogram(sample, bins=256, range=(-128, 127))
    every_bin_hit = np.all(counts > 0)
    self.assertTrue(every_bin_hit)
def testSampleInt64FullRange(self):
    """Sample an int64 bounded spec spanning the whole int64 range.

    The sample must have the spec's shape and put at least one value into
    each of 100 histogram bins laid over half of the int64 range on either
    side of zero.
    """
    int64_limits = np.iinfo(np.int64)
    shape = (100, 10, 10)
    bounded_spec = array_spec.BoundedArraySpec(
        shape, np.int64, minimum=int64_limits.min, maximum=int64_limits.max)
    random_state = np.random.RandomState()

    sample = array_spec.sample_bounded_spec(bounded_spec, random_state)

    self.assertTupleEqual((100, 10, 10), sample.shape)
    histogram_range = (int64_limits.min / 2, int64_limits.max / 2)
    counts, _ = np.histogram(sample, bins=100, range=histogram_range)
    every_bin_hit = np.all(counts > 0)
    self.assertTrue(every_bin_hit)
def test_unstack_actions(self):
    """Unstack a batched action array and check each element's shape."""
    num_envs = 5
    random_state = np.random.RandomState()
    samples = [
        array_spec.sample_bounded_spec(self.action_spec, random_state)
        for _ in range(num_envs)
    ]
    batched_action = np.array(samples)

    # Test that actions are correctly unstacked when just batched in
    # np.array.
    unstacked = batched_py_environment.unstack_actions(batched_action)
    for single_action in unstacked:
        self.assertAllEqual(self.action_spec.shape, single_action.shape)
def test_unstack_actions(self):
    """Unstack a batched action with a parallel environment's helper.

    Each unstacked action must have the shape given by the environment's
    action spec.
    """
    num_envs = 2
    env = self._make_parallel_py_environment(num_envs=num_envs)
    # Registered up front so the environment is closed even when one of the
    # assertions below fails.
    self.addCleanup(env.close)

    action_spec = env.action_spec()
    rng = np.random.RandomState()
    batched_action = np.array([
        array_spec.sample_bounded_spec(action_spec, rng)
        for _ in range(num_envs)
    ])

    # Test that actions are correctly unstacked when just batched in
    # np.array.
    unstacked_actions = env._unstack_actions(batched_action)
    for action in unstacked_actions:
        self.assertAllEqual(action_spec.shape, action.shape)
def test_get_info_gym_env(self, multithreading):
    """Check `get_info` on a batched mock gym environment.

    Right after `reset` the info must be an empty dict; after one `step` its
    'last_action' entry must equal the batched action that was passed in.

    Args:
      multithreading: Forwarded to the environment factory helper.
    """
    num_envs = 5
    rng = np.random.RandomState()
    gym_env = self._make_batched_mock_gym_py_environment(
        multithreading, num_envs=num_envs)
    # Registered up front so the environment is closed even when one of the
    # assertions below fails.
    self.addCleanup(gym_env.close)

    gym_env.reset()
    info = gym_env.get_info()
    self.assertEqual(info, {})

    action = np.stack([
        array_spec.sample_bounded_spec(self.action_spec, rng)
        for _ in range(num_envs)
    ])
    gym_env.step(action)
    info = gym_env.get_info()
    self.assertAllEqual(info['last_action'], action)
def test_unstack_nested_actions(self):
    """Unstack actions nested in a namedtuple and check every element.

    Each unstacked element must carry an `action` of the spec's shape and an
    `other_var` equal to 13.0.
    """
    num_envs = 5
    random_state = np.random.RandomState()
    samples = [
        array_spec.sample_bounded_spec(self.action_spec, random_state)
        for _ in range(num_envs)
    ]
    batched_action = np.array(samples)

    # Test that actions are correctly unstacked when nested in namedtuple.
    class NestedAction(
            collections.namedtuple('NestedAction', ['action', 'other_var'])):
        pass

    batched_nest = NestedAction(
        action=batched_action, other_var=np.full(num_envs, 13.0))

    unstacked = batched_py_environment.unstack_actions(batched_nest)
    for single_nest in unstacked:
        self.assertAllEqual(self.action_spec.shape, single_nest.action.shape)
        self.assertEqual(13.0, single_nest.other_var)
# agent = TFAgent()
print('Average Return: ', metric.result())

# Roll out `num_episodes` episodes with uniformly sampled actions, recording
# the total reward and the number of steps of each episode.
# The action spec and the random generator do not change between steps, so
# they are created once instead of on every iteration of the inner loop.
action_spec = env.action_spec()
rng = np.random.RandomState()

for _ in range(num_episodes):
    episode_reward = 0
    episode_steps = 0
    # for _ in range(1000):
    while not time_step.is_last():
        action = array_spec.sample_bounded_spec(action_spec, rng)
        # Add a leading batch dimension before handing the action to tf_env.
        actions = tf.expand_dims(action, axis=0)
        time_step = tf_env.step(actions)
        episode_steps += 1
        episode_reward += time_step.reward.numpy()
        tf_env.render()
    rewards.append(episode_reward)
    steps.append(episode_steps)
    # Start the next episode from a fresh initial time step.
    time_step = tf_env.reset()

num_steps = np.sum(steps)
avg_length = np.mean(steps)