Ejemplo n.º 1
0
 def test_reraise_exception_in_step(self):
   """Check that an exception during `step` reaches the calling process.

   The mock environment is built with `crash_at_step=3`. Two steps are issued
   normally; the third is wrapped in `assertRaises`.
   """
   make_env = functools.partial(MockEnvironmentCrashInStep, crash_at_step=3)
   proc_env = parallel_py_environment.ProcessPyEnvironment(make_env)
   proc_env.start()
   proc_env.reset()
   spec = proc_env.action_spec()
   random_state = np.random.RandomState()
   # Two steps are taken outside the assertion.
   for _ in range(2):
     proc_env.step(array_spec.sample_bounded_spec(spec, random_state))
   # The next step is the one expected to raise.
   with self.assertRaises(Exception):
     proc_env.step(array_spec.sample_bounded_spec(spec, random_state))
Ejemplo n.º 2
0
    def test_step(self):
        """Step a parallel environment twice and check the batched shapes.

        Asserts that the leading axis of the observation and of the action
        equals `num_envs`, that the remaining axes match the respective spec
        shapes, and that a second step yields an observation of the same shape.
        """
        num_envs = 2
        env = self._make_parallel_py_environment(num_envs=num_envs)
        # Close the environment even when an assertion below fails, so a
        # failing test does not leave it open.
        try:
            action_spec = env.action_spec()
            observation_spec = env.observation_spec()
            rng = np.random.RandomState()
            action = np.array([
                array_spec.sample_bounded_spec(action_spec, rng)
                for _ in range(num_envs)
            ])
            env.reset()

            # Take one step and assert observation is batched the right way.
            time_step = env.step(action)
            self.assertEqual(num_envs, time_step.observation.shape[0])
            self.assertAllEqual(observation_spec.shape,
                                time_step.observation.shape[1:])
            self.assertEqual(num_envs, action.shape[0])
            self.assertAllEqual(action_spec.shape, action.shape[1:])

            # Take another step and assert that observations have the same
            # shape.
            time_step2 = env.step(action)
            self.assertAllEqual(time_step.observation.shape,
                                time_step2.observation.shape)
        finally:
            env.close()
Ejemplo n.º 3
0
  def test_batch_env(self):
    """Step and reset a batched, wrapped environment and compare dict keys.

    After both `step` and `reset`, the observation must be a dict whose keys
    equal the keys of the wrapped environment's observation spec.
    """
    obs_spec = collections.OrderedDict({
        'obs1': array_spec.ArraySpec((1,), np.int32),
        'obs2': array_spec.ArraySpec((2,), np.int32),
    })
    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)

    # Build a random py environment with a batch size, then wrap it.
    batch_size = 4
    base_env = random_py_environment.RandomPyEnvironment(
        obs_spec, action_spec=action_spec, batch_size=batch_size)
    env = MockGoalReplayEnvWrapper(base_env)
    sampled_action = array_spec.sample_bounded_spec(
        action_spec, np.random.RandomState())

    # Observation returned by a step.
    stepped = env.step(sampled_action)
    self.assertIsInstance(stepped.observation, dict)
    stepped_obs = cast(Mapping[Text, Any], stepped.observation)
    spec_mapping = cast(Mapping[Text, Any], env.observation_spec())
    self.assertEqual(stepped_obs.keys(), spec_mapping.keys())

    # Observation returned by a reset.
    restarted = env.reset()
    self.assertIsInstance(restarted.observation, dict)
    reset_obs = cast(Mapping[Text, Any], restarted.observation)
    self.assertEqual(reset_obs.keys(), spec_mapping.keys())
Ejemplo n.º 4
0
  def test_with_varying_observation_filters(self, observations_to_keep):
    """Flatten only an allowlisted subset of the observations.

    Args:
      observations_to_keep: Observation key or keys to keep. The value is
        wrapped in a list and flattened to a 1-D array before being handed to
        the wrapper as its allowlist.
    """
    obs_spec = collections.OrderedDict({
        'obs1': array_spec.ArraySpec((1,), np.int32),
        'obs2': array_spec.ArraySpec((2,), np.int32),
        'obs3': array_spec.ArraySpec((3,), np.int32)
    })

    allowlist = np.array([observations_to_keep]).flatten()
    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)

    base_env = random_py_environment.RandomPyEnvironment(
        obs_spec, action_spec=action_spec)
    # The wrapper receives the allowlist of observations to keep before it
    # packs them into a single dimension.
    flat_env = wrappers.FlattenObservationsWrapper(
        base_env, observations_allowlist=allowlist)
    sampled_action = array_spec.sample_bounded_spec(
        action_spec, np.random.RandomState())
    time_step = flat_env.step(sampled_action)

    # Expected shape: the summed lengths of the spec entries that survive the
    # allowlist filter.
    expected_shape = self._get_expected_shape(obs_spec, allowlist)
    # Both the stepped observation and the environment's spec must agree
    # with that shape.
    self.assertEqual(time_step.observation.shape, expected_shape)
    actual_spec = flat_env.observation_spec()
    self.assertEqual(
        actual_spec,
        array_spec.ArraySpec(
            shape=expected_shape, dtype=np.int32, name='packed_observations'))
Ejemplo n.º 5
0
  def test_batch_env(self):
    """Flatten the observations of a batched environment and check shapes.

    The stepped observation must have shape `(batch_size, flattened_length)`,
    while the wrapper's observation spec carries the unbatched flattened
    shape.
    """
    obs_spec = collections.OrderedDict({
        'obs1': array_spec.ArraySpec((1,), np.int32),
        'obs2': array_spec.ArraySpec((2,), np.int32),
    })

    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)

    # Build a random py environment with a batch size and wrap it.
    batch_size = 4
    flat_env = wrappers.FlattenObservationsWrapper(
        random_py_environment.RandomPyEnvironment(
            obs_spec, action_spec=action_spec, batch_size=batch_size))

    sampled_action = array_spec.sample_bounded_spec(
        action_spec, np.random.RandomState())
    time_step = flat_env.step(sampled_action)

    expected_shape = self._get_expected_shape(obs_spec, obs_spec.keys())
    batched_shape = (batch_size, expected_shape[0])
    self.assertEqual(time_step.observation.shape, batched_shape)
    actual_spec = flat_env.observation_spec()
    self.assertEqual(
        actual_spec,
        array_spec.ArraySpec(
            shape=expected_shape, dtype=np.int32, name='packed_observations'))
Ejemplo n.º 6
0
  def test_with_varying_observation_specs(
      self, observation_keys, observation_shapes, observation_dtypes):
    """Build a spec from the parameters, then step and reset a wrapped env.

    Args:
      observation_keys: Keys of the observation spec, in order.
      observation_shapes: Shapes indexed in step with `observation_keys`.
      observation_dtypes: Passed as the dtype of every spec entry.
    """
    obs_spec = collections.OrderedDict(
        (key, array_spec.ArraySpec(observation_shapes[position],
                                   observation_dtypes))
        for position, key in enumerate(observation_keys))
    action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)

    base_env = random_py_environment.RandomPyEnvironment(
        obs_spec, action_spec=action_spec)
    env = MockGoalReplayEnvWrapper(base_env)
    sampled_action = array_spec.sample_bounded_spec(
        action_spec, np.random.RandomState())

    # Observation returned by a step.
    stepped = env.step(sampled_action)
    self.assertIsInstance(stepped.observation, dict)
    stepped_obs = cast(Mapping[Text, Any], stepped.observation)
    spec_mapping = cast(Mapping[Text, Any], env.observation_spec())
    self.assertEqual(stepped_obs.keys(), spec_mapping.keys())

    # Observation returned by a reset.
    restarted = env.reset()
    self.assertIsInstance(restarted.observation, dict)
    reset_obs = cast(Mapping[Text, Any], restarted.observation)
    self.assertEqual(reset_obs.keys(), spec_mapping.keys())
Ejemplo n.º 7
0
 def test_close_no_hang_after_step(self):
   """Start a process environment, reset, step twice, then close it."""
   observation_spec = array_spec.ArraySpec((3, 3), np.float32)
   bounded_action_spec = array_spec.BoundedArraySpec(
       [1], np.float32, minimum=-1.0, maximum=1.0)
   make_env = functools.partial(
       random_py_environment.RandomPyEnvironment,
       observation_spec,
       bounded_action_spec,
       episode_end_probability=0,
       min_duration=5,
       max_duration=5)
   random_state = np.random.RandomState()
   proc_env = parallel_py_environment.ProcessPyEnvironment(make_env)
   proc_env.start()
   spec = proc_env.action_spec()
   proc_env.reset()
   # Two steps precede the close call under test.
   for _ in range(2):
     proc_env.step(array_spec.sample_bounded_spec(spec, random_state))
   proc_env.close()
Ejemplo n.º 8
0
    def test_with_varying_observation_specs(self, observation_keys,
                                            observation_shapes,
                                            observation_dtypes):
        """Flatten observations built from a parameterised spec.

        Args:
            observation_keys: Keys of the observation spec, in order.
            observation_shapes: Shapes indexed in step with
                `observation_keys`.
            observation_dtypes: Passed as the dtype of every spec entry and
                of the expected packed spec.
        """
        obs_spec = collections.OrderedDict(
            (key, array_spec.ArraySpec(observation_shapes[position],
                                       observation_dtypes))
            for position, key in enumerate(observation_keys))
        action_spec = array_spec.BoundedArraySpec((), np.int32, -10, 10)

        flat_env = wrappers.FlattenObservationsWrapper(
            random_py_environment.RandomPyEnvironment(
                obs_spec, action_spec=action_spec))
        sampled_action = array_spec.sample_bounded_spec(
            action_spec, np.random.RandomState())
        time_step = flat_env.step(sampled_action)

        # Every observation coming back from the environment should be packed
        # into a single dimension.
        expected_shape = self._get_expected_shape(obs_spec, obs_spec.keys())
        self.assertEqual(time_step.observation.shape, expected_shape)
        actual_spec = flat_env.observation_spec()
        self.assertEqual(
            actual_spec,
            array_spec.ArraySpec(shape=expected_shape,
                                 dtype=observation_dtypes,
                                 name='packed_observations'))
Ejemplo n.º 9
0
 def testSampleFloat64FullRange(self):
   """Sample float64 values in [0, 100] and check shape and coverage.

   The sample must have the spec's shape, contain no infinities, and populate
   every one of 100 histogram bins over (0, 100).
   """
   shape = (100, 10, 10)
   random_state = np.random.RandomState()
   bounded_spec = array_spec.BoundedArraySpec(
       shape, np.float64, minimum=0, maximum=100)
   drawn = array_spec.sample_bounded_spec(bounded_spec, random_state)
   self.assertTupleEqual((100, 10, 10), drawn.shape)
   self.assertFalse(np.any(np.isinf(drawn)))
   counts, _ = np.histogram(drawn, bins=100, range=(0, 100))
   # Every bin must have received at least one sample.
   self.assertTrue(np.all(counts > 0))
Ejemplo n.º 10
0
 def testSampleUint64SmallRange(self):
   """Sample uint64 values in [0, 100]; currently skipped.

   The `skipTest` call on the first line ends the test before anything below
   it runs.
   """
   self.skipTest("TODO(oars): Fix this test.")
   shape = (100, 10, 10)
   random_state = np.random.RandomState()
   bounded_spec = array_spec.BoundedArraySpec(
       shape, np.uint64, minimum=0, maximum=100)
   drawn = array_spec.sample_bounded_spec(bounded_spec, random_state)
   self.assertTupleEqual((100, 10, 10), drawn.shape)
   counts, _ = np.histogram(drawn, bins=100, range=(0, 100))
   # Every bin must have received at least one sample.
   self.assertTrue(np.all(counts > 0))
Ejemplo n.º 11
0
 def testSampleInt8(self):
   """Sample int8 values in [-128, 127]; currently skipped.

   The `skipTest` call on the first line ends the test before anything below
   it runs.
   """
   self.skipTest("TODO(oars): Fix this test.")
   shape = (100, 10, 10)
   random_state = np.random.RandomState()
   bounded_spec = array_spec.BoundedArraySpec(
       shape, np.int8, minimum=-128, maximum=127)
   drawn = array_spec.sample_bounded_spec(bounded_spec, random_state)
   self.assertTupleEqual((100, 10, 10), drawn.shape)
   counts, _ = np.histogram(drawn, bins=256, range=(-128, 127))
   # Every bin must have received at least one sample.
   self.assertTrue(np.all(counts > 0))
Ejemplo n.º 12
0
 def testSampleInt64FullRange(self):
   """Sample int64 values over the dtype's full range and check coverage.

   The histogram spans half of the int64 minimum to half of the int64
   maximum, and each of its 100 bins must be populated.
   """
   int64_limits = np.iinfo(np.int64)
   random_state = np.random.RandomState()
   bounded_spec = array_spec.BoundedArraySpec(
       (100, 10, 10),
       np.int64,
       minimum=int64_limits.min,
       maximum=int64_limits.max)
   drawn = array_spec.sample_bounded_spec(bounded_spec, random_state)
   self.assertTupleEqual((100, 10, 10), drawn.shape)
   histogram_range = (int64_limits.min / 2, int64_limits.max / 2)
   counts, _ = np.histogram(drawn, bins=100, range=histogram_range)
   # Every bin must have received at least one sample.
   self.assertTrue(np.all(counts > 0))
  def test_unstack_actions(self):
    """Unstack a batched action array and check each element's shape."""
    num_envs = 5
    spec = self.action_spec
    random_state = np.random.RandomState()
    sampled = [
        array_spec.sample_bounded_spec(spec, random_state)
        for _ in range(num_envs)
    ]
    batched_action = np.array(sampled)

    # Actions batched in a plain np.array must unstack into pieces that each
    # have the spec's shape.
    pieces = batched_py_environment.unstack_actions(batched_action)
    for single_action in pieces:
      self.assertAllEqual(spec.shape, single_action.shape)
Ejemplo n.º 14
0
    def test_unstack_actions(self):
        """Unstack a batched action array via the parallel environment.

        Each unstacked action must have the shape of the environment's action
        spec.
        """
        num_envs = 2
        env = self._make_parallel_py_environment(num_envs=num_envs)
        # Close the environment even when an assertion below fails, so a
        # failing test does not leave it open.
        try:
            action_spec = env.action_spec()
            rng = np.random.RandomState()
            batched_action = np.array([
                array_spec.sample_bounded_spec(action_spec, rng)
                for _ in range(num_envs)
            ])

            # Test that actions are correctly unstacked when just batched in
            # np.array.
            unstacked_actions = env._unstack_actions(batched_action)
            for action in unstacked_actions:
                self.assertAllEqual(action_spec.shape, action.shape)
        finally:
            env.close()
Ejemplo n.º 15
0
 def test_get_info_gym_env(self, multithreading):
     """Check `get_info` on a batched mock gym environment.

     After `reset`, the info must be an empty dict; after a step, its
     'last_action' entry must equal the stacked action that was sent.

     Args:
       multithreading: Forwarded to the environment factory helper.
     """
     num_envs = 5
     rng = np.random.RandomState()
     gym_env = self._make_batched_mock_gym_py_environment(multithreading,
                                                          num_envs=num_envs)
     # Close the environment even when an assertion below fails, so a failing
     # test does not leave it open.
     try:
         gym_env.reset()
         info = gym_env.get_info()
         self.assertEqual(info, {})
         action = np.stack([
             array_spec.sample_bounded_spec(self.action_spec, rng)
             for _ in range(num_envs)
         ])
         gym_env.step(action)
         info = gym_env.get_info()
         self.assertAllEqual(info['last_action'], action)
     finally:
         gym_env.close()
  def test_unstack_nested_actions(self):
    """Unstack actions that are nested inside a namedtuple.

    Each unstacked element must keep the namedtuple structure, with `action`
    matching the action spec's shape and `other_var` equal to 13.0.
    """
    num_envs = 5
    spec = self.action_spec
    random_state = np.random.RandomState()
    sampled = [
        array_spec.sample_bounded_spec(spec, random_state)
        for _ in range(num_envs)
    ]
    batched_action = np.array(sampled)

    # Namedtuple subclass that serves as the nested action container.
    class NestedAction(
        collections.namedtuple('NestedAction', ['action', 'other_var'])):
      pass

    batched_nest = NestedAction(
        action=batched_action, other_var=np.array([13.0] * num_envs))
    pieces = batched_py_environment.unstack_actions(batched_nest)
    for single_nest in pieces:
      self.assertAllEqual(spec.shape, single_nest.action.shape)
      self.assertEqual(13.0, single_nest.other_var)
Ejemplo n.º 17
0
# agent = TFAgent()

print('Average Return: ', metric.result())

# NOTE(review): `metric`, `num_episodes`, `time_step`, `rewards`, `steps`,
# `env` and `tf_env` are not defined in this fragment; presumably they are set
# up earlier in the script -- confirm.

# Look up the action spec and build the random generator once, instead of
# repeating both on every step of every episode.
action_spec = env.action_spec()
rng = np.random.RandomState()

for _ in range(num_episodes):

    episode_reward = 0
    episode_steps = 0

    # for _ in range(1000):
    while not time_step.is_last():

        # Draw a random action within the spec's bounds.
        action = array_spec.sample_bounded_spec(action_spec, rng)
        # Insert a leading axis of size 1 before stepping `tf_env`
        # (presumably the batch dimension it expects -- confirm).
        actions = tf.expand_dims(action, axis=0)

        time_step = tf_env.step(actions)

        episode_steps += 1
        episode_reward += time_step.reward.numpy()
        tf_env.render()

    # Record the finished episode and start the next one from a reset.
    rewards.append(episode_reward)
    steps.append(episode_steps)
    time_step = tf_env.reset()

num_steps = np.sum(steps)
avg_length = np.mean(steps)