Beispiel #1
0
  def _action(self, time_step, policy_state, seed: Optional[types.Seed] = None):
    """Produces a randomly sampled action together with randomly sampled info.

    When an observation/action-constraint splitter is configured, the action
    is drawn from a masked categorical distribution built from the mask the
    splitter returns; otherwise it is sampled from the action spec.

    Args:
      time_step: Time step whose observation is used to infer the outer
        dimensions and, when a splitter is set, to extract the mask.
      policy_state: Policy state, passed through to the result unchanged.
      seed: Ignored by this method.

    Returns:
      A `PolicyStep` holding the sampled action, `policy_state` and the
      sampled info.
    """
    del seed  # Unused. Seed passed to the class.

    batch_dims = self._outer_dims
    if batch_dims is None:
      # Nothing was configured: derive the outer dims from the observation,
      # or use none at all when the observation spec is empty.
      batch_dims = (
          nest_utils.get_outer_array_shape(
              time_step.observation, self.time_step_spec.observation)
          if self.time_step_spec.observation else ())

    splitter = self.observation_and_action_constraint_splitter
    if splitter is None:
      action = array_spec.sample_spec_nest(
          self._action_spec, self._rng, outer_dims=batch_dims)
    else:
      _, mask = splitter(time_step.observation)

      # All-zero logits, so only the mask distinguishes the candidate actions
      # (assumption about MaskedCategorical semantics -- confirm).
      uniform_logits = tf.cast(tf.zeros_like(mask), tf.float32)
      distribution = masked.MaskedCategorical(uniform_logits, mask)
      shifted_sample = distribution.sample() + self.action_spec.minimum
      action = tf.cast(shifted_sample, self.action_spec.dtype)

      # A rank-1 action spec means each action is shaped (1,), so append an
      # axis to end up with (B, 1) instead of (B,).
      if len(self.action_spec.shape) == 1:
        action = tf.expand_dims(action, axis=-1)

    sampled_info = array_spec.sample_spec_nest(
        self._info_spec, self._rng, outer_dims=batch_dims)

    return policy_step.PolicyStep(action, policy_state, sampled_info)
    def testArraySpecSample(self, dtype):
        """Checks that a sample of an unbounded spec lies within the limits
        reported by the equivalent bounded spec."""
        unbounded = array_spec.ArraySpec((2, 3), dtype)
        drawn = array_spec.sample_spec_nest(unbounded, self.rng)

        limits = array_spec.BoundedArraySpec.from_spec(unbounded)
        at_or_above_minimum = np.all(drawn >= limits.minimum)
        self.assertTrue(at_or_above_minimum)
        at_or_below_maximum = np.all(drawn <= limits.maximum)
        self.assertTrue(at_or_below_maximum)
    def testSavedModel(self):
        """Checks that a policy reloaded from a SavedModel produces the same
        action as the original TF policy for one sampled time step."""

        def _as_numpy(tensor):
            return tensor.numpy()

        export_dir = os.path.join(self.get_temp_dir(), 'saved_policy')
        policy_saver.PolicySaver(self.tf_policy).save(export_dir)

        reloaded_policy = py_tf_eager_policy.SavedModelPyTFEagerPolicy(
            export_dir, self.time_step_spec, self.action_spec)

        rng = np.random.RandomState()
        time_step = array_spec.sample_spec_nest(self.time_step_spec, rng)
        batched_time_step = nest_utils.batch_nested_array(time_step)

        # The TF policy consumes the batched time step; unbatch its output so
        # it can be compared with the action of the reloaded policy.
        expected_step = tf.nest.map_structure(
            _as_numpy,
            nest_utils.unbatch_nested_tensors(
                self.tf_policy.action(batched_time_step)))
        reloaded_step = reloaded_policy.action(time_step)

        tf.nest.assert_same_structure(reloaded_step.action, self.action_spec)

        np.testing.assert_array_almost_equal(expected_step.action,
                                             reloaded_step.action)
 def testBoundedArraySpecNoBounds(self, dtype):
     """Checks that a sample of a bounded spec built without explicit bounds
     stays within the min/max of the matching TensorFlow dtype."""
     plain_spec = array_spec.ArraySpec((2, 3), dtype)
     drawn = array_spec.sample_spec_nest(
         array_spec.BoundedArraySpec.from_spec(plain_spec), self.rng)
     dtype_limits = tf.as_dtype(plain_spec.dtype)
     not_below_min = np.all(drawn >= dtype_limits.min)
     self.assertTrue(not_below_min)
     not_above_max = np.all(drawn <= dtype_limits.max)
     self.assertTrue(not_above_max)
  def _action(self,
              time_step: ts.TimeStep,
              policy_state: types.NestedArray) -> ps.PolicyStep:
    """Samples an action from the action spec.

    Args:
      time_step: Not read by this method.
      policy_state: Policy state, returned unchanged in the result.

    Returns:
      A `PolicyStep` with the sampled action and the given `policy_state`.
    """
    sampled_action = array_spec.sample_spec_nest(self._action_spec, self._rng)
    return ps.PolicyStep(sampled_action, policy_state)
 def testBoundedArraySpecSampleMultipleBounds(self, dtype):
     """Checks that each sampled element respects its own pair of bounds."""
     spec = array_spec.BoundedArraySpec((2, ), dtype, [-10, 1], [10, 3])
     drawn = array_spec.sample_spec_nest(spec, self.rng)
     # One (low, high) pair per element, in element order.
     for position, (low, high) in enumerate(((-10, 10), (1, 3))):
         self.assertGreaterEqual(drawn[position], low)
         self.assertLessEqual(drawn[position], high)
    def testNestSampleOuterDims(self, dtype):
        """Checks bounds, structure and batched shapes of a nested sample
        drawn with outer dimensions [2, 3]."""
        nested_spec = example_nested_spec(dtype)
        batch_dims = [2, 3]
        drawn = array_spec.sample_spec_nest(
            nested_spec, self.rng, outer_dims=batch_dims)

        limits = array_spec.BoundedArraySpec.from_spec(
            nested_spec["array_spec_1"])
        array_sample = drawn["array_spec_1"]
        self.assertTrue(np.all(array_sample >= limits.minimum))
        self.assertTrue(np.all(array_sample <= limits.maximum))

        bounded_sample = drawn["bounded_spec_1"]
        self.assertTrue(np.all(bounded_sample >= -10))
        self.assertTrue(np.all(bounded_sample <= 10))

        nested_dict = drawn["dict_spec"]
        self.assertIn("array_spec_2", nested_dict)
        self.assertIn("bounded_spec_2", nested_dict)

        self.assertIn("tuple_spec", drawn)

        self.assertIn("list_spec", drawn)
        list_entry = drawn["list_spec"][1][1]
        self.assertTrue(np.all(list_entry >= -10))
        self.assertTrue(np.all(list_entry <= 10))

        def _check_batched_shape(leaf_sample, leaf_spec):
            # Each leaf must carry the outer dims in front of its spec shape.
            expected_shape = batch_dims + list(leaf_spec.shape)
            self.assertSequenceEqual(leaf_sample.shape, expected_shape)

        tf.nest.map_structure(_check_batched_shape, drawn, nested_spec)
    def testArraySpecSampleWithName(self, dtype):
        """Checks sampling of a named spec and that the name carries over to
        the bounded spec derived from it."""
        named_spec = array_spec.ArraySpec((2, 3), dtype, name="test_spec")
        drawn = array_spec.sample_spec_nest(named_spec, self.rng)

        limits = array_spec.BoundedArraySpec.from_spec(named_spec)
        within_lower = np.all(drawn >= limits.minimum)
        self.assertTrue(within_lower)
        within_upper = np.all(drawn <= limits.maximum)
        self.assertTrue(within_upper)
        self.assertEqual("test_spec", limits.name)
Beispiel #9
0
    def test_serialize_deserialize(self, dtype):
        """Checks that a nested sample survives a serialize/decode round trip
        (compared with almost-equal tolerance)."""
        nested_spec = example_nested_spec(dtype)
        encode = example_encoding.get_example_serializer(nested_spec)
        decode = example_encoding.get_example_decoder(nested_spec)

        # Fixed seed, so the sample is the same on every run.
        rng = np.random.RandomState(0)
        original = array_spec.sample_spec_nest(nested_spec, rng)
        serialized = encode(original)

        round_tripped = self.evaluate(decode(serialized))
        tf.nest.map_structure(
            np.testing.assert_almost_equal, original, round_tripped)
Beispiel #10
0
  def _action(self, time_step, policy_state):
    """Samples a random action from the action spec.

    Args:
      time_step: Time step whose observation is used to infer the outer
        dimensions when none were configured on the policy.
      policy_state: Policy state, returned unchanged in the result.

    Returns:
      A `PolicyStep` with the sampled action and the given `policy_state`.
    """
    batch_dims = self._outer_dims
    if batch_dims is None:
      # Fall back to the observation's outer shape, or to no outer dims when
      # the observation spec is empty.
      batch_dims = (
          nest_utils.get_outer_array_shape(
              time_step.observation, self.time_step_spec.observation)
          if self.time_step_spec.observation else ())

    sampled_action = array_spec.sample_spec_nest(
        self._action_spec, self._rng, outer_dims=batch_dims)
    return policy_step.PolicyStep(sampled_action, policy_state)
Beispiel #11
0
    def testInferenceFromCheckpoint(self):
        """Checks that updating a saved-model policy from a checkpoint
        replaces its variables with the checkpointed values."""

        def _set_to_minus_one(variable):
            return variable.assign(variable * 0 + -1)

        def _assert_differs(before, after):
            self.assertFalse(np.equal(before, after).all())

        def _assert_identical(left, right):
            self.assertTrue(np.equal(left, right).all())

        export_dir = os.path.join(self.get_temp_dir(), 'saved_policy')
        saver = policy_saver.PolicySaver(self.tf_policy)
        saver.save(export_dir)

        rng = np.random.RandomState()
        time_step = array_spec.sample_spec_nest(self.time_step_spec, rng)
        batched_time_step = nest_utils.batch_nested_array(time_step)

        # Change the variables after the SavedModel was written, then
        # checkpoint the changed values.
        self.evaluate(
            tf.nest.map_structure(_set_to_minus_one,
                                  self.tf_policy.variables()))
        checkpoint_dir = os.path.join(self.get_temp_dir(), 'checkpoint')
        saver.save_checkpoint(checkpoint_dir)

        reloaded_policy = py_tf_eager_policy.SavedModelPyTFEagerPolicy(
            export_dir, self.time_step_spec, self.action_spec)

        # Use evaluate to force a copy.
        variables_before_update = self.evaluate(reloaded_policy.variables())

        checkpoint = tf.train.Checkpoint(policy=reloaded_policy._policy)
        manager = tf.train.CheckpointManager(
            checkpoint, directory=checkpoint_dir, max_to_keep=None)

        reloaded_policy.update_from_checkpoint(manager.latest_checkpoint)

        # The update must have changed the reloaded variables ...
        tf.nest.map_structure(_assert_differs, variables_before_update,
                              self.evaluate(reloaded_policy.variables()))

        # ... so that they now match the current TF policy.
        tf.nest.map_structure(_assert_identical,
                              self.evaluate(self.tf_policy.variables()),
                              self.evaluate(reloaded_policy.variables()))

        # Can't check if the action is different as in some cases depending on
        # variable initialization it will be the same. Checking that they are
        # at least always the same.
        action_from_checkpoint = reloaded_policy.action(time_step)

        action_from_tf_policy = self.evaluate(
            nest_utils.unbatch_nested_tensors(
                self.tf_policy.action(batched_time_step)))
        tf.nest.map_structure(_assert_identical, action_from_tf_policy,
                              action_from_checkpoint)
Beispiel #12
0
  def testValidateOk(self):
    """Runs the environment validator for two episodes on a mock environment
    whose observations are sampled from its own observation spec."""
    env = get_mock_env(self._action_spec, self._observation_spec, None)
    rng = np.random.RandomState()

    def sample_observation():
      return array_spec.sample_spec_nest(env.observation_spec(), rng)

    def step(unused_time_step):
      # A draw below 0.10 ends the episode; anything else is a transition.
      episode_over = rng.rand() < 0.10
      if not episode_over:
        return ts.transition(sample_observation(), 1.0)  # pytype: disable=wrong-arg-types
      return ts.termination(sample_observation(), 0.0)  # pytype: disable=wrong-arg-types

    def reset():
      return ts.restart(sample_observation())

    env.step = step
    env.reset = reset

    utils.validate_py_environment(env, episodes=2)
Beispiel #13
0
  def testValidateBoundedSpecDistinctBounds(self):
    """Runs the environment validator for one episode on a mock environment
    whose bounded observation spec has different bounds per element."""
    bounded_observation_spec = array_spec.BoundedArraySpec(
        (3,), np.int32, [-10, -5, -2], [10, 5, 2])
    env = get_mock_env(self._action_spec, bounded_observation_spec, None)
    rng = np.random.RandomState()

    def sample_observation():
      return array_spec.sample_spec_nest(env.observation_spec(), rng)

    def step(unused_time_step):
      # A draw below 0.10 ends the episode; anything else is a transition.
      episode_over = rng.rand() < 0.10
      if not episode_over:
        return ts.transition(sample_observation(), 1.0)  # pytype: disable=wrong-arg-types
      return ts.termination(sample_observation(), 0.0)  # pytype: disable=wrong-arg-types

    def reset():
      return ts.restart(sample_observation())

    env.step = step
    env.reset = reset
    utils.validate_py_environment(env, episodes=1)
    def testSavedModel(self):
        """Builds an actor policy, saves it as a SavedModel and checks that
        the reloaded policy produces the same action as the original."""
        if not common.has_eager_been_enabled():
            self.skipTest('Only supported in eager.')

        def _as_numpy(tensor):
            return tensor.numpy()

        # Array specs describe the Python-side policy; their tensor-spec
        # counterparts are used to build the network and the TF policy.
        observation_spec = array_spec.ArraySpec([2], np.float32)
        action_spec = array_spec.BoundedArraySpec([1], np.float32, 2, 3)
        time_step_spec = ts.time_step_spec(observation_spec)

        observation_tensor_spec = tensor_spec.from_spec(observation_spec)
        action_tensor_spec = tensor_spec.from_spec(action_spec)
        time_step_tensor_spec = tensor_spec.from_spec(time_step_spec)

        network = actor_network.ActorNetwork(
            observation_tensor_spec,
            action_tensor_spec,
            fc_layer_params=(10, ),
        )

        tf_policy = actor_policy.ActorPolicy(
            time_step_tensor_spec, action_tensor_spec, actor_network=network)

        export_dir = os.path.join(self.get_temp_dir(), 'saved_policy')
        policy_saver.PolicySaver(tf_policy).save(export_dir)

        reloaded_policy = py_tf_eager_policy.SavedModelPyTFEagerPolicy(
            export_dir, time_step_spec, action_spec)

        rng = np.random.RandomState()
        time_step = array_spec.sample_spec_nest(time_step_spec, rng)
        batched_time_step = nest_utils.batch_nested_array(time_step)

        # The TF policy consumes the batched time step; unbatch its output so
        # it can be compared with the action of the reloaded policy.
        expected_step = tf.nest.map_structure(
            _as_numpy,
            nest_utils.unbatch_nested_tensors(
                tf_policy.action(batched_time_step)))
        reloaded_step = reloaded_policy.action(time_step)

        tf.nest.assert_same_structure(reloaded_step.action, action_spec)

        np.testing.assert_array_almost_equal(expected_step.action,
                                             reloaded_step.action)
    def testNestSample(self, dtype):
        """Checks bounds and structure of a sample drawn from a nested spec."""
        nested_spec = example_nested_spec(dtype)
        drawn = array_spec.sample_spec_nest(nested_spec, self.rng)

        limits = array_spec.BoundedArraySpec.from_spec(
            nested_spec["array_spec_1"])
        array_sample = drawn["array_spec_1"]
        self.assertTrue(np.all(array_sample >= limits.minimum))
        self.assertTrue(np.all(array_sample <= limits.maximum))

        bounded_sample = drawn["bounded_spec_1"]
        self.assertTrue(np.all(bounded_sample >= -10))
        self.assertTrue(np.all(bounded_sample <= 10))

        nested_dict = drawn["dict_spec"]
        self.assertIn("array_spec_2", nested_dict)
        self.assertIn("bounded_spec_2", nested_dict)

        self.assertIn("tuple_spec", drawn)

        self.assertIn("list_spec", drawn)
        list_entry = drawn["list_spec"][1][1]
        self.assertTrue(np.all(list_entry >= -10))
        self.assertTrue(np.all(list_entry <= 10))
Beispiel #16
0
    def _action(self, time_step, policy_state):
        """Returns the action picked by the scripted human policy, plus
        randomly sampled info.

        Args:
            time_step: Time step whose observation is converted and handed to
                the human policy, and used to infer the outer dimensions
                when none were configured.
            policy_state: Policy state, returned unchanged in the result.

        Returns:
            A `PolicyStep` holding a one-element array with the chosen
            action's `value`, the given `policy_state` and the sampled info.
        """
        batch_dims = self._outer_dims
        if batch_dims is None:
            # Fall back to the observation's outer shape, or to no outer
            # dims when the observation spec is empty.
            batch_dims = (
                nest_utils.get_outer_array_shape(
                    time_step.observation, self.time_step_spec.observation)
                if self.time_step_spec.observation else ())

        # Unlike the info below, the action is not sampled from a spec.
        numpy_observation = self.__convert_tf_obs_to_numpy(
            time_step.observation)
        wrapped_agent = human_agent_wrapper(numpy_observation)
        chosen = simple_human_policy(wrapped_agent)
        action = np.array([chosen.value])

        sampled_info = array_spec.sample_spec_nest(
            self._info_spec, self._rng, outer_dims=batch_dims)

        return policy_step.PolicyStep(action, policy_state, sampled_info)
 def testBoundedArraySpecSample(self, dtype):
     """Checks that a sample of a spec bounded to [-10, 10] stays in range."""
     bounded_spec = array_spec.BoundedArraySpec((2, 3), dtype, -10, 10)
     drawn = array_spec.sample_spec_nest(bounded_spec, self.rng)
     not_below_min = np.all(drawn >= -10)
     self.assertTrue(not_below_min)
     not_above_max = np.all(drawn <= 10)
     self.assertTrue(not_above_max)
Beispiel #18
0
 def _get_observation(self):
     """Samples an observation from the observation spec.

     One outer dimension of size `self._batch_size` is requested when that
     attribute is truthy; otherwise no outer dimensions are requested.
     """
     if self._batch_size:
         outer_dims = (self._batch_size, )
     else:
         outer_dims = ()
     return array_spec.sample_spec_nest(
         self._observation_spec, self._rng, outer_dims)
 def testMatch(self, dtype):
     """Checks that a sample drawn from a nested spec is accepted by
     `check_arrays_nest` against that same spec."""
     nested_spec = example_nested_spec(dtype)
     rng = np.random.RandomState()
     drawn = array_spec.sample_spec_nest(nested_spec, rng)
     matches = array_spec.check_arrays_nest(drawn, nested_spec)
     self.assertTrue(matches)