Example #1
0
    def test_build(self, outer_dims):
        """Check the output shape of an actor network with a shape-[1] action.

        Args:
            outer_dims: leading dimensions prepended to the observation
                shape; the produced actions must carry the same leading
                dimensions followed by the action spec's shape.
        """
        obs_size = 5
        leading_dims = list(outer_dims)

        # Action bounded to [2., 3.], observation is a flat float vector.
        bounded_action_spec = tensor_spec.BoundedTensorSpec(
            [1], tf.float32, 2., 3.)
        observation_spec = tensor_spec.TensorSpec([obs_size], tf.float32)
        network = actor_network.ActorNetwork(
            observation_spec, bounded_action_spec)

        observations = tf.random.uniform(leading_dims + [obs_size])
        # The second element returned by the network is not inspected here.
        actions, _ = network(observations)

        actual_shape = actions.shape.as_list()
        expected_shape = leading_dims + bounded_action_spec.shape.as_list()
        self.assertAllEqual(actual_shape, expected_shape)
Example #2
0
    def test_actions_within_range(self, outer_dims):
        """Check that every produced action lies inside the spec's bounds.

        Args:
            outer_dims: leading dimensions prepended to the observation
                shape when building the random input.
        """
        obs_size = 5
        observation_spec = tensor_spec.TensorSpec([obs_size], tf.float32)
        # A [2, 3]-shaped action whose entries are bounded to [2., 3.].
        bounded_action_spec = tensor_spec.BoundedTensorSpec(
            [2, 3], tf.float32, 2., 3.)
        network = actor_network.ActorNetwork(
            observation_spec, bounded_action_spec)

        input_shape = list(outer_dims) + [obs_size]
        observations = tf.random.uniform(input_shape)
        actions, _ = network(observations)

        # Materialize the tensor so the comparison happens on concrete values.
        action_values = self.evaluate(actions)

        not_below_minimum = action_values >= bounded_action_spec.minimum
        self.assertTrue(np.all(not_below_minimum))
        not_above_maximum = action_values <= bounded_action_spec.maximum
        self.assertTrue(np.all(not_above_maximum))
Example #3
0
    def test_2d_action(self, outer_dims):
        """Check shape and variable count for a [2, 3]-shaped action spec.

        Asserts that the actions have shape ``outer_dims + [2, 3]`` and that
        the network exposes exactly two trainable variables.

        Args:
            outer_dims: leading dimensions prepended to the observation
                shape when building the random input.
        """
        obs_size = 5
        leading_dims = list(outer_dims)

        observation_spec = tensor_spec.TensorSpec([obs_size], tf.float32)
        bounded_action_spec = tensor_spec.BoundedTensorSpec(
            [2, 3], tf.float32, 2., 3.)
        network = actor_network.ActorNetwork(
            observation_spec, bounded_action_spec)

        observations = tf.random.uniform(leading_dims + [obs_size])
        actions, _ = network(observations)

        expected_shape = leading_dims + bounded_action_spec.shape.as_list()
        self.assertAllEqual(actions.shape.as_list(), expected_shape)

        # Variables only exist once the network has been called above.
        variable_count = len(network.trainable_variables)
        self.assertEqual(variable_count, 2)
Example #4
0
    def test_handle_preprocessing_layers(self, outer_dims):
        """Check an actor network built with per-observation preprocessing.

        The observation is a 2-tuple (a shape-[1] tensor and a scalar); each
        element gets its own preprocessing layer and the results are merged
        with a Keras ``Add`` combiner. Asserts that the action has shape
        ``outer_dims + [2]`` and that the network has more than four
        trainable variables.

        Args:
            outer_dims: outer dimensions used when sampling the time step;
                they must also lead the resulting action shape.
        """
        vector_obs_spec = tensor_spec.TensorSpec([1], tf.float32)
        scalar_obs_spec = tensor_spec.TensorSpec([], tf.float32)
        observation_spec = (vector_obs_spec, scalar_obs_spec)

        sampled_step = tensor_spec.sample_spec_nest(
            ts.time_step_spec(observation_spec), outer_dims=outer_dims)

        action_spec = tensor_spec.BoundedTensorSpec((2, ), tf.float32, 2, 3)

        # Layers are created in the same order as the tuple they end up in.
        vector_branch = tf.keras.layers.Dense(4)
        # The scalar observation is reshaped to (1, ) before its Dense layer.
        scalar_branch = sequential_layer.SequentialLayer([
            tf.keras.layers.Reshape((1, )),
            tf.keras.layers.Dense(4),
        ])
        preprocessing_layers = (vector_branch, scalar_branch)

        net = actor_network.ActorNetwork(
            observation_spec,
            action_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=tf.keras.layers.Add())

        # Third positional argument is passed as an empty tuple.
        action, _ = net(sampled_step.observation, sampled_step.step_type, ())

        expected_shape = list(outer_dims) + [2]
        self.assertEqual(expected_shape, action.shape.as_list())
        self.assertGreater(len(net.trainable_variables), 4)