def test_build(self, outer_dims):
    """Check the actor output shape for a 1-D bounded action spec.

    Builds an `ActorNetwork` over a 5-dimensional float observation spec,
    feeds it a random observation batch and asserts that the resulting
    action tensor has shape `outer_dims + action_spec.shape`.

    Args:
        outer_dims: Iterable of leading dimensions placed in front of the
            observation shape (presumably supplied by a parameterized
            decorator outside this view).
    """
    obs_size = 5
    observation_spec = tensor_spec.TensorSpec([obs_size], tf.float32)
    bounded_action_spec = tensor_spec.BoundedTensorSpec(
        [1], tf.float32, 2., 3.)
    network = actor_network.ActorNetwork(observation_spec, bounded_action_spec)

    observations = tf.random.uniform([*outer_dims, obs_size])
    actions, _ = network(observations)

    produced_shape = actions.shape.as_list()
    wanted_shape = [*outer_dims, *bounded_action_spec.shape.as_list()]
    self.assertAllEqual(produced_shape, wanted_shape)
def test_actions_within_range(self, outer_dims):
    """Check that every produced action lies inside the spec bounds.

    Uses a `[2, 3]`-shaped bounded action spec with bounds 2.0 and 3.0,
    evaluates the network output for a random observation batch and
    asserts that all values are >= the spec minimum and <= the spec
    maximum.

    Args:
        outer_dims: Iterable of leading dimensions placed in front of the
            observation shape (presumably supplied by a parameterized
            decorator outside this view).
    """
    obs_size = 5
    observation_spec = tensor_spec.TensorSpec([obs_size], tf.float32)
    bounded_action_spec = tensor_spec.BoundedTensorSpec(
        [2, 3], tf.float32, 2., 3.)
    network = actor_network.ActorNetwork(observation_spec, bounded_action_spec)

    observations = tf.random.uniform([*outer_dims, obs_size])
    actions, _ = network(observations)
    action_values = self.evaluate(actions)

    # Lower bound first, then upper bound, so a failure points at the
    # violated side.
    at_or_above_minimum = np.all(action_values >= bounded_action_spec.minimum)
    self.assertTrue(at_or_above_minimum)
    at_or_below_maximum = np.all(action_values <= bounded_action_spec.maximum)
    self.assertTrue(at_or_below_maximum)
def test_2d_action(self, outer_dims):
    """Check output shape and variable count for a 2-D action spec.

    With a `[2, 3]`-shaped bounded action spec, asserts that the action
    tensor has shape `outer_dims + [2, 3]` and that the network exposes
    exactly two trainable variables after being called.

    Args:
        outer_dims: Iterable of leading dimensions placed in front of the
            observation shape (presumably supplied by a parameterized
            decorator outside this view).
    """
    obs_size = 5
    observation_spec = tensor_spec.TensorSpec([obs_size], tf.float32)
    bounded_action_spec = tensor_spec.BoundedTensorSpec(
        [2, 3], tf.float32, 2., 3.)
    network = actor_network.ActorNetwork(observation_spec, bounded_action_spec)

    observations = tf.random.uniform([*outer_dims, obs_size])
    actions, _ = network(observations)

    produced_shape = actions.shape.as_list()
    wanted_shape = [*outer_dims, *bounded_action_spec.shape.as_list()]
    self.assertAllEqual(produced_shape, wanted_shape)

    variable_count = len(network.trainable_variables)
    self.assertEqual(variable_count, 2)
def test_handle_preprocessing_layers(self, outer_dims):
    """Check the network with per-observation preprocessing layers.

    The observation is a tuple of a `[1]`-shaped and a scalar float spec.
    Each element gets its own preprocessing layer, and the results are
    combined with a Keras `Add` layer. Asserts that the action has shape
    `outer_dims + [2]` and that the network has more than four trainable
    variables.

    Args:
        outer_dims: Iterable of leading dimensions used when sampling the
            time step (presumably supplied by a parameterized decorator
            outside this view).
    """
    observation_spec = (
        tensor_spec.TensorSpec([1], tf.float32),
        tensor_spec.TensorSpec([], tf.float32),
    )
    sampled_time_step = tensor_spec.sample_spec_nest(
        ts.time_step_spec(observation_spec), outer_dims=outer_dims)
    action_spec = tensor_spec.BoundedTensorSpec((2, ), tf.float32, 2, 3)

    # Layers are created in the same order as before so that Keras
    # auto-generated layer names stay the same.
    vector_branch = tf.keras.layers.Dense(4)
    scalar_branch = sequential_layer.SequentialLayer([
        tf.keras.layers.Reshape((1, )),
        tf.keras.layers.Dense(4),
    ])

    net = actor_network.ActorNetwork(
        observation_spec,
        action_spec,
        preprocessing_layers=(vector_branch, scalar_branch),
        preprocessing_combiner=tf.keras.layers.Add())

    action, _ = net(
        sampled_time_step.observation, sampled_time_step.step_type, ())

    wanted_shape = [*outer_dims, 2]
    self.assertEqual(wanted_shape, action.shape.as_list())
    self.assertGreater(len(net.trainable_variables), 4)