def testComputeFeasibilityMask(self):
    """Checks feasibility probabilities for one SimpleConstraint, no mask.

    Builds a batch of two observations, calls
    `constraints.compute_feasibility_probability` with three actions and
    `action_mask=None`, and asserts that every entry of the resulting
    2 x 3 array equals 0.5.
    """
    obs_spec = tensor_spec.TensorSpec([2], tf.float32)
    step_spec = ts.time_step_spec(obs_spec)
    act_spec = tensor_spec.BoundedTensorSpec((), tf.int32, 0, 2)
    constraint = SimpleConstraint(step_spec, act_spec)

    obs_batch = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
    probs = constraints.compute_feasibility_probability(
        obs_batch,
        [constraint],
        batch_size=2,
        num_actions=3,
        action_mask=None,
    )

    # One row per observation in the batch, one column per action.
    expected = 0.5 * np.ones([2, 3])
    self.assertAllEqual(expected, self.evaluate(probs))
def testComputeFeasibilityMaskWithActionMask(self):
    """Checks feasibility probabilities against an explicit action mask.

    A `NeuralConstraint` backed by `DummyNet` is passed to
    `constraints.compute_feasibility_probability` together with an int32
    action mask; the test asserts that the result equals that mask cast
    to float32.
    """
    obs_spec = tensor_spec.TensorSpec([2], tf.float32)
    step_spec = ts.time_step_spec(obs_spec)
    act_spec = tensor_spec.BoundedTensorSpec((), tf.int32, 0, 2)

    net = DummyNet(obs_spec, act_spec)
    constraint = constraints.NeuralConstraint(
        step_spec, act_spec, constraint_network=net
    )

    obs_batch = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
    mask = tf.constant([[0, 0, 1], [0, 1, 0]], dtype=tf.int32)
    probs = constraints.compute_feasibility_probability(
        obs_batch,
        [constraint],
        batch_size=2,
        num_actions=3,
        action_mask=mask,
    )

    # NOTE(review): no variable initializer is run before evaluation; if
    # DummyNet creates variables and this test runs in graph mode, confirm
    # whether one is required.
    expected = self.evaluate(tf.cast(mask, tf.float32))
    actual = self.evaluate(probs)
    self.assertAllEqual(expected, actual)