Beispiel #1
0
  def testComputeFeasibilityMask(self):
    """Checks the feasibility probability produced by one `SimpleConstraint`.

    Two observations and three actions are used with no action mask; every
    entry of the evaluated 2x3 result is expected to equal 0.5.
    """
    obs_spec = tensor_spec.TensorSpec([2], tf.float32)
    act_spec = tensor_spec.BoundedTensorSpec((), tf.int32, 0, 2)
    constraint = SimpleConstraint(ts.time_step_spec(obs_spec), act_spec)
    obs_batch = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)

    probs = constraints.compute_feasibility_probability(
        obs_batch, [constraint], batch_size=2, num_actions=3,
        action_mask=None)

    # One row per observation, one column per action.
    expected = np.full((2, 3), 0.5)
    self.assertAllEqual(expected, self.evaluate(probs))
Beispiel #2
0
  def testComputeFeasibilityMaskWithActionMask(self):
    """Checks feasibility probabilities when an action mask is supplied.

    A `NeuralConstraint` backed by a `DummyNet` is evaluated on two
    observations together with an explicit int32 action mask; the evaluated
    result is expected to equal that mask cast to float32.
    """
    obs_spec = tensor_spec.TensorSpec([2], tf.float32)
    act_spec = tensor_spec.BoundedTensorSpec((), tf.int32, 0, 2)
    constraint = constraints.NeuralConstraint(
        ts.time_step_spec(obs_spec),
        act_spec,
        constraint_network=DummyNet(obs_spec, act_spec))

    obs_batch = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
    mask = tf.constant([[0, 0, 1], [0, 1, 0]], dtype=tf.int32)
    probs = constraints.compute_feasibility_probability(
        obs_batch, [constraint], batch_size=2, num_actions=3,
        action_mask=mask)

    # Evaluate the expected value first, then the computed probabilities.
    expected = self.evaluate(tf.cast(mask, tf.float32))
    actual = self.evaluate(probs)
    self.assertAllEqual(expected, actual)