Beispiel #1
0
 def testMode(self):
   """Checks that `mode()` evaluates to the one-hot vector [0, 0, 1]."""
   logits = [.3, .1, .4]
   temperature = 1.0
   distribution = gumbel_softmax.GumbelSoftmax(
       temperature, logits, validate_args=True)

   # The expected vector has its single 1 at index 2, the position of the
   # largest logit (.4) above.
   actual_mode = self.evaluate(distribution.mode())
   expected_mode = self.evaluate(tf.constant([0, 0, 1]))
   self.assertAllEqual(actual_mode, expected_mode)
Beispiel #2
0
 def testSample(self):
   """Checks the dtype and last-axis sum of a one-hot-converted sample."""
   logits = [.3, .1, .4]
   temperature = 0.8
   distribution = gumbel_softmax.GumbelSoftmax(
       temperature, logits, dtype=tf.int64, validate_args=True)

   raw_sample = distribution.sample()
   actions = distribution.convert_to_one_hot(raw_sample)

   # The converted sample must carry the dtype requested at construction.
   self.assertEqual(actions.dtype, tf.int64)

   # Summing over the last axis must give exactly 1.
   entries_total = self.evaluate(tf.reduce_sum(actions, axis=-1))
   self.assertEqual(entries_total, 1)
Beispiel #3
0
 def testLogProb(self):
   """Checks `log_prob` of [0, 0, 1] against a fixed reference value."""
   expected_log_prob = -0.972918868065
   logits = [.3, .1, .4]
   temperature = 0.8
   distribution = gumbel_softmax.GumbelSoftmax(
       temperature, logits, validate_args=True)

   one_hot_input = tf.constant([0, 0, 1])
   actual_log_prob = self.evaluate(distribution.log_prob(one_hot_input))
   self.assertAllClose(expected_log_prob, actual_log_prob)