def testCategoricalCategoricalKL(self):

  def np_softmax(logits):
    exp_logits = np.exp(logits)
    return exp_logits / exp_logits.sum(axis=-1, keepdims=True)

  with self.cached_session() as sess:
    for categories in [2, 4]:
      for batch_size in [1, 10]:
        a_logits = np.random.randn(batch_size, categories)
        b_logits = np.random.randn(batch_size, categories)

        a = categorical.Categorical(logits=a_logits)
        b = categorical.Categorical(logits=b_logits)

        kl = kullback_leibler.kl_divergence(a, b)
        kl_val = self.evaluate(kl)
        # Make sure KL(a||a) is 0.
        kl_same = sess.run(kullback_leibler.kl_divergence(a, a))

        prob_a = np_softmax(a_logits)
        prob_b = np_softmax(b_logits)
        kl_expected = np.sum(prob_a * (np.log(prob_a) - np.log(prob_b)),
                             axis=-1)

        self.assertEqual(kl.get_shape(), (batch_size,))
        self.assertAllClose(kl_val, kl_expected)
        self.assertAllClose(kl_same, np.zeros_like(kl_expected))
def sample(self, time, outputs, state, name=None):
  # Scale the logits by the softmax temperature (if any) before sampling;
  # lower temperatures sharpen the distribution, higher ones flatten it.
  if self._softmax_temperature is not None:
    outputs = outputs / self._softmax_temperature
  sampler = categorical.Categorical(logits=outputs)
  sample_ids = sampler.sample()
  return sample_ids
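# Illustrative sketch (not part of the original snippets): the sample() helper
# above divides the logits by a softmax temperature before building the
# Categorical. A temperature below 1 sharpens the resulting distribution
# toward the argmax; a temperature above 1 flattens it toward uniform. The
# values below are made up for demonstration.
import numpy as np

def temperature_softmax(logits, temperature=1.0):
  # Equivalent to the probs of Categorical(logits=logits / temperature).
  z = np.asarray(logits, dtype=np.float64) / temperature
  z = z - z.max(axis=-1, keepdims=True)  # subtract max for numerical stability
  exp_z = np.exp(z)
  return exp_z / exp_z.sum(axis=-1, keepdims=True)

example_logits = [1.0, 2.0, 3.0]
print(temperature_softmax(example_logits, temperature=0.5))  # sharper
print(temperature_softmax(example_logits, temperature=2.0))  # flatter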
def testCDFWithDynamicEventShapeKnownNdims(self):
  """Test that dynamically-shaped events with known ndims work."""
  batch_size = 2
  histograms = array_ops.placeholder(dtype=dtypes.float32,
                                     shape=(batch_size, None))
  event = array_ops.placeholder(dtype=dtypes.float32, shape=(batch_size,))
  dist = categorical.Categorical(probs=histograms)
  cdf_op = dist.cdf(event)

  # Feed values into the placeholders with different shapes.
  # Three classes.
  event_feed_one = [0, 1]
  histograms_feed_one = [[0.5, 0.3, 0.2], [1.0, 0.0, 0.0]]
  expected_cdf_one = [0.0, 1.0]
  feed_dict_one = {
      histograms: histograms_feed_one,
      event: event_feed_one
  }

  # Six classes.
  event_feed_two = [2, 5]
  histograms_feed_two = [[0.9, 0.0, 0.0, 0.0, 0.0, 0.1],
                         [0.15, 0.2, 0.05, 0.35, 0.13, 0.12]]
  expected_cdf_two = [0.9, 0.88]
  feed_dict_two = {
      histograms: histograms_feed_two,
      event: event_feed_two
  }

  with self.cached_session() as sess:
    actual_cdf_one = sess.run(cdf_op, feed_dict=feed_dict_one)
    actual_cdf_two = sess.run(cdf_op, feed_dict=feed_dict_two)

  self.assertAllClose(actual_cdf_one, expected_cdf_one)
  self.assertAllClose(actual_cdf_two, expected_cdf_two)
def testVarianceConsistentCovariance(self):
  gm = tfd.MixtureSameFamily(
      mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]),
      components_distribution=tfd.MultivariateNormalDiag(
          loc=[[-1., 1], [1, -1]], scale_identity_multiplier=[1., 0.5]))
  cov_, var_ = self.evaluate([gm.covariance(), gm.variance()])
  self.assertAllClose(cov_.diagonal(), var_, atol=0.)
def testEntropyGradient(self):
  with self.cached_session() as sess:
    logits = constant_op.constant([[1., 2., 3.], [2., 5., 1.]])

    probabilities = nn_ops.softmax(logits)
    log_probabilities = nn_ops.log_softmax(logits)
    true_entropy = -math_ops.reduce_sum(
        probabilities * log_probabilities, axis=-1)

    categorical_distribution = categorical.Categorical(probs=probabilities)
    categorical_entropy = categorical_distribution.entropy()

    # Compare the gradients of the analytic entropy and the distribution's
    # entropy with respect to the logits.
    true_entropy_g = gradients_impl.gradients(true_entropy, [logits])
    categorical_entropy_g = gradients_impl.gradients(
        categorical_entropy, [logits])

    res = sess.run({
        "true_entropy": true_entropy,
        "categorical_entropy": categorical_entropy,
        "true_entropy_g": true_entropy_g,
        "categorical_entropy_g": categorical_entropy_g
    })
    self.assertAllClose(res["true_entropy"], res["categorical_entropy"])
    self.assertAllClose(res["true_entropy_g"], res["categorical_entropy_g"])
def testCDFBroadcasting(self):
  # shape: [batch=2, n_bins=3]
  histograms = [[0.2, 0.1, 0.7],
                [0.3, 0.45, 0.25]]

  # shape: [events=3, batch=2]
  event = [[0, 0], [1, 1], [2, 2]]

  dist = categorical.Categorical(probs=histograms)

  # We test that the probabilities are correctly broadcasted over the
  # additional leading batch dimension of size 3.
  expected_cdf_result = np.zeros((3, 2))
  expected_cdf_result[0, 0] = 0
  expected_cdf_result[0, 1] = 0
  expected_cdf_result[1, 0] = 0.2
  expected_cdf_result[1, 1] = 0.3
  expected_cdf_result[2, 0] = 0.3
  expected_cdf_result[2, 1] = 0.75
  with self.test_session():
    self.assertAllClose(dist.cdf(event).eval(), expected_cdf_result)
def testSampleConsistentMeanCovariance(self):
  with self.test_session() as sess:
    gm = mixture_same_family_lib.MixtureSameFamily(
        mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]),
        components_distribution=mvn_diag_lib.MultivariateNormalDiag(
            loc=[[-1., 1], [1, -1]], scale_identity_multiplier=[1., 0.5]))
    self.run_test_sample_consistent_mean_covariance(sess.run, gm)
def testLogPMFBroadcasting(self):
  with self.cached_session():
    # shape: [1, 2, 2]
    histograms = [[[0.2, 0.8], [0.4, 0.6]]]
    dist = categorical.Categorical(math_ops.log(histograms) - 50.)

    prob = dist.prob(1)
    self.assertAllClose([[0.8, 0.6]], self.evaluate(prob))

    prob = dist.prob([1])
    self.assertAllClose([[0.8, 0.6]], self.evaluate(prob))

    prob = dist.prob([0, 1])
    self.assertAllClose([[0.2, 0.6]], self.evaluate(prob))

    prob = dist.prob([[0, 1]])
    self.assertAllClose([[0.2, 0.6]], self.evaluate(prob))

    prob = dist.prob([[[0, 1]]])
    self.assertAllClose([[[0.2, 0.6]]], self.evaluate(prob))

    prob = dist.prob([[1, 0], [0, 1]])
    self.assertAllClose([[0.8, 0.4], [0.2, 0.6]], self.evaluate(prob))

    prob = dist.prob([[[1, 1], [1, 0]], [[1, 0], [0, 1]]])
    self.assertAllClose(
        [[[0.8, 0.6], [0.8, 0.4]], [[0.8, 0.4], [0.2, 0.6]]],
        self.evaluate(prob))
def __init__(self, logits, targets=None, seed=None):
  dist = categorical.Categorical(logits=logits)
  self._logits = logits
  self._probs = dist.probs
  self._sqrt_probs = math_ops.sqrt(self._probs)
  super(CategoricalLogitsNegativeLogProbLoss, self).__init__(
      dist, targets=targets, seed=seed)
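# Illustrative sketch (not part of CategoricalLogitsNegativeLogProbLoss above):
# the negative log-probability of an integer target under
# Categorical(logits=...) equals the softmax cross-entropy with that target.
# The helper below recomputes it in plain NumPy with made-up values; it is not
# the class's actual evaluation path.
import numpy as np

def categorical_neg_log_prob(logits, target):
  # -log softmax(logits)[target], computed via a stable log-sum-exp.
  logits = np.asarray(logits, dtype=np.float64)
  max_logit = logits.max()
  log_normalizer = max_logit + np.log(np.exp(logits - max_logit).sum())
  return log_normalizer - logits[target]

print(categorical_neg_log_prob([1.0, 2.0, 3.0], target=2))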
def testLogPMF(self):
  logits = np.log([[0.2, 0.8], [0.6, 0.4]]) - 50.
  dist = categorical.Categorical(logits)
  with self.cached_session():
    self.assertAllClose(dist.log_prob([0, 1]).eval(), np.log([0.2, 0.4]))
    self.assertAllClose(dist.log_prob([0.0, 1.0]).eval(), np.log([0.2, 0.4]))
def testEntropyWithBatch(self):
  logits = np.log([[0.2, 0.8], [0.6, 0.4]]) - 50.
  dist = categorical.Categorical(logits)
  with self.cached_session():
    self.assertAllClose(dist.entropy().eval(), [
        -(0.2 * np.log(0.2) + 0.8 * np.log(0.8)),
        -(0.6 * np.log(0.6) + 0.4 * np.log(0.4))
    ])
def testNotReparameterized(self):
  p = constant_op.constant([0.3, 0.3, 0.4])
  with backprop.GradientTape() as tape:
    tape.watch(p)
    dist = categorical.Categorical(p)
    samples = dist.sample(100)
  grad_p = tape.gradient(samples, p)
  self.assertIsNone(grad_p)
def testVarianceConsistentCovariance(self):
  with self.cached_session() as sess:
    gm = mixture_same_family_lib.MixtureSameFamily(
        mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]),
        components_distribution=mvn_diag_lib.MultivariateNormalDiag(
            loc=[[-1., 1], [1, -1]], scale_identity_multiplier=[1., 0.5]))
    cov_, var_ = sess.run([gm.covariance(), gm.variance()])
    self.assertAllClose(cov_.diagonal(), var_, atol=0.)
def testSampleAndLogProbMultivariateShapes(self):
  gm = tfd.MixtureSameFamily(
      mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]),
      components_distribution=tfd.MultivariateNormalDiag(
          loc=[[-1., 1], [1, -1]], scale_identity_multiplier=[1., 0.5]))
  x = gm.sample([4, 5], seed=42)
  log_prob_x = gm.log_prob(x)
  self.assertEqual([4, 5, 2], x.shape)
  self.assertEqual([4, 5], log_prob_x.shape)
def testSampleAndLogProbUnivariateShapes(self):
  gm = tfd.MixtureSameFamily(
      mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]),
      components_distribution=tf.distributions.Normal(
          loc=[-1., 1], scale=[0.1, 0.5]))
  x = gm.sample([4, 5], seed=42)
  log_prob_x = gm.log_prob(x)
  self.assertEqual([4, 5], x.shape)
  self.assertEqual([4, 5], log_prob_x.shape)
def testLogits(self):
  p = np.array([0.2, 0.8], dtype=np.float32)
  logits = np.log(p) - 50.
  dist = categorical.Categorical(logits=logits)
  with self.cached_session():
    self.assertAllEqual([2], dist.probs.get_shape())
    self.assertAllEqual([2], dist.logits.get_shape())
    self.assertAllClose(dist.probs.eval(), p)
    self.assertAllClose(dist.logits.eval(), logits)
def testCDFNoBatch(self):
  histogram = [0.1, 0.2, 0.3, 0.4]
  event = 2
  expected_cdf = 0.3
  dist = categorical.Categorical(probs=histogram)
  cdf_op = dist.cdf(event)
  with self.cached_session():
    self.assertAlmostEqual(cdf_op.eval(), expected_cdf)
def testCDFWithBatch(self):
  histograms = [[0.1, 0.2, 0.3, 0.25, 0.15],
                [0.0, 0.75, 0.2, 0.05, 0.0]]
  event = [0, 3]
  expected_cdf = [0.0, 0.95]
  dist = categorical.Categorical(probs=histograms)
  cdf_op = dist.cdf(event)
  with self.cached_session():
    self.assertAllClose(cdf_op.eval(), expected_cdf)
def testSampleAndLogProbBatch(self):
  with self.cached_session():
    gm = mixture_same_family_lib.MixtureSameFamily(
        mixture_distribution=categorical_lib.Categorical(probs=[[0.3, 0.7]]),
        components_distribution=normal_lib.Normal(
            loc=[[-1., 1]], scale=[[0.1, 0.5]]))
    x = gm.sample([4, 5], seed=42)
    log_prob_x = gm.log_prob(x)
    self.assertEqual([4, 5, 1], x.shape)
    self.assertEqual([4, 5, 1], log_prob_x.shape)
def testSampleConsistentLogProb(self):
  gm = tfd.MixtureSameFamily(
      mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]),
      components_distribution=tfd.MultivariateNormalDiag(
          loc=[[-1., 1], [1, -1]], scale_identity_multiplier=[1., 0.5]))
  # Ball centered at component0's mean.
  self.run_test_sample_consistent_log_prob(
      self.evaluate, gm, radius=1., center=[-1., 1], rtol=0.02)
  # Larger ball centered at component1's mean.
  self.run_test_sample_consistent_log_prob(
      self.evaluate, gm, radius=1., center=[1., -1], rtol=0.02)
def testLogPMFShapeNoBatch(self):
  histograms = [0.2, 0.8]
  dist = categorical.Categorical(math_ops.log(histograms))

  log_prob = dist.log_prob(0)
  self.assertEqual(0, log_prob.get_shape().ndims)
  self.assertAllEqual([], log_prob.get_shape())

  log_prob = dist.log_prob([[[1, 1], [1, 0]], [[1, 0], [0, 1]]])
  self.assertEqual(3, log_prob.get_shape().ndims)
  self.assertAllEqual([2, 2, 2], log_prob.get_shape())
def sample(self, time, outputs, state, name=None):
  """sample for SyntacticGreedyEmbeddingHelper."""
  del time, state  # unused by sample_fn
  # Outputs are logits; we sample an id from the syntactically-masked
  # distribution rather than taking the argmax.
  if not isinstance(outputs, ops.Tensor):
    raise TypeError("Expected outputs to be a single Tensor, got: %s" %
                    type(outputs))

  # Mask outputs to reduce candidates to syntactically correct ones.
  def mask_output(outputs, end_token):
    if len(self.previous_tokens) == 0:
      # When there is no previous token, only the 'DEF' token is allowed.
      mask = np.zeros(outputs.shape, dtype=outputs.dtype)
      mask[:, self.dsl_syntax.token2int['DEF']] = 1
      return mask
    tokens = np.stack(self.previous_tokens, axis=1)
    masks = []
    for i in range(outputs.shape[0]):
      if tokens[i][-1] == end_token:
        next_tokens = [end_token]
      else:
        try:
          p_str = self.dsl_syntax.intseq2str(tokens[i])
          next_tokens_with_counts = self.dsl_syntax.get_next_candidates(
              '{}'.format(p_str))
          next_tokens = [t[0] for t in next_tokens_with_counts
                         if t[1] <= self.max_program_len - len(tokens[i])]
        except:
          # TODO: this code rarely causes a syntax error, which should not
          # happen. We should fix this in the future.
          next_tokens = [t for t in range(len(self.dsl_syntax.int2token))]
        else:
          next_tokens = [self.dsl_syntax.token2int[t] for t in next_tokens]
      mask = np.zeros([outputs.shape[1]], dtype=outputs.dtype)
      for t in next_tokens:
        mask[t] = 1
      masks.append(mask)
    return np.stack(masks, axis=0)

  masks = tf.py_func(mask_output, [outputs, self._end_token], tf.float32)
  masks.set_shape(outputs.get_shape())
  masked_outputs = tf.exp(outputs) * masks
  masked_probs = masked_outputs / tf.reduce_sum(
      masked_outputs, axis=1, keep_dims=True)
  sample_id_sampler = categorical.Categorical(probs=masked_probs)
  sample_ids = sample_id_sampler.sample(seed=self._seed)

  def add_sample_ids(sample_ids, masked_probs, masks):
    self.previous_tokens.append(sample_ids)
    self.previous_probs.append(masked_probs)
    self.previous_masks.append(masks)
    return sample_ids

  new_sample_ids = tf.py_func(
      add_sample_ids, [sample_ids, masked_probs, masks], tf.int32)
  new_sample_ids.set_shape(sample_ids.get_shape())
  return new_sample_ids
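# Illustrative NumPy sketch (made-up values) of the mask-and-renormalize step
# used in the sample() method above: exp(logits) for syntactically invalid
# tokens is zeroed out and the remainder is renormalized, so sampling can only
# pick valid next tokens.
import numpy as np

logits = np.array([[2.0, 0.5, -1.0, 0.0]])  # [batch=1, vocab=4], illustrative
mask = np.array([[1.0, 0.0, 1.0, 0.0]])     # 1 marks a syntactically valid token
masked_outputs = np.exp(logits) * mask
masked_probs = masked_outputs / masked_outputs.sum(axis=1, keepdims=True)
# Each row of masked_probs sums to 1 over the valid tokens; invalid tokens get 0.
sample_ids = np.array(
    [np.random.choice(len(row), p=row) for row in masked_probs])
print(masked_probs, sample_ids)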
def testSampleWithSampleShape(self):
  with self.test_session():
    histograms = [[[0.2, 0.8], [0.4, 0.6]]]
    dist = categorical.Categorical(math_ops.log(histograms) - 50.)
    samples = dist.sample((100, 100), seed=123)
    prob = dist.prob(samples)
    prob_val = prob.eval()
    self.assertAllClose(
        [0.2**2 + 0.8**2], [prob_val[:, :, :, 0].mean()], atol=1e-2)
    self.assertAllClose(
        [0.4**2 + 0.6**2], [prob_val[:, :, :, 1].mean()], atol=1e-2)
def sample(self, time, outputs, state, name=None):
  """sample for SampleEmbeddingHelper."""
  del time, state  # unused by sample_fn
  # Outputs are logits; we sample instead of taking the argmax (greedy).
  if not isinstance(outputs, ops.Tensor):
    raise TypeError("Expected outputs to be a single Tensor, got: %s" %
                    type(outputs))
  sample_id_sampler = categorical.Categorical(logits=outputs)
  sample_ids = sample_id_sampler.sample(seed=self._seed)
  return sample_ids
def testCDFWithDynamicEventShapeUnknownNdims(
    self, events, histograms, expected_cdf):
  """Test that dynamically-sized events with unknown shape work."""
  event_ph = array_ops.placeholder_with_default(events, shape=None)
  histograms_ph = array_ops.placeholder_with_default(histograms, shape=None)
  dist = categorical.Categorical(probs=histograms_ph)
  cdf_op = dist.cdf(event_ph)

  actual_cdf = self.evaluate(cdf_op)
  self.assertAllClose(actual_cdf, expected_cdf)
def sample(self, time, outputs, state, name=None):
  with ops.name_scope(name, "ScheduledEmbeddingTrainingHelperSample",
                      [time, outputs, state]):
    # Return -1s where we did not sample, and sample_ids elsewhere.
    select_sample_noise = random_ops.random_uniform(
        [self.batch_size], seed=self._scheduling_seed)
    select_sample = (self._sampling_probability > select_sample_noise)
    sample_id_sampler = categorical.Categorical(logits=outputs)
    return array_ops.where(
        select_sample,
        sample_id_sampler.sample(seed=self._seed),
        array_ops.tile([-1], [self.batch_size]))
def testSampleAndLogProbBatchMultivariateShapes(self):
  with self.cached_session():
    gm = mixture_same_family_lib.MixtureSameFamily(
        mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]),
        components_distribution=mvn_diag_lib.MultivariateNormalDiag(
            loc=[[[-1., 1], [1, -1]], [[0., 1], [1, 0]]],
            scale_identity_multiplier=[1., 0.5]))
    x = gm.sample([4, 5], seed=42)
    log_prob_x = gm.log_prob(x)
    self.assertEqual([4, 5, 2, 2], x.shape)
    self.assertEqual([4, 5, 2], log_prob_x.shape)
def sample(self, time, outputs, state, name=None):
  with ops.name_scope(name, "ScheduledEmbeddingTrainingHelperSample",
                      [time, outputs, state]):
    # Return -1s where we did not sample, and sample_ids elsewhere.
    select_sampler = bernoulli.Bernoulli(
        probs=self._sampling_probability, dtype=dtypes.bool)
    select_sample = select_sampler.sample(
        sample_shape=self.batch_size, seed=self._scheduling_seed)
    sample_id_sampler = categorical.Categorical(logits=outputs)
    return array_ops.where(
        select_sample,
        sample_id_sampler.sample(seed=self._seed),
        gen_array_ops.fill([self.batch_size], -1))
def testLogPMFShape(self):
  with self.cached_session():
    # shape: [1, 2, 2]
    histograms = [[[0.2, 0.8], [0.4, 0.6]]]
    dist = categorical.Categorical(math_ops.log(histograms))

    log_prob = dist.log_prob([0, 1])
    self.assertEqual(2, log_prob.get_shape().ndims)
    self.assertAllEqual([1, 2], log_prob.get_shape())

    log_prob = dist.log_prob([[[1, 1], [1, 0]], [[1, 0], [0, 1]]])
    self.assertEqual(3, log_prob.get_shape().ndims)
    self.assertAllEqual([2, 2, 2], log_prob.get_shape())
def testUnknownShape(self):
  with self.cached_session():
    logits = array_ops.placeholder(dtype=dtypes.float32)
    dist = categorical.Categorical(logits)
    sample = dist.sample()

    # Will sample class 1.
    sample_value = sample.eval(feed_dict={logits: [-1000.0, 1000.0]})
    self.assertEqual(1, sample_value)

    # Batch entry 0 will sample class 1, batch entry 1 will sample class 0.
    sample_value_batch = sample.eval(
        feed_dict={logits: [[-1000.0, 1000.0], [1000.0, -1000.0]]})
    self.assertAllEqual([1, 0], sample_value_batch)