def _log_prob(self, counts):
  counts = self._assert_valid_counts(counts)
  ordered_prob = (special_math_ops.lbeta(self.alpha + counts) -
                  special_math_ops.lbeta(self.alpha))
  log_prob = ordered_prob + distribution_util.log_combinations(
      self.n, counts)
  return log_prob

def test_empty_rank2_or_greater_input_gives_empty_output_dynamic_alloc(self):
  with self.test_session(use_gpu=self._use_gpu):
    ph = array_ops.placeholder(dtypes.float32)
    self.assertAllEqual(
        [], special_math_ops.lbeta(ph).eval(feed_dict={ph: [[]]}))
    self.assertAllEqual(
        [[]], special_math_ops.lbeta(ph).eval(feed_dict={ph: [[[]]]}))

def log_pmf(self, counts, name=None):
  """`Log(P[counts])`, computed for every batch member.

  For each batch of counts `[c_1,...,c_k]`, `P[counts]` is the probability
  that after sampling `sum_j c_j` draws from this Dirichlet Multinomial
  distribution, the number of draws falling in class `j` is `c_j`.  Note that
  different sequences of draws can result in the same counts, thus the
  probability includes a combinatorial coefficient.

  Args:
    counts: Non-negative `float`, `double`, or `int` tensor whose shape can be
      broadcast with `self.alpha`.  For fixed leading dimensions, the last
      dimension represents counts for the corresponding Dirichlet Multinomial
      distribution in `self.alpha`.
    name: Name to give this Op, defaults to "log_pmf".

  Returns:
    Log probabilities for each record, shape `[N1,...,Nn]`.
  """
  alpha = self._alpha
  with ops.op_scope([alpha, counts], name, 'log_pmf'):
    counts = self._check_counts(counts)
    ordered_pmf = (special_math_ops.lbeta(alpha + counts) -
                   special_math_ops.lbeta(alpha))
    log_pmf = ordered_pmf + _log_combinations(counts)
    # If alpha = counts = [[]], ordered_pmf carries the right shape, which is
    # [].  However, since reduce_sum([[]]) = [0], log_combinations = [0],
    # which is not correct.  Luckily, [] + [0] = [], so the sum is fine, but
    # shape must be inferred from ordered_pmf.
    # Note also that tf.constant([]).get_shape() = TensorShape([Dimension(0)])
    log_pmf.set_shape(ordered_pmf.get_shape())
    return log_pmf

def log_prob(self, counts, name="log_prob"):
  """`Log(P[counts])`, computed for every batch member.

  For each batch of counts `[n_1,...,n_k]`, `P[counts]` is the probability
  that after sampling `n` draws from this Dirichlet Multinomial distribution,
  the number of draws falling in class `j` is `n_j`.  Note that different
  sequences of draws can result in the same counts, thus the probability
  includes a combinatorial coefficient.

  Args:
    counts: Non-negative tensor with dtype `dtype` and whose shape can be
      broadcast with `self.alpha`.  For fixed leading dimensions, the last
      dimension represents counts for the corresponding Dirichlet Multinomial
      distribution in `self.alpha`. `counts` is only legal if it sums up to
      `n` and its components are equal to integer values.
    name: Name to give this Op, defaults to "log_prob".

  Returns:
    Log probabilities for each record, shape `[N1,...,Nn]`.
  """
  n = self._n
  alpha = self._alpha
  with ops.name_scope(self.name):
    with ops.name_scope(name, values=[n, alpha, counts]):
      counts = self._check_counts(counts)
      ordered_prob = (special_math_ops.lbeta(alpha + counts) -
                      special_math_ops.lbeta(alpha))
      log_prob = ordered_prob + distribution_util.log_combinations(
          n, counts)
      return log_prob

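# A minimal NumPy/SciPy sketch of the Dirichlet-Multinomial formula used
# above, for a single batch member.  It is an illustration, not the library
# implementation: `dirichlet_multinomial_log_prob` and the `_lbeta` /
# `_log_combinations` helpers below are hypothetical stand-ins for the
# TensorFlow ops.
import numpy as np
from scipy.special import gammaln

def _lbeta(z):
  # Log multivariate Beta function: sum_i lgamma(z_i) - lgamma(sum_i z_i).
  z = np.asarray(z, dtype=np.float64)
  return gammaln(z).sum(axis=-1) - gammaln(z.sum(axis=-1))

def _log_combinations(n, counts):
  # Log of the multinomial coefficient n! / prod_j counts_j!.
  counts = np.asarray(counts, dtype=np.float64)
  return gammaln(n + 1.) - gammaln(counts + 1.).sum(axis=-1)

def dirichlet_multinomial_log_prob(alpha, counts):
  counts = np.asarray(counts, dtype=np.float64)
  n = counts.sum(axis=-1)
  return _log_combinations(n, counts) + _lbeta(alpha + counts) - _lbeta(alpha)

# Example: with alpha = [1, 1] every split of n draws between the two classes
# is equally likely, so P[counts] = 1 / (n + 1).
print(np.exp(dirichlet_multinomial_log_prob([1., 1.], [2., 1.])))  # ~0.25
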
def _log_prob(self, counts):
  counts = self._maybe_assert_valid_sample(counts)
  ordered_prob = (
      special_math_ops.lbeta(self.concentration + counts) -
      special_math_ops.lbeta(self.concentration))
  return ordered_prob + distribution_util.log_combinations(
      self.total_count, counts)

def test_two_dimensional_arg(self):
  # Should evaluate to 1/2.
  x_one_half = [[2, 1.], [2, 1.]]
  with self.test_session(use_gpu=self._use_gpu):
    self.assertAllClose(
        [0.5, 0.5], math_ops.exp(special_math_ops.lbeta(x_one_half)).eval())
    self.assertEqual((2,), special_math_ops.lbeta(x_one_half).get_shape())

def test_complicated_shape(self):
  with self.session(use_gpu=True):
    x = ops.convert_to_tensor(np.random.rand(3, 2, 2))
    self.assertAllEqual(
        (3, 2), self.evaluate(array_ops.shape(special_math_ops.lbeta(x))))
    self.assertEqual(
        tensor_shape.TensorShape([3, 2]),
        special_math_ops.lbeta(x).get_shape())

def test_length_1_last_dimension_results_in_one(self):
  # If there is only one coefficient, the formula still works, and we get one
  # as the answer, always.
  x_a = [5.5]
  x_b = [0.1]
  with self.test_session(use_gpu=True):
    self.assertAllClose(1, math_ops.exp(special_math_ops.lbeta(x_a)).eval())
    self.assertAllClose(1, math_ops.exp(special_math_ops.lbeta(x_b)).eval())
    self.assertEqual((), special_math_ops.lbeta(x_a).get_shape())

def test_one_dimensional_arg(self):
  # Should evaluate to 1 and 1/2.
  x_one = [1, 1.]
  x_one_half = [2, 1.]
  with self.test_session(use_gpu=self._use_gpu):
    self.assertAllClose(
        1, math_ops.exp(special_math_ops.lbeta(x_one)).eval())
    self.assertAllClose(
        0.5, math_ops.exp(special_math_ops.lbeta(x_one_half)).eval())
    self.assertEqual([], special_math_ops.lbeta(x_one).get_shape())

def test_two_dimensional_proper_shape(self):
  # Should evaluate to 1/2.
  x_one_half = [[2, 1.], [2, 1.]]
  with self.test_session(use_gpu=True):
    self.assertAllClose(
        [0.5, 0.5], math_ops.exp(special_math_ops.lbeta(x_one_half)).eval())
    self.assertEqual(
        (2,), array_ops.shape(special_math_ops.lbeta(x_one_half)).eval())
    self.assertEqual(
        tensor_shape.TensorShape([2]),
        special_math_ops.lbeta(x_one_half).get_shape())

def test_two_dimensional_arg_dynamic(self):
  # Should evaluate to 1/2.
  x_one_half = [[2, 1.], [2, 1.]]
  with self.test_session(use_gpu=True):
    ph = array_ops.placeholder(dtypes.float32)
    beta_ph = math_ops.exp(special_math_ops.lbeta(ph))
    self.assertAllClose(
        [0.5, 0.5], beta_ph.eval(feed_dict={ph: x_one_half}))

def test_empty_rank1_returns_negative_infinity(self):
  with self.test_session(use_gpu=True):
    x = constant_op.constant([], shape=[0])
    lbeta_x = special_math_ops.lbeta(x)
    expected_result = constant_op.constant(-np.inf, shape=())
    self.assertAllEqual(expected_result.eval(), lbeta_x.eval())
    self.assertEqual(expected_result.get_shape(), lbeta_x.get_shape())

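# Why the empty last dimension above yields -inf: under the reduction form
#   lbeta(x) = sum_i lgamma(x_i) - lgamma(sum_i x_i),
# an empty vector contributes an empty sum of lgamma terms (0) while
# sum_i x_i = 0 and lgamma(0) = +inf, so the result is 0 - inf = -inf.
# A quick NumPy sketch of that arithmetic (not the TensorFlow kernel):
import numpy as np
from scipy.special import gammaln

x = np.zeros((0,))                          # empty event dimension
print(gammaln(x).sum() - gammaln(x.sum()))  # 0 - gammaln(0) = -inf
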
def _log_prob(self, x):
  x = ops.convert_to_tensor(x, name="x")
  x = self._assert_valid_sample(x)
  unnorm_prob = (self.alpha - 1.) * math_ops.log(x)
  log_prob = math_ops.reduce_sum(
      unnorm_prob, reduction_indices=[-1],
      keep_dims=False) - special_math_ops.lbeta(self.alpha)
  return log_prob

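# The snippet above evaluates the Dirichlet log density
#   log Dir(x; alpha) = sum_i (alpha_i - 1) * log(x_i) - lbeta(alpha).
# A small NumPy sketch of the same formula (names are illustrative only),
# cross-checked against scipy.stats.dirichlet:
import numpy as np
from scipy.special import gammaln
from scipy.stats import dirichlet

def dirichlet_log_prob(alpha, x):
  alpha = np.asarray(alpha, dtype=np.float64)
  x = np.asarray(x, dtype=np.float64)
  lbeta = gammaln(alpha).sum(axis=-1) - gammaln(alpha.sum(axis=-1))
  return ((alpha - 1.) * np.log(x)).sum(axis=-1) - lbeta

alpha = [2., 3., 5.]
x = [0.2, 0.3, 0.5]
print(dirichlet_log_prob(alpha, x), dirichlet.logpdf(x, alpha))  # should agree
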
def _entropy(self):
  u = array_ops.expand_dims(self.df * self._ones(), -1)
  v = array_ops.expand_dims(self._ones(), -1)
  beta_arg = array_ops.concat_v2([u, v], len(u.get_shape()) - 1) / 2
  half_df = 0.5 * self.df
  return ((0.5 + half_df) * (math_ops.digamma(0.5 + half_df) -
                             math_ops.digamma(half_df)) +
          0.5 * math_ops.log(self.df) +
          special_math_ops.lbeta(beta_arg) +
          math_ops.log(self.sigma))

def _entropy(self):
  entropy = special_math_ops.lbeta(self.alpha)
  entropy += math_ops.digamma(self.alpha_sum) * (
      self.alpha_sum - math_ops.cast(self.event_shape()[0], self.dtype))
  entropy += -math_ops.reduce_sum(
      (self.alpha - 1.) * math_ops.digamma(self.alpha),
      reduction_indices=[-1],
      keep_dims=False)
  return entropy

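# The value computed above is the standard Dirichlet entropy
#   H = lbeta(alpha) + (alpha_0 - K) * digamma(alpha_0)
#       - sum_i (alpha_i - 1) * digamma(alpha_i),
# with alpha_0 = sum_i alpha_i and K the number of classes.  A hedged NumPy
# sketch of that expression (illustrative names), compared with scipy:
import numpy as np
from scipy.special import gammaln, digamma
from scipy.stats import dirichlet

def dirichlet_entropy(alpha):
  alpha = np.asarray(alpha, dtype=np.float64)
  alpha_0 = alpha.sum(axis=-1)
  k = alpha.shape[-1]
  lbeta = gammaln(alpha).sum(axis=-1) - gammaln(alpha_0)
  return (lbeta + (alpha_0 - k) * digamma(alpha_0)
          - ((alpha - 1.) * digamma(alpha)).sum(axis=-1))

alpha = [2., 3., 5.]
print(dirichlet_entropy(alpha), dirichlet.entropy(alpha))  # should agree
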
def test_one_dimensional_arg_dynamic_alloc(self):
  # Should evaluate to 1 and 1/2.
  x_one = [1, 1.]
  x_one_half = [2, 1.]
  with self.test_session(use_gpu=self._use_gpu):
    ph = array_ops.placeholder(dtypes.float32)
    beta_ph = math_ops.exp(special_math_ops.lbeta(ph))
    self.assertAllClose(1, beta_ph.eval(feed_dict={ph: x_one}))
    self.assertAllClose(0.5, beta_ph.eval(feed_dict={ph: x_one_half}))

def test_empty_rank2_with_zero_last_dim_returns_negative_infinity(self):
  with self.test_session(use_gpu=True):
    event_size = 0
    for batch_size in [0, 1, 2]:
      x = constant_op.constant([], shape=[batch_size, event_size])
      lbeta_x = special_math_ops.lbeta(x)
      expected_result = constant_op.constant(-np.inf, shape=[batch_size])
      self.assertAllEqual(expected_result.eval(), lbeta_x.eval())
      self.assertEqual(expected_result.get_shape(), lbeta_x.get_shape())

def _entropy(self):
  v = array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)[..., None]
  u = v * self.df[..., None]
  beta_arg = array_ops.concat([u, v], -1) / 2.
  return (math_ops.log(math_ops.abs(self.scale)) +
          0.5 * math_ops.log(self.df) +
          special_math_ops.lbeta(beta_arg) +
          0.5 * (self.df + 1.) *
          (math_ops.digamma(0.5 * (self.df + 1.)) -
           math_ops.digamma(0.5 * self.df)))

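# The return value above matches the location-scale Student t entropy
#   H = log|scale| + 0.5*log(df) + lbeta([df/2, 1/2])
#       + 0.5*(df + 1) * (digamma((df + 1)/2) - digamma(df/2)).
# A small NumPy sketch of the same expression (illustrative only), compared
# with scipy.stats.t:
import numpy as np
from scipy.special import gammaln, digamma
from scipy import stats

def student_t_entropy(df, scale):
  lbeta_arg = np.array([0.5 * df, 0.5])
  lbeta = gammaln(lbeta_arg).sum() - gammaln(lbeta_arg.sum())
  return (np.log(abs(scale)) + 0.5 * np.log(df) + lbeta
          + 0.5 * (df + 1.) * (digamma(0.5 * (df + 1.)) - digamma(0.5 * df)))

df, scale = 5., 2.
print(student_t_entropy(df, scale), stats.t(df, scale=scale).entropy())
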
def log_pmf(self, counts, name='log_pmf'):
  """`Log(P[counts])`, computed for every batch member.

  For each batch of counts `[n_1,...,n_k]`, `P[counts]` is the probability
  that after sampling `n` draws from this Dirichlet Multinomial distribution,
  the number of draws falling in class `j` is `n_j`.  Note that different
  sequences of draws can result in the same counts, thus the probability
  includes a combinatorial coefficient.

  Args:
    counts: Non-negative `float` or `double` tensor whose shape can be
      broadcast with `self.alpha`.  For fixed leading dimensions, the last
      dimension represents counts for the corresponding Dirichlet Multinomial
      distribution in `self.alpha`. `counts` is only legal if it sums up to
      `n` and its components are equal to integral values.  The second
      condition is relaxed if `allow_arbitrary_counts` is set.
    name: Name to give this Op, defaults to "log_pmf".

  Returns:
    Log probabilities for each record, shape `[N1,...,Nn]`.
  """
  n = self._n
  alpha = self._alpha
  with ops.name_scope(self.name):
    with ops.op_scope([n, alpha, counts], name):
      counts = self._check_counts(counts)
      # Use the same dtype as alpha for computations.
      counts = math_ops.cast(counts, self.dtype)
      ordered_pmf = (special_math_ops.lbeta(alpha + counts) -
                     special_math_ops.lbeta(alpha))
      log_pmf = ordered_pmf + _log_combinations(n, counts)
      # If alpha = counts = [[]], ordered_pmf carries the right shape, which
      # is [].  However, since reduce_sum([[]]) = [0], log_combinations = [0],
      # which is not correct.  Luckily, [] + [0] = [], so the sum is fine, but
      # shape must be inferred from ordered_pmf.  We must also make this
      # broadcastable with n, so this is multiplied by n to ensure the shape
      # is correctly inferred.
      # Note also that tf.constant([]).get_shape() =
      # TensorShape([Dimension(0)])
      broadcasted_tensor = ordered_pmf * n
      log_pmf.set_shape(broadcasted_tensor.get_shape())
      return log_pmf

def _moment(self, n):
  """Compute the n'th (uncentered) moment."""
  expanded_concentration1 = array_ops.ones_like(
      self.total_concentration, dtype=self.dtype) * self.concentration1
  expanded_concentration0 = array_ops.ones_like(
      self.total_concentration, dtype=self.dtype) * self.concentration0
  beta_arg0 = 1 + n / expanded_concentration1
  beta_arg = array_ops.stack([beta_arg0, expanded_concentration0], -1)
  log_moment = math_ops.log(expanded_concentration0) + special_math_ops.lbeta(
      beta_arg)
  return math_ops.exp(log_moment)

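# The expression above matches the n'th raw moment of a Kumaraswamy(a, b)
# distribution, E[X^n] = b * B(1 + n/a, b), assuming concentration1 plays the
# role of a and concentration0 the role of b.  A hedged NumPy/SciPy sketch
# (illustrative names) verifying that identity by numerical quadrature:
import numpy as np
from scipy.special import gammaln
from scipy.integrate import quad

def kumaraswamy_moment(a, b, n):
  beta_arg = np.array([1. + n / a, b])
  lbeta = gammaln(beta_arg).sum() - gammaln(beta_arg.sum())
  return np.exp(np.log(b) + lbeta)

a, b, n = 2., 3., 2
pdf = lambda x: a * b * x ** (a - 1.) * (1. - x ** a) ** (b - 1.)
numeric, _ = quad(lambda x: x ** n * pdf(x), 0., 1.)
print(kumaraswamy_moment(a, b, n), numeric)  # should agree closely
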
def test_empty_rank2_with_zero_batch_dim_returns_empty(self):
  with self.test_session(use_gpu=self._use_gpu):
    batch_size = 0
    for event_size in [0, 1, 2]:
      x = constant_op.constant([], shape=[batch_size, event_size])
      lbeta_x = special_math_ops.lbeta(x)
      expected_result = constant_op.constant([], shape=[batch_size])
      self.assertAllEqual(expected_result.eval(), lbeta_x.eval())
      self.assertEqual(expected_result.get_shape(), lbeta_x.get_shape())

def test_four_dimensional_arg_with_partial_shape_dynamic(self):
  x_ = np.ones((3, 2, 3, 4))
  # Gamma(1) = 0! = 1
  # Gamma(1 + 1 + 1 + 1) = Gamma(4) = 3! = 6
  # ==> Beta([1, 1, 1, 1])
  #     = Gamma(1) * Gamma(1) * Gamma(1) * Gamma(1) / Gamma(1 + 1 + 1 + 1)
  #     = 1 / 6
  expected_beta_x = 1 / 6 * np.ones((3, 2, 3))
  with self.test_session(use_gpu=True):
    x_ph = array_ops.placeholder(dtypes.float32, [3, 2, 3, None])
    beta_ph = math_ops.exp(special_math_ops.lbeta(x_ph))
    self.assertAllClose(expected_beta_x, beta_ph.eval(feed_dict={x_ph: x_}))

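# `lbeta` computes the log of the multivariate Beta function along the last
# axis: lbeta(x) = sum_i lgamma(x_i) - lgamma(sum_i x_i).  A reference NumPy
# sketch (not the TensorFlow kernel) reproducing the 1/6 value worked out in
# the comment of the test above:
import numpy as np
from scipy.special import gammaln

def lbeta_reference(x):
  x = np.asarray(x, dtype=np.float64)
  return gammaln(x).sum(axis=-1) - gammaln(x.sum(axis=-1))

print(np.exp(lbeta_reference(np.ones((3, 2, 3, 4)))))  # every entry ~ 1/6
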
def entropy(self, name="entropy"):
  """Entropy of the distribution in nats."""
  with ops.name_scope(self.name):
    with ops.op_scope([self._alpha, self._alpha_0], name):
      alpha = self._alpha
      alpha_0 = self._alpha_0
      entropy = special_math_ops.lbeta(alpha)
      entropy += (alpha_0 - math_ops.cast(
          self.event_shape()[0], self.dtype)) * math_ops.digamma(alpha_0)
      entropy += -math_ops.reduce_sum(
          (alpha - 1) * math_ops.digamma(alpha),
          reduction_indices=[-1],
          keep_dims=False)
      return entropy

def entropy(self, name="entropy"):
  """The entropy of Student t distribution(s).

  Args:
    name: The name to give this op.

  Returns:
    entropy: tensor of dtype `dtype`, the entropy.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self._df, self._sigma], name):
      u = array_ops.expand_dims(self._df + self._zeros(), -1)
      v = array_ops.expand_dims(self._ones(), -1)
      beta_arg = array_ops.concat(len(u.get_shape()) - 1, [u, v]) / 2
      return ((self._df + 1) / 2 * (math_ops.digamma((self._df + 1) / 2) -
                                    math_ops.digamma(self._df / 2)) +
              math_ops.log(self._df) / 2 +
              special_math_ops.lbeta(beta_arg) +
              math_ops.log(self._sigma))

def log_prob(self, x, name="log_prob"):
  """`Log(P[x])`, computed for every batch member.

  Args:
    x: Non-negative tensor with dtype `dtype` and whose shape can be
      broadcast with `self.alpha`.  For fixed leading dimensions, the last
      dimension represents counts for the corresponding Dirichlet
      distribution in `self.alpha`. `x` is only legal if it sums up to one.
    name: Name to give this Op, defaults to "log_prob".

  Returns:
    Log probabilities for each record, shape `[N1,...,Nm]`.
  """
  alpha = self._alpha
  with ops.name_scope(self.name):
    with ops.op_scope([alpha, x], name):
      x = self._check_x(x)
      unnorm_prob = (alpha - 1) * math_ops.log(x)
      log_prob = math_ops.reduce_sum(
          unnorm_prob, reduction_indices=[-1],
          keep_dims=False) - special_math_ops.lbeta(alpha)
      return log_prob

def test_empty_rank1_dynamic_alloc_input_raises_op_error(self):
  with self.test_session(use_gpu=self._use_gpu):
    ph = array_ops.placeholder(dtypes.float32)
    with self.assertRaisesOpError('rank'):
      special_math_ops.lbeta(ph).eval(feed_dict={ph: []})

def _log_normalization(self):
  return special_math_ops.lbeta(self.concentration)

def _kl_dirichlet_dirichlet(d1, d2, name=None):
  """Batchwise KL divergence KL(d1 || d2) with d1 and d2 Dirichlet.

  Args:
    d1: instance of a Dirichlet distribution object.
    d2: instance of a Dirichlet distribution object.
    name: (optional) Name to use for created operations.
      Default is "kl_dirichlet_dirichlet".

  Returns:
    Batchwise KL(d1 || d2)
  """
  with ops.name_scope(name, "kl_dirichlet_dirichlet", values=[
      d1.concentration, d2.concentration]):
    # The KL between Dirichlet distributions can be derived as follows. We
    # have
    #
    #   Dir(x; a) = 1 / B(a) * prod_i[x[i]^(a[i] - 1)]
    #
    # where B(a) is the multivariate Beta function:
    #
    #   B(a) = Gamma(a[1]) * ... * Gamma(a[n]) / Gamma(a[1] + ... + a[n])
    #
    # The KL is
    #
    #   KL(Dir(x; a), Dir(x; b)) = E_Dir(x; a){log(Dir(x; a) / Dir(x; b))}
    #
    # so we'll need to know the log density of the Dirichlet. This is
    #
    #   log(Dir(x; a)) = sum_i[(a[i] - 1) log(x[i])] - log B(a)
    #
    # The only term that matters for the expectations is the log(x[i]). To
    # compute the expectation of this term over the Dirichlet density, we can
    # use the following facts about the Dirichlet in exponential family form:
    #   1. log(x[i]) is a sufficient statistic
    #   2. expected sufficient statistics (of any exp family distribution)
    #      are equal to derivatives of the log normalizer with respect to
    #      corresponding natural parameters: E{T[i](x)} = dA/d(eta[i])
    #
    # To proceed, we can rewrite the Dirichlet density in exponential family
    # form as follows:
    #
    #   Dir(x; a) = exp{eta(a) . T(x) - A(a)}
    #
    # where '.' is the dot product of vectors eta and T, and A is a scalar:
    #
    #   eta[i](a) = a[i] - 1
    #     T[i](x) = log(x[i])
    #        A(a) = log B(a)
    #
    # Now, we can use fact (2) above to write
    #
    #   E_Dir(x; a)[log(x[i])]
    #       = dA(a) / da[i]
    #       = d/da[i] log B(a)
    #       = d/da[i] (sum_j lgamma(a[j])) - lgamma(sum_j a[j])
    #       = digamma(a[i]) - digamma(sum_j a[j])
    #
    # Putting it all together, we have
    #
    #   KL[Dir(x; a) || Dir(x; b)]
    #       = E_Dir(x; a){log(Dir(x; a) / Dir(x; b))}
    #       = E_Dir(x; a){sum_i[(a[i] - b[i]) log(x[i])]} - (lbeta(a) - lbeta(b))
    #       = sum_i[(a[i] - b[i]) * E_Dir(x; a){log(x[i])}] - lbeta(a) + lbeta(b)
    #       = sum_i[(a[i] - b[i]) * (digamma(a[i]) - digamma(sum_j a[j]))]
    #         - lbeta(a) + lbeta(b)

    digamma_sum_d1 = math_ops.digamma(
        math_ops.reduce_sum(d1.concentration, axis=-1, keepdims=True))
    digamma_diff = math_ops.digamma(d1.concentration) - digamma_sum_d1
    concentration_diff = d1.concentration - d2.concentration

    return (math_ops.reduce_sum(concentration_diff * digamma_diff, axis=-1) -
            special_math_ops.lbeta(d1.concentration) +
            special_math_ops.lbeta(d2.concentration))

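# A compact NumPy sketch of the closed form derived in the comments above,
#   KL[Dir(a) || Dir(b)] = sum_i (a_i - b_i)(digamma(a_i) - digamma(sum_j a_j))
#                          - lbeta(a) + lbeta(b),
# together with a rough Monte Carlo check.  Illustrative only; the helper
# names below are not part of TensorFlow.
import numpy as np
from scipy.special import gammaln, digamma

def _lbeta(z):
  z = np.asarray(z, dtype=np.float64)
  return gammaln(z).sum(axis=-1) - gammaln(z.sum(axis=-1))

def kl_dirichlet_dirichlet(a, b):
  a = np.asarray(a, dtype=np.float64)
  b = np.asarray(b, dtype=np.float64)
  digamma_diff = digamma(a) - digamma(a.sum(axis=-1, keepdims=True))
  return ((a - b) * digamma_diff).sum(axis=-1) - _lbeta(a) + _lbeta(b)

a, b = np.array([2., 3., 5.]), np.array([1., 1., 1.])
x = np.random.default_rng(0).dirichlet(a, size=200000)
# log Dir(x; a) - log Dir(x; b), averaged over samples from Dir(a).
log_ratio = ((a - b) * np.log(x)).sum(axis=-1) - _lbeta(a) + _lbeta(b)
print(kl_dirichlet_dirichlet(a, b), log_ratio.mean())  # should agree closely
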
def test_empty_rank1_input_raises_value_error(self):
  with self.test_session(use_gpu=self._use_gpu):
    with self.assertRaisesRegexp(ValueError, 'rank'):
      special_math_ops.lbeta([])

def _log_prob(self, counts):
  counts = self._maybe_assert_valid_sample(counts)
  ordered_prob = (special_math_ops.lbeta(self.concentration + counts) -
                  special_math_ops.lbeta(self.concentration))
  return ordered_prob + distribution_util.log_combinations(
      self.total_count, counts)

def test_empty_rank2_or_greater_input_gives_empty_output(self):
  with self.test_session(use_gpu=self._use_gpu):
    self.assertAllEqual([], special_math_ops.lbeta([[]]).eval())
    self.assertEqual((0,), special_math_ops.lbeta([[]]).get_shape())
    self.assertAllEqual([[]], special_math_ops.lbeta([[[]]]).eval())
    self.assertEqual((1, 0), special_math_ops.lbeta([[[]]]).get_shape())