def _log_prob(self, counts):
   counts = self._assert_valid_counts(counts)
   ordered_prob = (special_math_ops.lbeta(self.alpha + counts) -
                   special_math_ops.lbeta(self.alpha))
   log_prob = ordered_prob + distribution_util.log_combinations(
       self.n, counts)
   return log_prob
 def test_empty_rank2_or_greater_input_gives_empty_output_dynamic_alloc(self):
   with self.test_session(use_gpu=self._use_gpu):
     ph = array_ops.placeholder(dtypes.float32)
     self.assertAllEqual(
         [], special_math_ops.lbeta(ph).eval(feed_dict={ph: [[]]}))
     self.assertAllEqual(
         [[]], special_math_ops.lbeta(ph).eval(feed_dict={ph: [[[]]]}))
  def log_pmf(self, counts, name=None):
    """`Log(P[counts])`, computed for every batch member.

    For each batch of counts `[c_1,...,c_k]`, `P[counts]` is the probability
    that after sampling `sum_j c_j` draws from this Dirichlet Multinomial
    distribution, the number of draws falling in class `j` is `c_j`.  Note that
    different sequences of draws can result in the same counts, thus the
    probability includes a combinatorial coefficient.

    Args:
      counts:  Non-negative `float`, `double`, or `int` tensor whose shape can
        be broadcast with `self.alpha`.  For fixed leading dimensions, the last
        dimension represents counts for the corresponding Dirichlet Multinomial
        distribution in `self.alpha`.
      name:  Name to give this Op, defaults to "log_pmf".

    Returns:
      Log probabilities for each record, shape `[N1,...,Nn]`.
    """
    alpha = self._alpha
    with ops.op_scope([alpha, counts], name, 'log_pmf'):
      counts = self._check_counts(counts)
      ordered_pmf = (special_math_ops.lbeta(alpha + counts) -
                     special_math_ops.lbeta(alpha))
      log_pmf = ordered_pmf + _log_combinations(counts)
      # If alpha = counts = [[]], ordered_pmf carries the right shape, which is
      # [].  However, since reduce_sum([[]]) = [0], log_combinations = [0],
      # which is not correct.  Luckily, [] + [0] = [], so the sum is fine, but
      # shape must be inferred from ordered_pmf.
      # Note also that tf.constant([]).get_shape() = TensorShape([Dimension(0)])
      log_pmf.set_shape(ordered_pmf.get_shape())
      return log_pmf
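
For reference, the quantity assembled above is the standard Dirichlet-Multinomial log-pmf; written out (with B the multivariate Beta function that `lbeta` computes in log space over the last dimension):

    \log P[c_1, \ldots, c_k]
        = \log \frac{n!}{c_1! \cdots c_k!}
        + \log B(\alpha + c) - \log B(\alpha),
    \qquad
    B(\alpha) = \frac{\prod_j \Gamma(\alpha_j)}{\Gamma\big(\sum_j \alpha_j\big)},
    \quad n = \sum_j c_j

The multinomial coefficient is what the `_log_combinations` helper contributes, and the Beta ratio is the `lbeta(alpha + counts) - lbeta(alpha)` difference.
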
  def log_prob(self, counts, name="log_prob"):
    """`Log(P[counts])`, computed for every batch member.

    For each batch of counts `[n_1,...,n_k]`, `P[counts]` is the probability
    that after sampling `n` draws from this Dirichlet Multinomial
    distribution, the number of draws falling in class `j` is `n_j`.  Note that
    different sequences of draws can result in the same counts, thus the
    probability includes a combinatorial coefficient.

    Args:
      counts:  Non-negative tensor with dtype `dtype` and whose shape can be
        broadcast with `self.alpha`.  For fixed leading dimensions, the last
        dimension represents counts for the corresponding Dirichlet Multinomial
        distribution in `self.alpha`. `counts` is only legal if it sums up to
        `n` and its components are equal to integer values.
      name:  Name to give this Op, defaults to "log_prob".

    Returns:
      Log probabilities for each record, shape `[N1,...,Nn]`.
    """
    n = self._n
    alpha = self._alpha
    with ops.name_scope(self.name):
      with ops.name_scope(name, values=[n, alpha, counts]):
        counts = self._check_counts(counts)

        ordered_prob = (special_math_ops.lbeta(alpha + counts) -
                        special_math_ops.lbeta(alpha))
        log_prob = ordered_prob + distribution_util.log_combinations(
            n, counts)
        return log_prob
 def _log_prob(self, counts):
   counts = self._maybe_assert_valid_sample(counts)
   ordered_prob = (
       special_math_ops.lbeta(self.concentration + counts)
       - special_math_ops.lbeta(self.concentration))
   return ordered_prob + distribution_util.log_combinations(
       self.total_count, counts)
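
As a quick, framework-free check that the lbeta-based expression matches a direct evaluation of the Dirichlet-Multinomial pmf, here is a standalone NumPy/SciPy sketch. The `lbeta` and `log_combinations` helpers below are hand-rolled stand-ins for the TensorFlow ops, and the numbers are illustrative only:

import numpy as np
from scipy.special import gammaln


def lbeta(x):
  # Stand-in for special_math_ops.lbeta: log multivariate Beta over the last axis.
  x = np.asarray(x, dtype=np.float64)
  return np.sum(gammaln(x), axis=-1) - gammaln(np.sum(x, axis=-1))


def log_combinations(n, counts):
  # Stand-in for distribution_util.log_combinations: log(n! / prod_j counts_j!).
  counts = np.asarray(counts, dtype=np.float64)
  return gammaln(n + 1.) - np.sum(gammaln(counts + 1.), axis=-1)


alpha = np.array([1.0, 2.0, 3.0])
counts = np.array([2.0, 1.0, 1.0])
n = counts.sum()

# The lbeta-based form used by the distribution code above.
log_prob_lbeta = (log_combinations(n, counts)
                  + lbeta(alpha + counts) - lbeta(alpha))

# Direct evaluation of the Dirichlet-Multinomial log-pmf.
log_prob_direct = (
    gammaln(n + 1.) - np.sum(gammaln(counts + 1.))
    + gammaln(alpha.sum()) - gammaln(alpha.sum() + n)
    + np.sum(gammaln(alpha + counts) - gammaln(alpha)))

assert np.allclose(log_prob_lbeta, log_prob_direct)
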
 def test_two_dimensional_arg(self):
   # Should evaluate to 1/2.
   x_one_half = [[2, 1.], [2, 1.]]
   with self.test_session(use_gpu=self._use_gpu):
     self.assertAllClose(
         [0.5, 0.5], math_ops.exp(special_math_ops.lbeta(x_one_half)).eval())
     self.assertEqual((2,), special_math_ops.lbeta(x_one_half).get_shape())
 def test_complicated_shape(self):
   with self.session(use_gpu=True):
     x = ops.convert_to_tensor(np.random.rand(3, 2, 2))
     self.assertAllEqual(
         (3, 2), self.evaluate(array_ops.shape(special_math_ops.lbeta(x))))
     self.assertEqual(
         tensor_shape.TensorShape([3, 2]),
         special_math_ops.lbeta(x).get_shape())
 def test_length_1_last_dimension_results_in_one(self):
   # If there is only one coefficient, the formula still works, and we get one
   # as the answer, always.
   x_a = [5.5]
   x_b = [0.1]
   with self.test_session(use_gpu=True):
     self.assertAllClose(1, math_ops.exp(special_math_ops.lbeta(x_a)).eval())
     self.assertAllClose(1, math_ops.exp(special_math_ops.lbeta(x_b)).eval())
     self.assertEqual((), special_math_ops.lbeta(x_a).get_shape())
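
The "formula" the comment refers to is the multivariate log-Beta function that `lbeta` evaluates along the last dimension:

    \mathrm{lbeta}(x) = \sum_j \log\Gamma(x_j) - \log\Gamma\Big(\sum_j x_j\Big)

With a length-1 last dimension this is \log\Gamma(x_1) - \log\Gamma(x_1) = 0, so \exp(\mathrm{lbeta}(x)) = 1 regardless of the coefficient, which is exactly what this test asserts.
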
 def test_one_dimensional_arg(self):
   # Should evaluate to 1 and 1/2.
   x_one = [1, 1.]
   x_one_half = [2, 1.]
   with self.test_session(use_gpu=self._use_gpu):
     self.assertAllClose(1, math_ops.exp(special_math_ops.lbeta(x_one)).eval())
     self.assertAllClose(
         0.5, math_ops.exp(special_math_ops.lbeta(x_one_half)).eval())
     self.assertEqual([], special_math_ops.lbeta(x_one).get_shape())
 def test_two_dimensional_proper_shape(self):
   # Should evaluate to 1/2.
   x_one_half = [[2, 1.], [2, 1.]]
   with self.test_session(use_gpu=True):
     self.assertAllClose(
         [0.5, 0.5], math_ops.exp(special_math_ops.lbeta(x_one_half)).eval())
     self.assertEqual(
         (2,), array_ops.shape(special_math_ops.lbeta(x_one_half)).eval())
     self.assertEqual(
         tensor_shape.TensorShape([2]),
         special_math_ops.lbeta(x_one_half).get_shape())
 def test_two_dimensional_arg_dynamic(self):
   # Should evaluate to 1/2.
   x_one_half = [[2, 1.], [2, 1.]]
   with self.test_session(use_gpu=True):
     ph = array_ops.placeholder(dtypes.float32)
     beta_ph = math_ops.exp(special_math_ops.lbeta(ph))
     self.assertAllClose([0.5, 0.5], beta_ph.eval(feed_dict={ph: x_one_half}))
  def test_empty_rank1_returns_negative_infinity(self):
    with self.test_session(use_gpu=True):
      x = constant_op.constant([], shape=[0])
      lbeta_x = special_math_ops.lbeta(x)
      expected_result = constant_op.constant(-np.inf, shape=())

      self.assertAllEqual(expected_result.eval(), lbeta_x.eval())
      self.assertEqual(expected_result.get_shape(), lbeta_x.get_shape())
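
The expected -inf follows from the same lbeta reduction in the implementation this test exercises: with an empty last dimension the sum of log-gammas has no terms, while the log-gamma of the (empty, hence zero) sum diverges,

    \mathrm{lbeta}([\,]) = 0 - \log\Gamma(0) = -\infty

(Other snippets in this listing target versions of `lbeta` that instead reject empty rank-1 input with a 'rank' error.)
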
 def _log_prob(self, x):
   x = ops.convert_to_tensor(x, name="x")
   x = self._assert_valid_sample(x)
   unnorm_prob = (self.alpha - 1.) * math_ops.log(x)
   log_prob = math_ops.reduce_sum(
       unnorm_prob, reduction_indices=[-1],
       keep_dims=False) - special_math_ops.lbeta(self.alpha)
   return log_prob
 def _entropy(self):
   u = array_ops.expand_dims(self.df * self._ones(), -1)
   v = array_ops.expand_dims(self._ones(), -1)
   beta_arg = array_ops.concat_v2([u, v], len(u.get_shape()) - 1) / 2
   half_df = 0.5 * self.df
   return ((0.5 + half_df) *
           (math_ops.digamma(0.5 + half_df) - math_ops.digamma(half_df)) + 0.5
           * math_ops.log(self.df) + special_math_ops.lbeta(beta_arg) +
           math_ops.log(self.sigma))
 def _entropy(self):
   entropy = special_math_ops.lbeta(self.alpha)
   entropy += math_ops.digamma(self.alpha_sum) * (
       self.alpha_sum - math_ops.cast(self.event_shape()[0], self.dtype))
   entropy += -math_ops.reduce_sum(
       (self.alpha - 1.) * math_ops.digamma(self.alpha),
       reduction_indices=[-1],
       keep_dims=False)
   return entropy
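
Written out, this is the standard Dirichlet entropy, with K the event size and \alpha_0 = \sum_j \alpha_j:

    H[\mathrm{Dir}(\alpha)]
        = \log B(\alpha)
        + (\alpha_0 - K)\,\psi(\alpha_0)
        - \sum_{j=1}^{K} (\alpha_j - 1)\,\psi(\alpha_j)
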
 def test_one_dimensional_arg_dynamic_alloc(self):
   # Should evaluate to 1 and 1/2.
   x_one = [1, 1.]
   x_one_half = [2, 1.]
   with self.test_session(use_gpu=self._use_gpu):
     ph = array_ops.placeholder(dtypes.float32)
     beta_ph = math_ops.exp(special_math_ops.lbeta(ph))
     self.assertAllClose(1, beta_ph.eval(feed_dict={ph: x_one}))
     self.assertAllClose(0.5, beta_ph.eval(feed_dict={ph: x_one_half}))
  def test_empty_rank2_with_zero_last_dim_returns_negative_infinity(self):
    with self.test_session(use_gpu=True):
      event_size = 0
      for batch_size in [0, 1, 2]:
        x = constant_op.constant([], shape=[batch_size, event_size])
        lbeta_x = special_math_ops.lbeta(x)
        expected_result = constant_op.constant(-np.inf, shape=[batch_size])

        self.assertAllEqual(expected_result.eval(), lbeta_x.eval())
        self.assertEqual(expected_result.get_shape(), lbeta_x.get_shape())
 def _entropy(self):
   v = array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)[..., None]
   u = v * self.df[..., None]
   beta_arg = array_ops.concat([u, v], -1) / 2.
   return (math_ops.log(math_ops.abs(self.scale)) +
           0.5 * math_ops.log(self.df) +
           special_math_ops.lbeta(beta_arg) +
           0.5 * (self.df + 1.) *
           (math_ops.digamma(0.5 * (self.df + 1.)) -
            math_ops.digamma(0.5 * self.df)))
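
For reference, this is the closed-form entropy of a location-scale Student-t with \nu = df degrees of freedom and scale \sigma, a standard result:

    H = \ln|\sigma| + \tfrac{1}{2}\ln\nu + \ln B\big(\tfrac{\nu}{2}, \tfrac{1}{2}\big)
        + \tfrac{\nu + 1}{2}\Big[\psi\big(\tfrac{\nu + 1}{2}\big) - \psi\big(\tfrac{\nu}{2}\big)\Big]

where the `lbeta(beta_arg)` term supplies \ln B(\nu/2, 1/2).
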
  def log_pmf(self, counts, name='log_pmf'):
    """`Log(P[counts])`, computed for every batch member.

    For each batch of counts `[n_1,...,n_k]`, `P[counts]` is the probability
    that after sampling `n` draws from this Dirichlet Multinomial
    distribution, the number of draws falling in class `j` is `n_j`.  Note that
    different sequences of draws can result in the same counts, thus the
    probability includes a combinatorial coefficient.

    Args:
      counts:  Non-negative `float` or `double` tensor whose shape can
        be broadcast with `self.alpha`.  For fixed leading dimensions, the last
        dimension represents counts for the corresponding Dirichlet Multinomial
        distribution in `self.alpha`. `counts` is only legal if it sums up to
        `n` and its components are equal to integral values. The second
        condition is relaxed if `allow_arbitrary_counts` is set.
      name:  Name to give this Op, defaults to "log_pmf".

    Returns:
      Log probabilities for each record, shape `[N1,...,Nn]`.
    """
    n = self._n
    alpha = self._alpha
    with ops.name_scope(self.name):
      with ops.op_scope([n, alpha, counts], name):
        counts = self._check_counts(counts)
        # Use the same dtype as alpha for computations.
        counts = math_ops.cast(counts, self.dtype)

        ordered_pmf = (special_math_ops.lbeta(alpha + counts) -
                       special_math_ops.lbeta(alpha))
        log_pmf = ordered_pmf + _log_combinations(n, counts)
        # If alpha = counts = [[]], ordered_pmf carries the right shape, which
        # is [].  However, since reduce_sum([[]]) = [0], log_combinations = [0],
        # which is not correct.  Luckily, [] + [0] = [], so the sum is fine, but
        # shape must be inferred from ordered_pmf. We must also make this
        # broadcastable with n, so this is multiplied by n to ensure the shape
        # is correctly inferred.
        # Note also that tf.constant([]).get_shape() =
        # TensorShape([Dimension(0)])
        broadcasted_tensor = ordered_pmf * n
        log_pmf.set_shape(broadcasted_tensor.get_shape())
        return log_pmf
 def _moment(self, n):
   """Compute the n'th (uncentered) moment."""
   expanded_concentration1 = array_ops.ones_like(
       self.total_concentration, dtype=self.dtype) * self.concentration1
   expanded_concentration0 = array_ops.ones_like(
       self.total_concentration, dtype=self.dtype) * self.concentration0
   beta_arg0 = 1 + n / expanded_concentration1
   beta_arg = array_ops.stack([beta_arg0, expanded_concentration0], -1)
   log_moment = math_ops.log(expanded_concentration0) + special_math_ops.lbeta(
       beta_arg)
   return math_ops.exp(log_moment)
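
The expression matches the raw-moment formula of the Kumaraswamy distribution, assuming (as the parameter names suggest) concentration1 = a and concentration0 = b:

    \mathbb{E}[X^n] = b \, B\big(1 + \tfrac{n}{a},\; b\big)

computed here in log space as \log b + \mathrm{lbeta}([1 + n/a,\, b]) and then exponentiated.
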
  def test_empty_rank2_with_zero_batch_dim_returns_empty(self):
    with self.test_session(use_gpu=self._use_gpu):
      batch_size = 0
      for event_size in [0, 1, 2]:
        x = constant_op.constant([], shape=[batch_size, event_size])
        lbeta_x = special_math_ops.lbeta(x)

        expected_result = constant_op.constant([], shape=[batch_size])

        self.assertAllEqual(expected_result.eval(), lbeta_x.eval())
        self.assertEqual(expected_result.get_shape(), lbeta_x.get_shape())
 def test_four_dimensional_arg_with_partial_shape_dynamic(self):
   x_ = np.ones((3, 2, 3, 4))
   # Gamma(1) = 0! = 1
   # Gamma(1 + 1 + 1 + 1) = Gamma(4) = 3! = 6
   # ==> Beta([1, 1, 1, 1])
   #     = Gamma(1) * Gamma(1) * Gamma(1) * Gamma(1) / Gamma(1 + 1 + 1 + 1)
   #     = 1 / 6
   expected_beta_x = 1 / 6 * np.ones((3, 2, 3))
   with self.test_session(use_gpu=True):
     x_ph = array_ops.placeholder(dtypes.float32, [3, 2, 3, None])
     beta_ph = math_ops.exp(special_math_ops.lbeta(x_ph))
     self.assertAllClose(expected_beta_x, beta_ph.eval(feed_dict={x_ph: x_}))
  def entropy(self, name="entropy"):
    """Entropy of the distribution in nats."""
    with ops.name_scope(self.name):
      with ops.op_scope([self._alpha, self._alpha_0], name):
        alpha = self._alpha
        alpha_0 = self._alpha_0

        entropy = special_math_ops.lbeta(alpha)
        entropy += (alpha_0 - math_ops.cast(
            self.event_shape()[0], self.dtype)) * math_ops.digamma(alpha_0)
        entropy += -math_ops.reduce_sum(
            (alpha - 1) * math_ops.digamma(alpha),
            reduction_indices=[-1],
            keep_dims=False)
        return entropy
  def entropy(self, name="entropy"):
    """The entropy of Student t distribution(s).

    Args:
      name: The name to give this op.

    Returns:
      entropy: tensor of dtype `dtype`, the entropy.
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self._df, self._sigma], name):
        u = array_ops.expand_dims(self._df + self._zeros(), -1)
        v = array_ops.expand_dims(self._ones(), -1)
        beta_arg = array_ops.concat(len(u.get_shape()) - 1, [u, v]) / 2
        return ((self._df + 1) / 2 * (math_ops.digamma((self._df + 1) / 2) -
                                      math_ops.digamma(self._df / 2)) +
                math_ops.log(self._df) / 2 +
                special_math_ops.lbeta(beta_arg) +
                math_ops.log(self._sigma))
  def log_prob(self, x, name="log_prob"):
    """`Log(P[x])`, computed for every batch member.

    Args:
      x:  Non-negative tensor with dtype `dtype` and whose shape can
        be broadcast with `self.alpha`.  For fixed leading dimensions, the last
        dimension represents a point in the simplex (i.e. a probability vector)
        for the corresponding Dirichlet distribution in `self.alpha`. `x` is
        only legal if it sums up to one.
      name:  Name to give this Op, defaults to "log_prob".

    Returns:
      Log probabilities for each record, shape `[N1,...,Nm]`.
    """
    alpha = self._alpha
    with ops.name_scope(self.name):
      with ops.op_scope([alpha, x], name):
        x = self._check_x(x)

        unnorm_prob = (alpha - 1) * math_ops.log(x)
        log_prob = math_ops.reduce_sum(
            unnorm_prob, reduction_indices=[-1],
            keep_dims=False) - special_math_ops.lbeta(alpha)

        return log_prob
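
The quantity computed above is the Dirichlet log density,

    \log \mathrm{Dir}(x; \alpha) = \sum_j (\alpha_j - 1) \log x_j - \log B(\alpha),

with the normalizer \log B(\alpha) supplied by `lbeta(alpha)`.
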
 def _log_normalization(self):
   return special_math_ops.lbeta(self.concentration)
def _kl_dirichlet_dirichlet(d1, d2, name=None):
  """Batchwise KL divergence KL(d1 || d2) with d1 and d2 Dirichlet.

  Args:
    d1: instance of a Dirichlet distribution object.
    d2: instance of a Dirichlet distribution object.
    name: (optional) Name to use for created operations.
      default is "kl_dirichlet_dirichlet".

  Returns:
    Batchwise KL(d1 || d2)
  """
  with ops.name_scope(name, "kl_dirichlet_dirichlet", values=[
      d1.concentration, d2.concentration]):
    # The KL between Dirichlet distributions can be derived as follows. We have
    #
    #   Dir(x; a) = 1 / B(a) * prod_i[x[i]^(a[i] - 1)]
    #
    # where B(a) is the multivariate Beta function:
    #
    #   B(a) = Gamma(a[1]) * ... * Gamma(a[n]) / Gamma(a[1] + ... + a[n])
    #
    # The KL is
    #
    #   KL(Dir(x; a), Dir(x; b)) = E_Dir(x; a){log(Dir(x; a) / Dir(x; b))}
    #
    # so we'll need to know the log density of the Dirichlet. This is
    #
    #   log(Dir(x; a)) = sum_i[(a[i] - 1) log(x[i])] - log B(a)
    #
    # The only term that matters for the expectations is the log(x[i]). To
    # compute the expectation of this term over the Dirichlet density, we can
    # use the following facts about the Dirichlet in exponential family form:
    #   1. log(x[i]) is a sufficient statistic
    #   2. expected sufficient statistics (of any exp family distribution) are
    #      equal to derivatives of the log normalizer with respect to
    #      corresponding natural parameters: E{T[i](x)} = dA/d(eta[i])
    #
    # To proceed, we can rewrite the Dirichlet density in exponential family
    # form as follows:
    #
    #   Dir(x; a) = exp{eta(a) . T(x) - A(a)}
    #
    # where '.' is the dot product of vectors eta and T, and A is a scalar:
    #
    #   eta[i](a) = a[i] - 1
    #     T[i](x) = log(x[i])
    #        A(a) = log B(a)
    #
    # Now, we can use fact (2) above to write
    #
    #   E_Dir(x; a)[log(x[i])]
    #       = dA(a) / da[i]
    #       = d/da[i] log B(a)
    #       = d/da[i] [sum_j lgamma(a[j]) - lgamma(sum_j a[j])]
    #       = digamma(a[i]) - digamma(sum_j a[j])
    #
    # Putting it all together, we have
    #
    # KL[Dir(x; a) || Dir(x; b)]
    #     = E_Dir(x; a){log(Dir(x; a) / Dir(x; b))}
    #     = E_Dir(x; a){sum_i[(a[i] - b[i]) log(x[i])]} - (lbeta(a) - lbeta(b))
    #     = sum_i[(a[i] - b[i]) * E_Dir(x; a){log(x[i])}] - lbeta(a) + lbeta(b)
    #     = sum_i[(a[i] - b[i]) * (digamma(a[i]) - digamma(sum_j a[j]))]
    #          - lbeta(a) + lbeta(b)

    digamma_sum_d1 = math_ops.digamma(
        math_ops.reduce_sum(d1.concentration, axis=-1, keepdims=True))
    digamma_diff = math_ops.digamma(d1.concentration) - digamma_sum_d1
    concentration_diff = d1.concentration - d2.concentration

    return (math_ops.reduce_sum(concentration_diff * digamma_diff, axis=-1) -
            special_math_ops.lbeta(d1.concentration) +
            special_math_ops.lbeta(d2.concentration))
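
A quick way to sanity-check the closed form derived in the comments above is to compare it with a Monte Carlo estimate. The sketch below is standalone NumPy/SciPy (illustrative only, not the TensorFlow implementation):

import numpy as np
from scipy.special import digamma, gammaln


def lbeta(a):
  # Log of the multivariate Beta function, reduced over the last axis.
  a = np.asarray(a, dtype=np.float64)
  return np.sum(gammaln(a), axis=-1) - gammaln(np.sum(a, axis=-1))


def kl_dirichlet_closed_form(a, b):
  # Same closed form as above:
  # sum_i (a_i - b_i) * (digamma(a_i) - digamma(sum_j a_j)) - lbeta(a) + lbeta(b)
  return (np.sum((a - b) * (digamma(a) - digamma(np.sum(a))))
          - lbeta(a) + lbeta(b))


def kl_dirichlet_monte_carlo(a, b, num_samples=200000, seed=0):
  # KL(a || b) = E_{x ~ Dir(a)}[log Dir(x; a) - log Dir(x; b)].
  x = np.random.default_rng(seed).dirichlet(a, size=num_samples)
  log_ratio = np.sum((a - b) * np.log(x), axis=-1) - lbeta(a) + lbeta(b)
  return np.mean(log_ratio)


a = np.array([1.5, 2.0, 3.0])
b = np.array([2.0, 2.0, 2.0])
print(kl_dirichlet_closed_form(a, b))  # exact closed form
print(kl_dirichlet_monte_carlo(a, b))  # stochastic, should be close
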
 def test_empty_rank1_input_raises_value_error(self):
   with self.test_session(use_gpu=self._use_gpu):
     with self.assertRaisesRegexp(ValueError, 'rank'):
       special_math_ops.lbeta([])
 def test_empty_rank2_or_greater_input_gives_empty_output(self):
   with self.test_session(use_gpu=self._use_gpu):
     self.assertAllEqual([], special_math_ops.lbeta([[]]).eval())
     self.assertEqual((0,), special_math_ops.lbeta([[]]).get_shape())
     self.assertAllEqual([[]], special_math_ops.lbeta([[[]]]).eval())
     self.assertEqual((1, 0), special_math_ops.lbeta([[[]]]).get_shape())
 def test_empty_rank1_dynamic_alloc_input_raises_op_error(self):
   with self.test_session(use_gpu=self._use_gpu):
     ph = array_ops.placeholder(dtypes.float32)
     with self.assertRaisesOpError('rank'):
       special_math_ops.lbeta(ph).eval(feed_dict={ph: []})