Exemple #1
0
  def log_prob(self, counts, name="log_prob"):
    """Log probability mass of `counts`, computed for every batch member.

    The result is `k * log(p) + (n - k) * log(1 - p)` plus the log binomial
    coefficient `lgamma(n + 1) - lgamma(k + 1) - lgamma(n - k + 1)`, where `k`
    is `counts`, `n` is `self._n` and `p` is `self._p`.  The coefficient
    accounts for the different draw orderings that yield the same count.

    Args:
      counts:  Tensor of success counts.  It is passed through
        `self._check_counts` before being used.
      name:  Name to give this Op, defaults to "log_prob".

    Returns:
      Tensor of log probabilities, broadcast from `counts`, `n` and `p`.
    """
    total = self._n
    success_prob = self._p
    with ops.name_scope(self.name):
      with ops.name_scope(name, values=[total, success_prob, counts]):
        counts = self._check_counts(counts)
        failures = total - counts

        # Log of p^k * (1 - p)^(n - k).
        log_outcome = (counts * math_ops.log(success_prob) +
                       failures * math_ops.log(1 - success_prob))

        # Log of n! / (k! * (n - k)!), using Gamma(m + 1) = m!.
        log_n_choose_k = (math_ops.lgamma(total + 1) -
                          math_ops.lgamma(counts + 1) -
                          math_ops.lgamma(failures + 1))
        return log_outcome + log_n_choose_k
 def _log_normalization(self, positive_counts):
   """Returns `-lgamma(total_count + k) + lgamma(k + 1) + lgamma(total_count)`.

   Here `k` is `positive_counts`.  When `self.validate_args` is set, `k` is
   first wrapped by
   `distribution_util.embed_check_nonnegative_discrete(..., check_integer=True)`.
   """
   counts = positive_counts
   if self.validate_args:
     counts = distribution_util.embed_check_nonnegative_discrete(
         counts, check_integer=True)
   neg_lgamma_sum = -math_ops.lgamma(self.total_count + counts)
   lgamma_counts_plus_one = math_ops.lgamma(counts + 1.)
   lgamma_total = math_ops.lgamma(self.total_count)
   return neg_lgamma_sum + lgamma_counts_plus_one + lgamma_total
def log_combinations(n, counts, name="log_combinations"):
  """Log of the multinomial coefficient.

  Given `n` and `counts`, where `counts` has last dimension `k`, this computes
  the logarithm of

  ```n! / prod_i counts_i!```

  where `i` runs over all `k` classes.  Factorials are evaluated through
  `lgamma`, using `Gamma(m + 1) = m!`.

  Args:
    n: Numeric `Tensor` broadcastable with `counts`. This represents `n`
      outcomes.
    counts: Numeric `Tensor` broadcastable with `n`. This represents counts
      in `k` classes, where `k` is the last dimension of the tensor.
    name: A name for this operation (optional).

  Returns:
    `Tensor` holding `lgamma(n + 1)` minus the sum of `lgamma(counts + 1)`
    over the last dimension of `counts`.
  """
  # Number of ways the counts could have come in, e.g. counts = [1, 2] gives
  # 3! / (1! * 2!) = 3 choose 2.  The reduction runs along the last
  # ("distribution") dimension of counts; n a priori represents sum(counts).
  with ops.name_scope(name, values=[n, counts]):
    n = ops.convert_to_tensor(n, name="n")
    counts = ops.convert_to_tensor(counts, name="counts")
    log_n_factorial = math_ops.lgamma(n + 1)
    log_count_factorials = math_ops.lgamma(counts + 1)
    # log(prod_i counts_i!) = sum_i log(counts_i!).
    log_denominator = math_ops.reduce_sum(log_count_factorials,
                                          reduction_indices=[-1])
    return log_n_factorial - log_denominator
 def nonempty_lbeta():
     """Returns sum(lgamma(x)) - lgamma(sum(x)), reducing over the last axis.

     `x` is taken from the enclosing scope.
     """
     lgamma_terms = math_ops.lgamma(x)
     sum_of_lgammas = math_ops.reduce_sum(lgamma_terms, reduction_indices=[-1])
     lgamma_of_sum = math_ops.lgamma(
         math_ops.reduce_sum(x, reduction_indices=[-1]))
     return sum_of_lgammas - lgamma_of_sum
Exemple #5
0
 def _prob(self, x):
   """Density at `x` for parameters `self.df`, `self.mu` and `self.sigma`.

   Computes
   `Gamma((df + 1) / 2) / Gamma(df / 2) / (sqrt(df * pi) * sigma)
    * (1 + y^2 / df)^(-(df + 1) / 2)` with `y = (x - mu) / sigma`; the Gamma
   ratio is formed in log space via `lgamma`.
   """
   standardized = (x - self.mu) / self.sigma
   half_df = 0.5 * self.df
   exponent = 0.5 + half_df
   gamma_ratio = math_ops.exp(
       math_ops.lgamma(exponent) - math_ops.lgamma(half_df))
   scale = math_ops.sqrt(self.df) * math.sqrt(math.pi) * self.sigma
   kernel = math_ops.pow(
       1. + math_ops.square(standardized) / self.df, -exponent)
   return gamma_ratio / scale * kernel
Exemple #6
0
def _kl_gamma_gamma(g0, g1, name=None):
  """Batched KL divergence KL(g0 || g1) for two Gamma distributions.

  Args:
    g0: Gamma distribution object; its `concentration` and `rate` are read.
    g1: Gamma distribution object; its `concentration` and `rate` are read.
    name: (optional) Name to use for created operations.
      Default is "kl_gamma_gamma".

  Returns:
    kl_gamma_gamma: `Tensor`. The batchwise KL(g0 || g1).
  """
  a0, b0 = g0.concentration, g0.rate
  a1, b1 = g1.concentration, g1.rate
  with ops.name_scope(name, "kl_gamma_gamma", values=[a0, b0, a1, b1]):
    # Closed form taken from:
    #   http://www.fil.ion.ucl.ac.uk/~wpenny/publications/densities.ps
    # A derivation is given at:
    #   http://stats.stackexchange.com/questions/11646/kullback-leibler-divergence-between-two-gamma-distributions   pylint: disable=line-too-long
    digamma_term = (a0 - a1) * math_ops.digamma(a0)
    rate_ratio_term = a0 * (b1 / b0 - 1.)
    return (digamma_term
            + math_ops.lgamma(a1)
            - math_ops.lgamma(a0)
            + a1 * math_ops.log(b0)
            - a1 * math_ops.log(b1)
            + rate_ratio_term)
Exemple #7
0
 def _log_prob(self, x):
   """Log density at `x` for parameters `self.df`, `self.mu`, `self.sigma`.

   Sums, in log space, the Gamma-ratio term, the `sqrt(df * pi) * sigma`
   normalizer and the `(1 + y^2 / df)` kernel with `y = (x - mu) / sigma`.
   """
   standardized = (x - self.mu) / self.sigma
   half_df = 0.5 * self.df
   log_gamma_ratio = (math_ops.lgamma(0.5 + half_df) -
                      math_ops.lgamma(half_df))
   log_kernel = (0.5 + half_df) * math_ops.log(
       1. + math_ops.square(standardized) / self.df)
   return (log_gamma_ratio
           - 0.5 * math_ops.log(self.df)
           - 0.5 * math.log(math.pi)
           - math_ops.log(self.sigma)
           - log_kernel)
Exemple #8
0
  def log_prob(self, x, name="log_prob"):
    """Log density at `x`, computed for every batch member.

    Evaluates
    `(a - 1) * log(x) + (b - 1) * log(1 - x)
     - (lgamma(a) + lgamma(b) - lgamma(a + b))`
    with `a = self._a` and `b = self._b`.

    Args:
      x:  Floating point tensor whose shape can be broadcast with `self.a`
        and `self.b`.  `x` is only legal if 0 < x < 1; it is passed through
        `self._check_x` before use.
      name:  Name to give this Op, defaults to "log_prob".

    Returns:
      Log probabilities for each record, shape `[N1,...,Nm]`.
    """
    a = self._a
    b = self._b
    with ops.name_scope(self.name):
      # Every tensor consumed by the op is listed in `values`, including `b`,
      # so that all inputs are registered with the scope.
      with ops.name_scope(name, values=[a, b, x]):
        x = self._check_x(x)

        unnorm_pdf = (a - 1) * math_ops.log(x) + (
            b - 1) * math_ops.log(1 - x)
        # Negative log of Beta(a, b) = Gamma(a) * Gamma(b) / Gamma(a + b).
        normalization_factor = -(math_ops.lgamma(a) + math_ops.lgamma(b)
                                 - math_ops.lgamma(a + b))
        return unnorm_pdf + normalization_factor
Exemple #9
0
 def _entropy(self):
   """Entropy built from `lgamma`/`digamma` of `a`, `b` and `a_b_sum`.

   Returns
   `lgamma(a) - (a - 1) * digamma(a) + lgamma(b) - (b - 1) * digamma(b)
    - lgamma(a_b_sum) + (a_b_sum - 2) * digamma(a_b_sum)`.
   """
   a_digamma_term = (self.a - 1.) * math_ops.digamma(self.a)
   b_digamma_term = (self.b - 1.) * math_ops.digamma(self.b)
   sum_digamma_term = (self.a_b_sum - 2.) * math_ops.digamma(self.a_b_sum)
   return (math_ops.lgamma(self.a)
           - a_digamma_term
           + math_ops.lgamma(self.b)
           - b_digamma_term
           - math_ops.lgamma(self.a_b_sum)
           + sum_digamma_term)
Exemple #10
0
 def _log_prob(self, x):
   """Log density at `x`: unnormalized log-density minus the log normalizer.

   `x` is first passed through `self._assert_valid_sample`.
   """
   x = self._assert_valid_sample(x)
   # (a - 1) * log(x) + (b - 1) * log(1 - x)
   log_kernel = ((self.a - 1.) * math_ops.log(x) +
                 (self.b - 1.) * math_ops.log(1. - x))
   # lgamma(a) + lgamma(b) - lgamma(a + b)
   log_beta_fn = (math_ops.lgamma(self.a) +
                  math_ops.lgamma(self.b) -
                  math_ops.lgamma(self.a_b_sum))
   return log_kernel - log_beta_fn
Exemple #11
0
 def _log_prob(self, counts):
   """Log probability mass of `counts` given `self.n` and `self.p`.

   Adds the log binomial coefficient to
   `counts * log(p) + (n - counts) * log(1 - p)`.  `counts` is passed through
   `self._check_counts` first.
   """
   counts = self._check_counts(counts)
   failures = self.n - counts
   log_outcome = (counts * math_ops.log(self.p) +
                  failures * math_ops.log(1. - self.p))
   # log(n! / (k! * (n - k)!)) via Gamma(m + 1) = m!.
   log_n_choose_k = (math_ops.lgamma(self.n + 1) -
                     math_ops.lgamma(counts + 1) -
                     math_ops.lgamma(failures + 1))
   return log_outcome + log_n_choose_k
Exemple #12
0
 def nonempty_lbeta():
   """Returns sum(lgamma(x)) - lgamma(sum(x)) over the last axis of `x`.

   `x` comes from the enclosing scope.  The last axis index is computed
   dynamically, and the static shape of the result is set to `x`'s static
   shape without its final dimension.
   """
   final_axis = array_ops.size(array_ops.shape(x)) - 1
   sum_of_lgammas = math_ops.reduce_sum(
       math_ops.lgamma(x), reduction_indices=final_axis)
   lgamma_of_sum = math_ops.lgamma(
       math_ops.reduce_sum(x, reduction_indices=final_axis))
   log_beta = sum_of_lgammas - lgamma_of_sum
   log_beta.set_shape(x.get_shape()[:-1])
   return log_beta
Exemple #13
0
  def entropy(self, name="entropy"):
    """Entropy of the distribution in nats.

    Sums three groups of terms, one each for `a`, `b` and `a_b_sum`:
    `lgamma(a) - (a - 1) * digamma(a)`, `lgamma(b) - (b - 1) * digamma(b)`
    and `-lgamma(a_b_sum) + (a_b_sum - 2) * digamma(a_b_sum)`.

    Args:
      name:  Name to give this Op, defaults to "entropy".

    Returns:
      Tensor holding the sum of the three groups.
    """
    with ops.name_scope(self.name):
      with ops.name_scope(name, values=[self._a, self._b, self._a_b_sum]):
        a, b, total = self._a, self._b, self._a_b_sum

        a_part = math_ops.lgamma(a) - (a - 1) * math_ops.digamma(a)
        b_part = math_ops.lgamma(b) - (b - 1) * math_ops.digamma(b)
        total_part = (-math_ops.lgamma(total) +
                      (total - 2) * math_ops.digamma(total))
        return a_part + b_part + total_part
 def _entropy(self):
     """Returns `alpha + log(beta) + lgamma(alpha) - (1 + alpha) * digamma(alpha)`."""
     shape_param = self.alpha
     digamma_term = (1.0 + shape_param) * math_ops.digamma(shape_param)
     return (shape_param
             + math_ops.log(self.beta)
             + math_ops.lgamma(shape_param)
             - digamma_term)
Exemple #15
0
 def _multi_lgamma(self, a, p, name="multi_lgamma"):
   """Computes the log multivariate gamma function; log(Gamma_p(a)).

   The result is `p * (p - 1) / 4 * log(pi)` plus the sum, over the last
   axis, of `lgamma` applied to `self._multi_gamma_sequence(a, p)`.
   """
   with self._name_scope(name, values=[a, p]):
     gamma_args = self._multi_gamma_sequence(a, p)
     pi_term = 0.25 * p * (p - 1.) * math.log(math.pi)
     lgamma_sum = math_ops.reduce_sum(math_ops.lgamma(gamma_args),
                                      reduction_indices=(-1,))
     return pi_term + lgamma_sum
Exemple #16
0
 def _log_unnormalized_prob(self, x):
   """Returns `x * log_rate - lgamma(1 + x)` after normalizing `x`.

   With `self.validate_args` set, `x` is wrapped by
   `distribution_util.embed_check_nonnegative_integer_form`; otherwise it is
   floored, for consistency with cdf.
   """
   counts = (distribution_util.embed_check_nonnegative_integer_form(x)
             if self.validate_args else math_ops.floor(x))
   log_factorial = math_ops.lgamma(1. + counts)
   return counts * self.log_rate - log_factorial
Exemple #17
0
  def log_prob(self, x, name="log_prob"):
    """Log density of observations `x` given `self._alpha` and `self._beta`.

    Computes
    `alpha * log(beta) + (alpha - 1) * log(x) - beta * x - lgamma(alpha)`.

    Args:
      x: tensor of dtype `dtype`, must be broadcastable with `alpha` and `beta`.
      name: The name to give this op.

    Returns:
      log_prob: tensor of dtype `dtype`, the log-PDFs of `x`.

    Raises:
      Whatever `contrib_tensor_util.assert_same_float_dtype` raises when `x`
      does not match `self.dtype`.
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self._alpha, self._beta, x], name):
        shape_param = self._alpha
        rate_param = self._beta
        x = ops.convert_to_tensor(x)

        # Positivity of `x` is only asserted when `self.strict` is set.
        positivity_checks = []
        if self.strict:
          positivity_checks.append(check_ops.assert_positive(x))
        x = control_flow_ops.with_dependencies(positivity_checks, x)
        contrib_tensor_util.assert_same_float_dtype(tensors=[x],
                                                    dtype=self.dtype)

        log_unnormalized = (shape_param * math_ops.log(rate_param) +
                            (shape_param - 1) * math_ops.log(x) -
                            rate_param * x)
        return log_unnormalized - math_ops.lgamma(shape_param)
Exemple #18
0
  def _log_prob(self, x):
    """Log density at `x` for logits `self.logits` and `self.temperature`.

    With `k = self.event_size`, the result is
    `lgamma(k) + (k - 1) * log(temperature)` plus the sum over the last axis
    of `log_softmax(logits - x * temperature)`.

    Args:
      x: sample tensor; it is passed through `self._assert_valid_sample` and
        broadcast against `self.logits` when their static shapes differ or
        are not fully defined.

    Returns:
      Tensor with one value per event, i.e. shaped like the broadcast logits
      without their last axis.
    """
    x = self._assert_valid_sample(x)
    # broadcast logits or x if need be.
    logits = self.logits
    if (not x.get_shape().is_fully_defined() or
        not logits.get_shape().is_fully_defined() or
        x.get_shape() != logits.get_shape()):
      logits = array_ops.ones_like(x, dtype=logits.dtype) * logits
      x = array_ops.ones_like(logits, dtype=x.dtype) * x

    # The event axis is summed out below, so the result only carries the
    # leading (batch) dimensions.  Reshaping to the full logits shape would
    # request more elements than `log_prob` holds.
    batch_shape = array_ops.shape(logits)[:-1]
    if logits.get_shape().ndims == 2:
      logits_2d = logits
      x_2d = x
    else:
      logits_2d = array_ops.reshape(logits, [-1, self.event_size])
      x_2d = array_ops.reshape(x, [-1, self.event_size])
    # compute the normalization constant; lgamma needs a floating point
    # argument, so the event size is cast to the sample dtype first.
    k = math_ops.cast(self.event_size, x.dtype)
    log_norm_const = (math_ops.lgamma(k)
                      + (k - 1.)
                      * math_ops.log(self.temperature))
    # compute the unnormalized density
    log_softmax = nn_ops.log_softmax(logits_2d - x_2d * self.temperature)
    log_unnorm_prob = math_ops.reduce_sum(log_softmax, [-1], keep_dims=False)
    # combine unnormalized density with normalization constant
    log_prob = log_norm_const + log_unnorm_prob
    # Restore the batch dimensions that were flattened for the 2-D softmax.
    ret = array_ops.reshape(log_prob, batch_shape)
    return ret
 def _log_prob(self, x):
   """Log density at `x` for logits `self.logits` and the stored temperature.

   With `k = self.event_size` cast to `x.dtype`, the result is
   `lgamma(k) + (k - 1) * log(self.temperature)` plus the sum over the last
   axis of `log_softmax(logits - x * self._temperature_2d)`, reshaped to the
   shape of the logits with their last axis reduced away.
   """
   x = self._assert_valid_sample(x)
   logits = self.logits

   # Broadcast logits and x against each other unless their static shapes
   # are both fully defined and identical.
   x_static = x.get_shape()
   logits_static = logits.get_shape()
   needs_broadcast = (not x_static.is_fully_defined() or
                      not logits_static.is_fully_defined() or
                      x_static != logits_static)
   if needs_broadcast:
     logits = array_ops.ones_like(x, dtype=logits.dtype) * logits
     x = array_ops.ones_like(logits, dtype=x.dtype) * x

   # Shape of the result: logits with the event axis reduced away.
   reduced_shape = array_ops.shape(math_ops.reduce_sum(logits, axis=[-1]))
   flat_logits = array_ops.reshape(logits, [-1, self.event_size])
   flat_x = array_ops.reshape(x, [-1, self.event_size])

   # Normalization constant.
   num_classes = math_ops.cast(self.event_size, x.dtype)
   log_normalizer = (math_ops.lgamma(num_classes)
                     + (num_classes - 1.)
                     * math_ops.log(self.temperature))

   # Unnormalized density, summed over the event axis.
   per_class = nn_ops.log_softmax(flat_logits - flat_x * self._temperature_2d)
   log_kernel = math_ops.reduce_sum(per_class, [-1], keepdims=False)

   # Combine, then undo the flattening so the result matches the logits.
   return array_ops.reshape(log_normalizer + log_kernel, reduced_shape)
 def _log_prob(self, x):
     """Returns `alpha*log(beta) - lgamma(alpha) - (alpha + 1)*log(x) - beta/x`.

     When `self.validate_args` is set, `x` is made to depend on a positivity
     assertion before the expression is evaluated.
     """
     positivity_checks = []
     if self.validate_args:
         positivity_checks.append(check_ops.assert_positive(x))
     x = control_flow_ops.with_dependencies(positivity_checks, x)

     log_normalizer = (self.alpha * math_ops.log(self.beta)
                       - math_ops.lgamma(self.alpha))
     power_term = (self.alpha + 1.0) * math_ops.log(x)
     return log_normalizer - power_term - self.beta / x
def lbeta(x, name=None):
  r"""Computes \\(ln(|Beta(x)|)\\), reducing along the last dimension.

  Given one-dimensional `z = [z_0,...,z_{K-1}]`, we define

  $$Beta(z) = \prod_j Gamma(z_j) / Gamma(\sum_j z_j)$$

  And for `n + 1` dimensional `x` with shape `[N1, ..., Nn, K]`, we define
  $$lbeta(x)[i1, ..., in] = Log(|Beta(x[i1, ..., in, :])|)$$.

  In other words, the last dimension is treated as the `z` vector.

  Note that if `z = [u, v]`, then
  \\(Beta(z) = int_0^1 t^{u-1} (1 - t)^{v-1} dt\\), which defines the
  traditional bivariate beta function.

  If the last dimension is empty, we follow the convention that the sum over
  the empty set is zero, and the product is one.

  Args:
    x: A rank `n + 1` `Tensor`, `n >= 0` with type `float`, or `double`.
    name: A name for the operation (optional).

  Returns:
    The logarithm of \\(|Beta(x)|\\) reducing along the last dimension.
  """
  # In the event that the last dimension has zero entries, we return -inf.
  # This is consistent with a convention that the sum over the empty set is 0,
  # and the product is 1.
  # This is standard.  See https://en.wikipedia.org/wiki/Empty_set.
  with ops.name_scope(name, 'lbeta', [x]):
    x = ops.convert_to_tensor(x, name='x')

    # log(prod_j Gamma(x_j)) = sum_j lgamma(x_j).  Note reduce_sum([]) = 0,
    # which is the log of the empty product.
    log_prod_gamma_x = math_ops.reduce_sum(math_ops.lgamma(x), axis=[-1])

    # Note lgamma(0) = infinity, so if x = []
    # sum_x = 0 and log_gamma_sum_x = lgamma(0) = infinity, while
    # log_prod_gamma_x = 0,
    # so result = -infinity
    sum_x = math_ops.reduce_sum(x, axis=[-1])
    log_gamma_sum_x = math_ops.lgamma(sum_x)
    result = log_prod_gamma_x - log_gamma_sum_x

    return result
Exemple #22
0
 def actual_hypersphere_volume(dims, radius):
   """Returns `pi^(dims/2) / Gamma(1 + dims/2) * radius^dims`.

   The product is assembled in log space and exponentiated at the end.
   """
   # https://en.wikipedia.org/wiki/Volume_of_an_n-ball
   # Using tf.math.lgamma because we'd have to otherwise use SciPy which is
   # not a required dependency of core.
   radius = np.asarray(radius)
   dims = math_ops.cast(dims, dtype=radius.dtype)
   half_dims = dims / 2.
   log_volume = (half_dims * np.log(np.pi) -
                 math_ops.lgamma(1. + half_dims) +
                 dims * math_ops.log(radius))
   return math_ops.exp(log_volume)
Exemple #23
0
 def _log_prob(self, x):
   """Returns `alpha*log(beta) + (alpha - 1)*log(x) - beta*x - lgamma(alpha)`.

   When `self.validate_args` is set, `x` is made to depend on a positivity
   assertion.  Its dtype is then checked against `self.dtype`.
   """
   positivity_checks = []
   if self.validate_args:
     positivity_checks.append(check_ops.assert_positive(x))
   x = control_flow_ops.with_dependencies(positivity_checks, x)
   contrib_tensor_util.assert_same_float_dtype(tensors=[x],
                                               dtype=self.dtype)

   log_unnormalized = (self.alpha * math_ops.log(self.beta) +
                       (self.alpha - 1.) * math_ops.log(x) -
                       self.beta * x)
   return log_unnormalized - math_ops.lgamma(self.alpha)
def _log_combinations(n, counts, name='log_combinations'):
  """Log number of ways counts could have come in.

  Args:
    n: Value supplying the total number of draws; a priori this represents
      the sum of `counts` along its last dimension.
    counts: Tensor of per-class counts; classes lie along the last dimension.
    name: Name for the created op scope.

  Returns:
    `lgamma(n + 1)` minus the sum over the last dimension of
    `lgamma(counts + 1)`, i.e. the log of `n! / prod_i counts_i!`.
  """
  # First a bit about the number of ways counts could have come in:
  # E.g. if counts = [1, 2], then this is 3 choose 2.
  # In general, this is (sum counts)! / prod(counts!)
  # The reduction should be along the last dimension of counts.  This is the
  # "distribution" dimension. Here n a priori represents the sum of counts.
  # Both inputs used below are handed to the scope, not just `counts`.
  with ops.op_scope([n, counts], name):
    # To compute factorials, use the fact that Gamma(n + 1) = n!
    # Compute two terms: one from n, one reduced over counts.  Compute each
    # for each batch member.
    # Log Gamma(n + 1) = Log(n!)
    total_permutations = math_ops.lgamma(n + 1)
    # sum(Log Gamma(counts + 1)) = Log prod(counts!)
    counts_factorial = math_ops.lgamma(counts + 1)
    redundant_permutations = math_ops.reduce_sum(counts_factorial,
                                                 reduction_indices=[-1])
    return total_permutations - redundant_permutations
Exemple #25
0
  def log_pdf(self, x, name="log_pdf"):
    """Log pdf of observations in `x` under these Student's t-distribution(s).

    Args:
      x: tensor of dtype `dtype`, must be broadcastable with `mu` and `df`.
      name: The name to give this op.

    Returns:
      log_pdf: tensor of dtype `dtype`, the log-PDFs of `x`.

    Raises:
      TypeError: if the dtype of `x` differs from `self.dtype`.
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self._df, self._mu, self._sigma, x], name):
        x = ops.convert_to_tensor(x)
        if x.dtype != self.dtype:
          raise TypeError("Input x dtype does not match dtype: %s vs. %s" %
                          (x.dtype, self.dtype))
        half_df = self._df / 2
        # log Beta(1/2, df/2), expressed through lgamma.
        log_beta = (math_ops.lgamma(0.5) + math_ops.lgamma(half_df) -
                    math_ops.lgamma(0.5 + half_df))
        standardized = (x - self._mu) / self._sigma
        log_kernel = (self._df + 1) / 2 * math_ops.log(
            1 + math_ops.square(standardized) / self._df)
        return (-math_ops.log(self._df) / 2
                - log_beta
                - log_kernel
                - math_ops.log(self._sigma))
Exemple #26
0
def _IgammaGrad(op, grad):
    """Returns the gradient of igamma(a, x) with respect to x only.

    The first element of the returned pair, the gradient for `a`, is `None`.
    """
    # TODO(ebrevdo): Perhaps add the derivative w.r.t. a
    a, x = op.inputs[0], op.inputs[1]
    shape_a = array_ops.shape(a)
    shape_x = array_ops.shape(x)
    unused_reduce_a, reduce_x = gen_array_ops._broadcast_gradient_args(
        shape_a, shape_x)

    # Perform operations in log space before summing, because Gamma(a)
    # and Gamma'(a) can grow large.
    log_partial_x = -x + (a - 1) * math_ops.log(x) - math_ops.lgamma(a)
    partial_x = math_ops.exp(log_partial_x)
    grad_x = array_ops.reshape(
        math_ops.reduce_sum(partial_x * grad, reduce_x), shape_x)
    return (None, grad_x)
Exemple #27
0
  def pdf(self, x, name="pdf"):
    """The PDF of observations in `x` under these Student's t distribution(s).

    Args:
      x: tensor of dtype `dtype`, must be broadcastable with `df`, `mu`, and
        `sigma`.
      name: The name to give this op.

    Returns:
      pdf: tensor of dtype `dtype`, the pdf values of `x`.

    Raises:
      TypeError: if the dtype of `x` differs from `self.dtype`.
    """
    with ops.name_scope(self.name):
      with ops.op_scope([self._df, self._mu, self._sigma, x], name):
        x = ops.convert_to_tensor(x)
        if x.dtype != self.dtype:
          raise TypeError("Input x dtype does not match dtype: %s vs. %s" %
                          (x.dtype, self.dtype))
        df = self._df
        standardized = (x - self._mu) / self._sigma
        # Gamma((df + 1) / 2) / Gamma(df / 2), formed in log space.
        gamma_ratio = math_ops.exp(math_ops.lgamma((df + 1) / 2) -
                                   math_ops.lgamma(df / 2))
        kernel = math_ops.pow(1 + math_ops.square(standardized) / df,
                              -(df + 1) / 2)
        return (gamma_ratio / math_ops.sqrt(df) / math.sqrt(np.pi) *
                kernel / self.sigma)
Exemple #28
0
  def log_prob(self, x, name="log_prob"):
    """Log probability mass function.

    Computes `x * log(lam) - lam - lgamma(x + 1)` with `lam = self.lam`.

    Args:
      x: Non-negative floating point tensor with dtype `dtype` and whose shape
        can be broadcast with `self.lam`. `x` is only legal if it is
        non-negative and its components are equal to integer values; it is
        passed through `self._check_x(x, check_integer=True)`.
      name: A name for this operation (optional).

    Returns:
      The log-probabilities of the events.
    """
    with ops.name_scope(self.name):
      with ops.name_scope(name, values=[self.lam, x]):
        counts = self._check_x(x, check_integer=True)
        log_rate_term = counts * math_ops.log(self.lam)
        # lgamma(k + 1) = log(k!).
        log_factorial = math_ops.lgamma(counts + 1)
        return log_rate_term - self.lam - log_factorial
Exemple #29
0
def _IgammaGrad(op, grad):
  """Returns gradient of igamma(a, x) with respect to a and x.

  Each partial is multiplied by `grad`, summed over the axes reported by
  `broadcast_gradient_args`, and reshaped to the shape of its input.
  """
  a, x = op.inputs[0], op.inputs[1]
  shape_a = array_ops.shape(a)
  shape_x = array_ops.shape(x)
  reduce_a, reduce_x = gen_array_ops.broadcast_gradient_args(shape_a, shape_x)

  with ops.control_dependencies([grad]):
    partial_a = gen_math_ops.igamma_grad_a(a, x)
    # Perform operations in log space before summing, because Gamma(a)
    # and Gamma'(a) can grow large.
    log_partial_x = -x + (a - 1) * math_ops.log(x) - math_ops.lgamma(a)
    partial_x = math_ops.exp(log_partial_x)

    grad_a = array_ops.reshape(
        math_ops.reduce_sum(partial_a * grad, reduce_a), shape_a)
    grad_x = array_ops.reshape(
        math_ops.reduce_sum(partial_x * grad, reduce_x), shape_x)
    return (grad_a, grad_x)
def _IgammaGrad(op, grad):
  """Returns gradient of igamma(a, x) with respect to x.

  The gradient slot for `a` is returned as `None`.
  """
  # TODO(ebrevdo): Perhaps add the derivative w.r.t. a
  a, x = op.inputs[0], op.inputs[1]
  shape_a = array_ops.shape(a)
  shape_x = array_ops.shape(x)
  # pylint: disable=protected-access
  unused_reduce_a, reduce_x = gen_array_ops._broadcast_gradient_args(
      shape_a, shape_x)
  # pylint: enable=protected-access

  # Perform operations in log space before summing, because Gamma(a)
  # and Gamma'(a) can grow large.
  log_partial_x = -x + (a - 1) * math_ops.log(x) - math_ops.lgamma(a)
  partial_x = math_ops.exp(log_partial_x)
  grad_x = array_ops.reshape(
      math_ops.reduce_sum(partial_x * grad, reduce_x), shape_x)
  # TODO(b/36815900): Mark None return values as NotImplemented
  return (None, grad_x)
Exemple #31
0
 def _log_normalization(self):
   """Returns `lgamma(concentration1) + lgamma(concentration0) - lgamma(total_concentration)`."""
   lgamma_c1 = math_ops.lgamma(self.concentration1)
   lgamma_c0 = math_ops.lgamma(self.concentration0)
   lgamma_total = math_ops.lgamma(self.total_concentration)
   return lgamma_c1 + lgamma_c0 - lgamma_total
Exemple #32
0
 def _log_normalization(self):
     """Returns the log normalizer built from `self.scale` and `self.df`.

     Computes `log|scale| + 0.5*log(df) + 0.5*log(pi) + lgamma(df / 2)
     - lgamma((df + 1) / 2)`.
     """
     log_abs_scale = math_ops.log(math_ops.abs(self.scale))
     half_log_df = 0.5 * math_ops.log(self.df)
     half_log_pi = 0.5 * np.log(np.pi)
     return (log_abs_scale + half_log_df + half_log_pi
             + math_ops.lgamma(0.5 * self.df)
             - math_ops.lgamma(0.5 * (self.df + 1.)))
Exemple #33
0
 def _log_normalization(self):
     """Returns `lgamma(concentration) - concentration * log(rate)`."""
     lgamma_concentration = math_ops.lgamma(self.concentration)
     scaled_log_rate = self.concentration * math_ops.log(self.rate)
     return lgamma_concentration - scaled_log_rate
Exemple #34
0
 def _entropy(self):
   """Returns the entropy expression in `self.concentration` and `self.rate`.

   Computes `concentration - log(rate) + lgamma(concentration)
   + (1 - concentration) * digamma(concentration)`.
   """
   digamma_term = ((1. - self.concentration) *
                   math_ops.digamma(self.concentration))
   return (self.concentration
           - math_ops.log(self.rate)
           + math_ops.lgamma(self.concentration)
           + digamma_term)