def log_prob(self, counts, name="log_prob"):
  """`Log(P[counts])`, computed for every batch member.

  For each batch member of counts `k`, `P[counts]` is the probability that
  after sampling `n` draws from this Binomial distribution, the number of
  successes is `k`. Different sequences of draws can yield the same counts,
  so the result includes the log of the binomial coefficient.

  Args:
    counts: Non-negative tensor with dtype `dtype` whose shape can be
      broadcast with `self.p` and `self.n`. `counts` is only legal if it is
      less than or equal to `n` and its components are integer-valued.
    name: Name to give this Op, defaults to "log_prob".

  Returns:
    Log probabilities for each record, shape `[N1,...,Nm]`.
  """
  num_draws = self._n
  success_prob = self._p
  with ops.name_scope(self.name):
    with ops.name_scope(name, values=[self._n, self._p, counts]):
      counts = self._check_counts(counts)
      # k * log(p) + (n - k) * log(1 - p)
      success_term = counts * math_ops.log(success_prob)
      failure_term = (num_draws - counts) * math_ops.log(1 - success_prob)
      log_unnormalized = success_term + failure_term
      # log(n choose k), written with lgamma since Gamma(m + 1) = m!.
      log_coefficient = math_ops.lgamma(num_draws + 1) - math_ops.lgamma(
          counts + 1)
      log_coefficient = log_coefficient - math_ops.lgamma(
          num_draws - counts + 1)
      return log_unnormalized + log_coefficient
def _log_normalization(self, positive_counts):
  """Returns `-lgamma(total_count + k) + lgamma(k + 1) + lgamma(total_count)`.

  Args:
    positive_counts: The counts `k`. When `self.validate_args` is set they
      are first passed through
      `distribution_util.embed_check_nonnegative_discrete` with
      `check_integer=True`.

  Returns:
    The log-normalization term evaluated at `positive_counts`.
  """
  counts = positive_counts
  if self.validate_args:
    counts = distribution_util.embed_check_nonnegative_discrete(
        counts, check_integer=True)
  neg_lgamma_of_sum = -math_ops.lgamma(self.total_count + counts)
  lgamma_counts_plus_one = math_ops.lgamma(counts + 1.)
  lgamma_total_count = math_ops.lgamma(self.total_count)
  return neg_lgamma_of_sum + lgamma_counts_plus_one + lgamma_total_count
def log_combinations(n, counts, name="log_combinations"):
  """Log of the multinomial coefficient.

  Given `n` and `counts`, where `counts` has last dimension `k`, this returns

  ```lgamma(n + 1) - sum_i lgamma(counts_i + 1)```

  i.e. `log(n! / prod_i counts_i!)`, where `i` runs over all `k` classes
  (the last dimension of `counts`).

  Args:
    n: Numeric `Tensor` broadcastable with `counts`. This represents `n`
      outcomes.
    counts: Numeric `Tensor` broadcastable with `n`. This represents counts
      in `k` classes, where `k` is the last dimension of the tensor.
    name: A name for this operation (optional).

  Returns:
    `Tensor` holding the log multinomial coefficient between `n` and
    `counts`.
  """
  # Example: counts = [1, 2] corresponds to "3 choose 2". In general the
  # number of orderings is (sum counts)! / prod(counts!), with the product
  # taken along the last ("distribution") dimension of counts. `n` is taken
  # a priori to be the sum of counts.
  with ops.name_scope(name, values=[n, counts]):
    n = ops.convert_to_tensor(n, name="n")
    counts = ops.convert_to_tensor(counts, name="counts")
    log_n_factorial = math_ops.lgamma(n + 1)
    log_counts_factorial = math_ops.lgamma(counts + 1)
    log_prod_counts_factorial = math_ops.reduce_sum(
        log_counts_factorial, reduction_indices=[-1])
    return log_n_factorial - log_prod_counts_factorial
def nonempty_lbeta():
  """Returns `sum(lgamma(x)) - lgamma(sum(x))` along the last axis of `x`.

  `x` is not a parameter; it is read from the enclosing scope.
  """
  lgamma_of_entries = math_ops.lgamma(x)
  sum_of_lgammas = math_ops.reduce_sum(
      lgamma_of_entries, reduction_indices=[-1])
  total = math_ops.reduce_sum(x, reduction_indices=[-1])
  lgamma_of_total = math_ops.lgamma(total)
  return sum_of_lgammas - lgamma_of_total
def _prob(self, x):
  """Density at `x` using `self.df`, `self.mu` and `self.sigma`.

  With `y = (x - mu) / sigma` the value returned is

    Gamma((df + 1) / 2) / Gamma(df / 2)
      / (sqrt(df) * sqrt(pi) * sigma)
      * (1 + y**2 / df) ** (-(df + 1) / 2)

  where the Gamma ratio is formed as `exp` of a difference of `lgamma`s.
  """
  standardized = (x - self.mu) / self.sigma
  half_df = 0.5 * self.df
  gamma_ratio = math_ops.exp(
      math_ops.lgamma(0.5 + half_df) - math_ops.lgamma(half_df))
  scale_factor = math_ops.sqrt(self.df) * math.sqrt(math.pi) * self.sigma
  kernel = math_ops.pow(
      1. + math_ops.square(standardized) / self.df, -(0.5 + half_df))
  return gamma_ratio / scale_factor * kernel
def _kl_gamma_gamma(g0, g1, name=None):
  """Calculate the batched KL divergence KL(g0 || g1) with g0 and g1 Gamma.

  Args:
    g0: instance of a Gamma distribution object.
    g1: instance of a Gamma distribution object.
    name: (optional) Name to use for created operations.
      Default is "kl_gamma_gamma".

  Returns:
    kl_gamma_gamma: `Tensor`. The batchwise KL(g0 || g1).
  """
  with ops.name_scope(name, "kl_gamma_gamma", values=[
      g0.concentration, g0.rate, g1.concentration, g1.rate]):
    # Result from:
    #   http://www.fil.ion.ucl.ac.uk/~wpenny/publications/densities.ps
    # For derivation see:
    #   http://stats.stackexchange.com/questions/11646/kullback-leibler-divergence-between-two-gamma-distributions   pylint: disable=line-too-long
    kl = ((g0.concentration - g1.concentration)
          * math_ops.digamma(g0.concentration))
    kl = kl + math_ops.lgamma(g1.concentration)
    kl = kl - math_ops.lgamma(g0.concentration)
    kl = kl + g1.concentration * math_ops.log(g0.rate)
    kl = kl - g1.concentration * math_ops.log(g1.rate)
    kl = kl + g0.concentration * (g1.rate / g0.rate - 1.)
    return kl
def _log_prob(self, x):
  """Log-density at `x` using `self.df`, `self.mu` and `self.sigma`.

  With `y = (x - mu) / sigma` the value returned is

    lgamma((df + 1) / 2) - lgamma(df / 2)
      - 0.5 * log(df) - 0.5 * log(pi) - log(sigma)
      - (df + 1) / 2 * log(1 + y**2 / df)
  """
  standardized = (x - self.mu) / self.sigma
  half_df = 0.5 * self.df
  result = math_ops.lgamma(0.5 + half_df) - math_ops.lgamma(half_df)
  result = result - 0.5 * math_ops.log(self.df)
  result = result - 0.5 * math.log(math.pi)
  result = result - math_ops.log(self.sigma)
  tail = (0.5 + half_df) * math_ops.log(
      1. + math_ops.square(standardized) / self.df)
  return result - tail
def log_prob(self, x, name="log_prob"):
  """`Log(p(x))`, the log-density, computed for every batch member.

  Evaluates

    (a - 1) * log(x) + (b - 1) * log(1 - x)
      - (lgamma(a) + lgamma(b) - lgamma(a + b))

  Args:
    x: Non-negative floating point tensor whose shape can be broadcast with
      `self.a` and `self.b`. `x` is only legal if 0 < x < 1.
    name: Name to give this Op, defaults to "log_prob".

  Returns:
    Log probabilities for each record, shape `[N1,...,Nm]`.
  """
  a = self._a
  b = self._b
  with ops.name_scope(self.name):
    # `b` is an input of this op as well, so it is listed in `values`
    # alongside `a` and `x`.
    with ops.name_scope(name, values=[a, b, x]):
      x = self._check_x(x)
      unnorm_pdf = (a - 1) * math_ops.log(x) + (
          b - 1) * math_ops.log(1 - x)
      # Negative log of the Beta function B(a, b).
      normalization_factor = -(math_ops.lgamma(a) + math_ops.lgamma(b)
                               - math_ops.lgamma(a + b))
      log_prob = unnorm_pdf + normalization_factor
      return log_prob
def _entropy(self):
  """Entropy expressed through `self.a`, `self.b` and `self.a_b_sum`.

  Returns
    lgamma(a) - (a - 1) * digamma(a)
      + lgamma(b) - (b - 1) * digamma(b)
      - lgamma(a_b_sum) + (a_b_sum - 2) * digamma(a_b_sum)
  """
  result = math_ops.lgamma(self.a)
  result = result - (self.a - 1.) * math_ops.digamma(self.a)
  result = result + math_ops.lgamma(self.b)
  result = result - (self.b - 1.) * math_ops.digamma(self.b)
  result = result - math_ops.lgamma(self.a_b_sum)
  result = result + (self.a_b_sum - 2.) * math_ops.digamma(self.a_b_sum)
  return result
def _log_prob(self, x):
  """Log-density at `x` after validation by `self._assert_valid_sample`.

  Returns
    (a - 1) * log(x) + (b - 1) * log(1 - x)
      - (lgamma(a) + lgamma(b) - lgamma(a_b_sum))
  """
  sample = self._assert_valid_sample(x)
  a_term = (self.a - 1.) * math_ops.log(sample)
  b_term = (self.b - 1.) * math_ops.log(1. - sample)
  log_kernel = a_term + b_term
  log_beta_fn = (math_ops.lgamma(self.a)
                 + math_ops.lgamma(self.b)
                 - math_ops.lgamma(self.a_b_sum))
  return log_kernel - log_beta_fn
def _log_prob(self, counts):
  """Log-probability of `counts` successes given `self.n` and `self.p`.

  `counts` is first passed through `self._check_counts`. The result is

    counts * log(p) + (n - counts) * log(1 - p)
      + lgamma(n + 1) - lgamma(counts + 1) - lgamma(n - counts + 1)
  """
  checked = self._check_counts(counts)
  success_term = checked * math_ops.log(self.p)
  failure_term = (self.n - checked) * math_ops.log(1. - self.p)
  log_unnormalized = success_term + failure_term
  # log(n choose counts) via lgamma, since Gamma(m + 1) = m!.
  log_coefficient = (math_ops.lgamma(self.n + 1)
                     - math_ops.lgamma(checked + 1)
                     - math_ops.lgamma(self.n - checked + 1))
  return log_unnormalized + log_coefficient
def nonempty_lbeta():
  """Returns `sum(lgamma(x)) - lgamma(sum(x))` over the last axis of `x`.

  `x` is read from the enclosing scope. The index of the last axis is
  computed dynamically as `rank(x) - 1`, and the static shape of the result
  is set to the static shape of `x` with its last dimension dropped.
  """
  rank = array_ops.size(array_ops.shape(x))
  event_axis = rank - 1
  sum_of_lgammas = math_ops.reduce_sum(
      math_ops.lgamma(x), reduction_indices=event_axis)
  total = math_ops.reduce_sum(x, reduction_indices=event_axis)
  lgamma_of_total = math_ops.lgamma(total)
  output = sum_of_lgammas - lgamma_of_total
  output.set_shape(x.get_shape()[:-1])
  return output
def entropy(self, name="entropy"):
  """Entropy of the distribution in nats.

  Args:
    name: Name to give this Op, defaults to "entropy".

  Returns:
    `lgamma(a) - (a - 1) * digamma(a) + lgamma(b) - (b - 1) * digamma(b)
    - lgamma(a_b_sum) + (a_b_sum - 2) * digamma(a_b_sum)`.
  """
  with ops.name_scope(self.name):
    with ops.name_scope(name, values=[self._a, self._b, self._a_b_sum]):
      a, b, total = self._a, self._b, self._a_b_sum
      a_part = math_ops.lgamma(a) - (a - 1) * math_ops.digamma(a)
      b_part = math_ops.lgamma(b) - (b - 1) * math_ops.digamma(b)
      running = a_part + b_part
      total_part = -math_ops.lgamma(total) + (
          total - 2) * math_ops.digamma(total)
      return running + total_part
def _entropy(self):
  """Entropy expressed through `self.alpha` and `self.beta`.

  Returns
    alpha + log(beta) + lgamma(alpha) - (1 + alpha) * digamma(alpha)
  """
  result = self.alpha + math_ops.log(self.beta)
  result = result + math_ops.lgamma(self.alpha)
  digamma_term = (1.0 + self.alpha) * math_ops.digamma(self.alpha)
  return result - digamma_term
def _multi_lgamma(self, a, p, name="multi_lgamma"):
  """Computes the log multivariate gamma function; log(Gamma_p(a)).

  The value is `0.25 * p * (p - 1) * log(pi)` plus the sum, over the last
  axis, of `lgamma` applied to `self._multi_gamma_sequence(a, p)`.

  Args:
    a: Argument of the multivariate gamma function.
    p: Dimension parameter of the multivariate gamma function.
    name: Name for the op scope, defaults to "multi_lgamma".

  Returns:
    log(Gamma_p(a)).
  """
  with self._name_scope(name, values=[a, p]):
    gamma_args = self._multi_gamma_sequence(a, p)
    log_pi_term = 0.25 * p * (p - 1.) * math.log(math.pi)
    sum_of_lgammas = math_ops.reduce_sum(
        math_ops.lgamma(gamma_args), reduction_indices=(-1,))
    return log_pi_term + sum_of_lgammas
def _log_unnormalized_prob(self, x):
  """Returns `x * log_rate - lgamma(1 + x)`.

  With `self.validate_args` set, `x` is checked by
  `distribution_util.embed_check_nonnegative_integer_form`; otherwise it is
  floored, for consistency with the cdf.
  """
  if self.validate_args:
    k = distribution_util.embed_check_nonnegative_integer_form(x)
  else:
    # For consistency with cdf, we take the floor.
    k = math_ops.floor(x)
  rate_term = k * self.log_rate
  log_k_factorial = math_ops.lgamma(1. + k)
  return rate_term - log_k_factorial
def log_prob(self, x, name="log_prob"):
  """Log prob of observations in `x` under these Gamma distribution(s).

  Evaluates
    alpha * log(beta) + (alpha - 1) * log(x) - beta * x - lgamma(alpha)

  Args:
    x: tensor of dtype `dtype`, must be broadcastable with `alpha` and
      `beta`.
    name: The name to give this op.

  Returns:
    log_prob: tensor of dtype `dtype`, the log-PDFs of `x`.

  Raises:
    TypeError: if `x` and `alpha` are different dtypes (the dtype check is
      delegated to `contrib_tensor_util.assert_same_float_dtype`).
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self._alpha, self._beta, x], name):
      shape_param = self._alpha
      rate_param = self._beta
      x = ops.convert_to_tensor(x)
      # The positivity assertion is attached only in strict mode.
      positivity_checks = (
          [check_ops.assert_positive(x)] if self.strict else [])
      x = control_flow_ops.with_dependencies(positivity_checks, x)
      contrib_tensor_util.assert_same_float_dtype(tensors=[x],
                                                  dtype=self.dtype)
      result = shape_param * math_ops.log(rate_param)
      result = result + (shape_param - 1) * math_ops.log(x)
      result = result - rate_param * x
      return result - math_ops.lgamma(self._alpha)
def _log_prob(self, x):
  """Log-density of `x` given `self.logits` and `self.temperature`.

  With `k = event_size`, the result per batch member is

    lgamma(k) + (k - 1) * log(temperature)
      + sum(log_softmax(logits - x * temperature), axis=-1)

  Args:
    x: Sample tensor; validated by `self._assert_valid_sample` and broadcast
      against `self.logits` when their static shapes are not known to match.

  Returns:
    Log-density with the shape of the broadcast logits minus the trailing
    event dimension.
  """
  x = self._assert_valid_sample(x)
  # broadcast logits or x if need be.
  logits = self.logits
  if (not x.get_shape().is_fully_defined() or
      not logits.get_shape().is_fully_defined() or
      x.get_shape() != logits.get_shape()):
    logits = array_ops.ones_like(x, dtype=logits.dtype) * logits
    x = array_ops.ones_like(logits, dtype=x.dtype) * x
  # The event (last) dimension is summed out below, so the output shape is
  # the logits shape without that dimension. Reshaping to the full logits
  # shape would request more elements than log_prob contains.
  batch_shape = array_ops.shape(logits)[:-1]
  if logits.get_shape().ndims == 2:
    logits_2d = logits
    x_2d = x
  else:
    logits_2d = array_ops.reshape(logits, [-1, self.event_size])
    x_2d = array_ops.reshape(x, [-1, self.event_size])
  # compute the normalization constant; lgamma and the product with
  # log(temperature) need a floating-point k, so cast event_size to x's dtype.
  k = math_ops.cast(self.event_size, x.dtype)
  log_norm_const = (math_ops.lgamma(k)
                    + (k - 1.) * math_ops.log(self.temperature))
  # compute the unnormalized density
  log_softmax = nn_ops.log_softmax(logits_2d - x_2d * self.temperature)
  log_unnorm_prob = math_ops.reduce_sum(log_softmax, [-1], keep_dims=False)
  # combine unnormalized density with normalization constant
  log_prob = log_norm_const + log_unnorm_prob
  # Reshape log_prob to the batch shape of the (broadcast) logits.
  return array_ops.reshape(log_prob, batch_shape)
def _log_prob(self, x):
  """Log-density of `x` given `self.logits` and the temperature.

  With `k = event_size` cast to `x.dtype`, the result is

    lgamma(k) + (k - 1) * log(temperature)
      + sum(log_softmax(logits - x * temperature_2d), axis=-1)

  reshaped to the shape of the broadcast logits with the last dimension
  reduced away.
  """
  x = self._assert_valid_sample(x)
  logits = self.logits
  # Broadcast logits and x against each other unless both static shapes are
  # fully known and already equal.
  x_static = x.get_shape()
  logits_static = logits.get_shape()
  needs_broadcast = (not x_static.is_fully_defined() or
                     not logits_static.is_fully_defined() or
                     x_static != logits_static)
  if needs_broadcast:
    logits = array_ops.ones_like(x, dtype=logits.dtype) * logits
    x = array_ops.ones_like(logits, dtype=x.dtype) * x
  # Shape of the logits once the event dimension has been reduced.
  output_shape = array_ops.shape(math_ops.reduce_sum(logits, axis=[-1]))
  flat_logits = array_ops.reshape(logits, [-1, self.event_size])
  flat_x = array_ops.reshape(x, [-1, self.event_size])
  # Normalization constant.
  num_classes = math_ops.cast(self.event_size, x.dtype)
  log_normalizer = (math_ops.lgamma(num_classes)
                    + (num_classes - 1.) * math_ops.log(self.temperature))
  # Unnormalized density.
  per_class = nn_ops.log_softmax(flat_logits - flat_x * self._temperature_2d)
  log_unnormalized = math_ops.reduce_sum(per_class, [-1], keepdims=False)
  # Combine, then restore the shape implied by the user-supplied logits.
  combined = log_normalizer + log_unnormalized
  return array_ops.reshape(combined, output_shape)
def _log_prob(self, x):
  """Log-density at `x` using `self.alpha` and `self.beta`.

  Returns
    alpha * log(beta) - lgamma(alpha) - (alpha + 1) * log(x) - beta / x

  When `self.validate_args` is set, a positivity assertion on `x` is
  attached as a control dependency.
  """
  positivity_checks = (
      [check_ops.assert_positive(x)] if self.validate_args else [])
  x = control_flow_ops.with_dependencies(positivity_checks, x)
  result = self.alpha * math_ops.log(self.beta)
  result = result - math_ops.lgamma(self.alpha)
  result = result - (self.alpha + 1.0) * math_ops.log(x)
  return result - self.beta / x
def lbeta(x, name=None):
  r"""Computes \\(ln(|Beta(x)|)\\), reducing along the last dimension.

  Given one-dimensional `z = [z_0,...,z_{K-1}]`, we define

  $$Beta(z) = \prod_j Gamma(z_j) / Gamma(\sum_j z_j)$$

  And for `n + 1` dimensional `x` with shape `[N1, ..., Nn, K]`, we define
  $$lbeta(x)[i1, ..., in] = Log(|Beta(x[i1, ..., in, :])|)$$.

  In other words, the last dimension is treated as the `z` vector.

  Note that if `z = [u, v]`, then
  \\(Beta(z) = int_0^1 t^{u-1} (1 - t)^{v-1} dt\\), which defines the
  traditional bivariate beta function.

  If the last dimension is empty, we follow the convention that the sum over
  the empty set is zero, and the product is one.

  Args:
    x: A rank `n + 1` `Tensor`, `n >= 0` with type `float`, or `double`.
    name: A name for the operation (optional).

  Returns:
    The logarithm of \\(|Beta(x)|\\) reducing along the last dimension.
  """
  # In the event that the last dimension has zero entries, we return -inf.
  # This is consistent with the convention that the sum over the empty set
  # is 0 and the product over the empty set is 1.
  # This is standard. See https://en.wikipedia.org/wiki/Empty_set.
  with ops.name_scope(name, 'lbeta', [x]):
    x = ops.convert_to_tensor(x, name='x')

    # Note reduce_sum([]) = 0, so for empty x this is log(1) = 0.
    # Both reductions use the same `axis` keyword.
    log_prod_gamma_x = math_ops.reduce_sum(math_ops.lgamma(x), axis=[-1])

    # Note lgamma(0) = infinity, so if x = []
    #   sum_x = 0,
    #   log_gamma_sum_x = lgamma(0) = infinity, and
    #   log_prod_gamma_x = 0,
    # so result = -infinity.
    sum_x = math_ops.reduce_sum(x, axis=[-1])
    log_gamma_sum_x = math_ops.lgamma(sum_x)
    result = log_prod_gamma_x - log_gamma_sum_x

    return result
def actual_hypersphere_volume(dims, radius):
  """Volume of a `dims`-dimensional ball of the given `radius`.

  Computed in log space as
    exp((dims / 2) * log(pi) - lgamma(1 + dims / 2) + dims * log(radius)).
  See https://en.wikipedia.org/wiki/Volume_of_an_n-ball

  Args:
    dims: Number of dimensions; cast to the dtype of `radius`.
    radius: Radius; converted with `np.asarray`.

  Returns:
    The volume, as produced by `math_ops.exp`.
  """
  # tf.math.lgamma is used because the alternative would be SciPy, which is
  # not a required dependency of core.
  radius = np.asarray(radius)
  dims = math_ops.cast(dims, dtype=radius.dtype)
  log_pi_term = (dims / 2.) * np.log(np.pi)
  log_gamma_term = math_ops.lgamma(1. + dims / 2.)
  log_volume = log_pi_term - log_gamma_term
  log_volume = log_volume + dims * math_ops.log(radius)
  return math_ops.exp(log_volume)
def _log_prob(self, x):
  """Log-density at `x` using `self.alpha` and `self.beta`.

  Returns
    alpha * log(beta) + (alpha - 1) * log(x) - beta * x - lgamma(alpha)

  When `self.validate_args` is set, a positivity assertion on `x` is
  attached as a control dependency. The dtype of `x` is checked against
  `self.dtype` via `contrib_tensor_util.assert_same_float_dtype`.
  """
  positivity_checks = (
      [check_ops.assert_positive(x)] if self.validate_args else [])
  x = control_flow_ops.with_dependencies(positivity_checks, x)
  contrib_tensor_util.assert_same_float_dtype(tensors=[x], dtype=self.dtype)
  result = self.alpha * math_ops.log(self.beta)
  result = result + (self.alpha - 1.) * math_ops.log(x)
  result = result - self.beta * x
  return result - math_ops.lgamma(self.alpha)
def _log_combinations(n, counts, name='log_combinations'):
  """Log number of ways counts could have come in.

  Computes `lgamma(n + 1) - sum(lgamma(counts + 1))`, with the sum taken
  over the last dimension of `counts`; this is `log(n! / prod(counts!))`.

  Args:
    n: Total count; a priori the sum of `counts` along its last dimension.
    counts: Per-class counts; the last dimension indexes the classes.
    name: Name for the op scope, defaults to 'log_combinations'.

  Returns:
    The log of the multinomial coefficient for each batch member.
  """
  # First a bit about the number of ways counts could have come in:
  # E.g. if counts = [1, 2], then this is 3 choose 2.
  # In general, this is (sum counts)! / prod(counts!)
  # The product is along the last dimension of counts.  This is the
  # "distribution" dimension. Here n a priori represents the sum of counts.
  # Both inputs are listed in the scope values since the op reads both.
  with ops.op_scope([n, counts], name):
    # To compute factorials, use the fact that Gamma(n + 1) = n!
    # Compute two terms, each a sum over counts.  Compute each for each
    # batch member.
    # Log Gamma((sum counts) + 1) = Log((sum counts)!)
    total_permutations = math_ops.lgamma(n + 1)
    # sum(Log Gamma(counts + 1)) = Log prod(counts!)
    counts_factorial = math_ops.lgamma(counts + 1)
    redundant_permutations = math_ops.reduce_sum(counts_factorial,
                                                 reduction_indices=[-1])
    return total_permutations - redundant_permutations
def log_pdf(self, x, name="log_pdf"):
  """Log pdf of observations in `x` under these Student's t-distribution(s).

  Args:
    x: tensor of dtype `dtype`, must be broadcastable with `mu` and `df`.
    name: The name to give this op.

  Returns:
    log_pdf: tensor of dtype `dtype`, the log-PDFs of `x`.

  Raises:
    TypeError: if the dtype of `x` differs from `self.dtype`.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self._df, self._mu, self._sigma, x], name):
      x = ops.convert_to_tensor(x)
      if x.dtype != self.dtype:
        raise TypeError("Input x dtype does not match dtype: %s vs. %s" %
                        (x.dtype, self.dtype))
      half_df = self._df / 2
      # log B(1/2, df/2) written with lgamma.
      log_beta_fn = (math_ops.lgamma(0.5) + math_ops.lgamma(half_df) -
                     math_ops.lgamma(0.5 + half_df))
      result = -math_ops.log(self._df) / 2 - log_beta_fn
      standardized = (x - self._mu) / self._sigma
      kernel = (self._df + 1) / 2 * math_ops.log(
          1 + math_ops.square(standardized) / self._df)
      result = result - kernel
      return result - math_ops.log(self._sigma)
def _IgammaGrad(op, grad):
  """Returns the gradient of igamma(a, x); only the x-part is computed.

  The partial derivative with respect to `x` is
  `exp(-x + (a - 1) * log(x) - lgamma(a))`. The slot for `a` is `None`.

  Args:
    op: The op being differentiated; `op.inputs` is `(a, x)`.
    grad: Gradient flowing in from the op's output.

  Returns:
    A pair `(None, grad_x)` with `grad_x` reshaped to the shape of `x`.
  """
  # TODO(ebrevdo): Perhaps add the derivative w.r.t. a
  a, x = op.inputs[0], op.inputs[1]
  shape_a = array_ops.shape(a)
  shape_x = array_ops.shape(x)
  _, reduce_x = gen_array_ops._broadcast_gradient_args(shape_a, shape_x)

  # Perform operations in log space before summing, because Gamma(a)
  # and Gamma'(a) can grow large.
  log_partial_x = -x + (a - 1) * math_ops.log(x) - math_ops.lgamma(a)
  partial_x = math_ops.exp(log_partial_x)
  grad_x = array_ops.reshape(
      math_ops.reduce_sum(partial_x * grad, reduce_x), shape_x)
  return (None, grad_x)
def pdf(self, x, name="pdf"):
  """The PDF of observations in `x` under these Student's t distribution(s).

  Args:
    x: tensor of dtype `dtype`, must be broadcastable with `df`, `mu`, and
      `sigma`.
    name: The name to give this op.

  Returns:
    pdf: tensor of dtype `dtype`, the pdf values of `x`.

  Raises:
    TypeError: if the dtype of `x` differs from `self.dtype`.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self._df, self._mu, self._sigma, x], name):
      x = ops.convert_to_tensor(x)
      if x.dtype != self.dtype:
        raise TypeError("Input x dtype does not match dtype: %s vs. %s" %
                        (x.dtype, self.dtype))
      standardized = (x - self._mu) / self._sigma
      # Gamma((df + 1) / 2) / Gamma(df / 2), formed in log space.
      density = math_ops.exp(math_ops.lgamma((self._df + 1) / 2) -
                             math_ops.lgamma(self._df / 2))
      density = density / math_ops.sqrt(self._df)
      density = density / math.sqrt(np.pi)
      density = density * math_ops.pow(
          1 + math_ops.square(standardized) / self._df,
          -(self._df + 1) / 2)
      return density / self.sigma
def log_prob(self, x, name="log_prob"):
  """Log probability mass function.

  Evaluates `x * log(lam) - lam - lgamma(x + 1)`.

  Args:
    x: Non-negative floating point tensor with dtype `dtype` and whose shape
      can be broadcast with `self.lam`. `x` is only legal if it is
      non-negative and its components are equal to integer values.
    name: A name for this operation (optional).

  Returns:
    The log-probabilities of the events.
  """
  with ops.name_scope(self.name):
    with ops.name_scope(name, values=[self.lam, x]):
      k = self._check_x(x, check_integer=True)
      rate_term = k * math_ops.log(self.lam)
      result = rate_term - self.lam
      return result - math_ops.lgamma(k + 1)
def _IgammaGrad(op, grad):
  """Returns gradient of igamma(a, x) with respect to a and x.

  The partial with respect to `a` comes from `gen_math_ops.igamma_grad_a`;
  the partial with respect to `x` is
  `exp(-x + (a - 1) * log(x) - lgamma(a))`.

  Args:
    op: The op being differentiated; `op.inputs` is `(a, x)`.
    grad: Gradient flowing in from the op's output.

  Returns:
    A pair `(grad_a, grad_x)`, each summed over its broadcast dimensions and
    reshaped to the shape of the corresponding input.
  """
  a, x = op.inputs[0], op.inputs[1]
  shape_a = array_ops.shape(a)
  shape_x = array_ops.shape(x)
  reduce_a, reduce_x = gen_array_ops.broadcast_gradient_args(shape_a, shape_x)

  with ops.control_dependencies([grad]):
    partial_a = gen_math_ops.igamma_grad_a(a, x)
    # Perform operations in log space before summing, because Gamma(a)
    # and Gamma'(a) can grow large.
    log_partial_x = -x + (a - 1) * math_ops.log(x) - math_ops.lgamma(a)
    partial_x = math_ops.exp(log_partial_x)
    grad_a = array_ops.reshape(
        math_ops.reduce_sum(partial_a * grad, reduce_a), shape_a)
    grad_x = array_ops.reshape(
        math_ops.reduce_sum(partial_x * grad, reduce_x), shape_x)
    return (grad_a, grad_x)
def _IgammaGrad(op, grad):
  """Returns gradient of igamma(a, x) with respect to x.

  The partial derivative with respect to `x` is
  `exp(-x + (a - 1) * log(x) - lgamma(a))`. No gradient is produced for `a`.

  Args:
    op: The op being differentiated; `op.inputs` is `(a, x)`.
    grad: Gradient flowing in from the op's output.

  Returns:
    A pair `(None, grad_x)` with `grad_x` reshaped to the shape of `x`.
  """
  # TODO(ebrevdo): Perhaps add the derivative w.r.t. a
  a, x = op.inputs[0], op.inputs[1]
  shape_a = array_ops.shape(a)
  shape_x = array_ops.shape(x)
  # pylint: disable=protected-access
  _, reduce_x = gen_array_ops._broadcast_gradient_args(shape_a, shape_x)
  # pylint: enable=protected-access

  # Perform operations in log space before summing, because Gamma(a)
  # and Gamma'(a) can grow large.
  log_partial_x = -x + (a - 1) * math_ops.log(x) - math_ops.lgamma(a)
  partial_x = math_ops.exp(log_partial_x)
  grad_x = array_ops.reshape(
      math_ops.reduce_sum(partial_x * grad, reduce_x), shape_x)
  # TODO(b/36815900): Mark None return values as NotImplemented
  return (None, grad_x)
def _log_normalization(self):
  """Returns `lgamma(c1) + lgamma(c0) - lgamma(total_concentration)`.

  Here `c1` is `self.concentration1` and `c0` is `self.concentration0`.
  """
  lgamma_c1 = math_ops.lgamma(self.concentration1)
  lgamma_c0 = math_ops.lgamma(self.concentration0)
  lgamma_total = math_ops.lgamma(self.total_concentration)
  return lgamma_c1 + lgamma_c0 - lgamma_total
def _log_normalization(self):
  """Log-normalizer built from `self.scale` and `self.df`.

  Returns
    log(|scale|) + 0.5 * log(df) + 0.5 * log(pi)
      + lgamma(df / 2) - lgamma((df + 1) / 2)
  """
  result = math_ops.log(math_ops.abs(self.scale))
  result = result + 0.5 * math_ops.log(self.df)
  result = result + 0.5 * np.log(np.pi)
  result = result + math_ops.lgamma(0.5 * self.df)
  return result - math_ops.lgamma(0.5 * (self.df + 1.))
def _log_normalization(self):
  """Returns `lgamma(concentration) - concentration * log(rate)`."""
  lgamma_concentration = math_ops.lgamma(self.concentration)
  rate_term = self.concentration * math_ops.log(self.rate)
  return lgamma_concentration - rate_term
def _entropy(self):
  """Entropy expressed through `self.concentration` and `self.rate`.

  Returns
    concentration - log(rate) + lgamma(concentration)
      + (1 - concentration) * digamma(concentration)
  """
  result = self.concentration - math_ops.log(self.rate)
  result = result + math_ops.lgamma(self.concentration)
  digamma_term = ((1. - self.concentration)
                  * math_ops.digamma(self.concentration))
  return result + digamma_term