def _kl_beta_beta(d1, d2, name=None):
    """Compute the batchwise KL divergence KL(d1 || d2) for two Betas.

    The result is assembled from differences (d2 minus d1) of the log
    normalizer and of the three concentration attributes, each weighted by a
    digamma term evaluated on d1's parameters.

    Args:
      d1: instance of a Beta distribution object.
      d2: instance of a Beta distribution object.
      name: (optional) Name to use for created operations.
        default is "kl_beta_beta".

    Returns:
      Batchwise KL(d1 || d2)
    """
    def _d2_minus_d1(attr, is_property=True):
        # Look the attribute up on both distributions; call it first when it
        # is a method rather than a property.
        from_d1 = getattr(d1, attr)
        from_d2 = getattr(d2, attr)
        if is_property:
            return from_d2 - from_d1
        return from_d2() - from_d1()

    with ops.name_scope(name, "kl_beta_beta", values=[
        d1.concentration1,
        d1.concentration0,
        d1.total_concentration,
        d2.concentration1,
        d2.concentration0,
        d2.total_concentration,
    ]):
        kl = _d2_minus_d1("_log_normalization", is_property=False)
        kl = kl - (math_ops.digamma(d1.concentration1)
                   * _d2_minus_d1("concentration1"))
        kl = kl - (math_ops.digamma(d1.concentration0)
                   * _d2_minus_d1("concentration0"))
        kl = kl + (math_ops.digamma(d1.total_concentration)
                   * _d2_minus_d1("total_concentration"))
        return kl
def _entropy(self):
    """Return the entropy built from `self.a`, `self.b` and `self.a_b_sum`.

    Computes
      lgamma(a) - (a - 1) digamma(a) + lgamma(b) - (b - 1) digamma(b)
        - lgamma(a_b_sum) + (a_b_sum - 2) digamma(a_b_sum)
    which is the closed-form Beta(a, b) entropy provided a_b_sum == a + b
    (assumed here; the attribute is defined outside this block).
    """
    a = self.a
    b = self.b
    total = self.a_b_sum

    # Terms are accumulated left to right in the same order as the formula.
    result = math_ops.lgamma(a)
    result = result - (a - 1.) * math_ops.digamma(a)
    result = result + math_ops.lgamma(b)
    result = result - (b - 1.) * math_ops.digamma(b)
    result = result - math_ops.lgamma(total)
    result = result + (total - 2.) * math_ops.digamma(total)
    return result
def _entropy(self):
    """Return the entropy from the log normalizer and concentrations.

    Computes
      log_normalization
        - (concentration1 - 1) digamma(concentration1)
        - (concentration0 - 1) digamma(concentration0)
        + (total_concentration - 2) digamma(total_concentration)
    """
    c1 = self.concentration1
    c0 = self.concentration0
    total = self.total_concentration

    result = self._log_normalization()
    result = result - (c1 - 1.) * math_ops.digamma(c1)
    result = result - (c0 - 1.) * math_ops.digamma(c0)
    result = result + (total - 2.) * math_ops.digamma(total)
    return result
def _entropy(self):
    """Return the entropy computed from `self.alpha` and `self.alpha_sum`.

    Computes
      lbeta(alpha) + digamma(alpha_sum) * (alpha_sum - k)
        - sum_i (alpha[i] - 1) digamma(alpha[i])
    where k is the first entry of `self.event_shape()` cast to `self.dtype`
    and the sum runs over the last axis.
    """
    log_beta = special_math_ops.lbeta(self.alpha)

    num_classes = math_ops.cast(self.event_shape()[0], self.dtype)
    sum_term = math_ops.digamma(self.alpha_sum) * (
        self.alpha_sum - num_classes)

    per_class_term = -math_ops.reduce_sum(
        (self.alpha - 1.) * math_ops.digamma(self.alpha),
        reduction_indices=[-1],
        keep_dims=False)

    return log_beta + sum_term + per_class_term
def _entropy(self):
    """Return the entropy computed from `self.df` and `self.sigma`.

    Computes
      (0.5 + df/2) * (digamma(0.5 + df/2) - digamma(df/2))
        + 0.5 log(df) + lbeta([df, 1] / 2) + log(sigma)
    where the lbeta argument stacks df (broadcast against `self._ones()`)
    and ones along a new trailing axis.
    """
    df_column = array_ops.expand_dims(self.df * self._ones(), -1)
    ones_column = array_ops.expand_dims(self._ones(), -1)
    trailing_axis = len(df_column.get_shape()) - 1
    lbeta_input = array_ops.concat_v2(
        [df_column, ones_column], trailing_axis) / 2

    df_over_two = 0.5 * self.df

    result = (0.5 + df_over_two) * (
        math_ops.digamma(0.5 + df_over_two) - math_ops.digamma(df_over_two))
    result = result + 0.5 * math_ops.log(self.df)
    result = result + special_math_ops.lbeta(lbeta_input)
    result = result + math_ops.log(self.sigma)
    return result
def _entropy(self):
    """Return the entropy from the log normalizer and `self.concentration`.

    Computes
      log_normalization + (total_concentration - k) digamma(total_concentration)
        - sum_i (concentration[i] - 1) digamma(concentration[i])
    where k is the first entry of `self.event_shape_tensor()` cast to
    `self.dtype` and the sum runs over the last axis.
    """
    event_size = math_ops.cast(self.event_shape_tensor()[0], self.dtype)

    result = self._log_normalization()
    result = result + ((self.total_concentration - event_size)
                       * math_ops.digamma(self.total_concentration))
    result = result - math_ops.reduce_sum(
        (self.concentration - 1.) * math_ops.digamma(self.concentration),
        axis=-1)
    return result
def _entropy(self):
    """Return the entropy computed from `self.df` and `self.sigma`.

    Computes
      (0.5 + df/2) * (digamma(0.5 + df/2) - digamma(df/2))
        + 0.5 log(df) + lbeta([df, 1] / 2) + log(sigma)
    The lbeta argument is built with `array_ops.concat` using the
    (axis, values) argument order.
    """
    df_column = array_ops.expand_dims(self.df * self._ones(), -1)
    ones_column = array_ops.expand_dims(self._ones(), -1)
    trailing_axis = len(df_column.get_shape()) - 1
    lbeta_input = array_ops.concat(
        trailing_axis, [df_column, ones_column]) / 2

    df_over_two = 0.5 * self.df

    result = (0.5 + df_over_two) * (
        math_ops.digamma(0.5 + df_over_two) - math_ops.digamma(df_over_two))
    result = result + 0.5 * math_ops.log(self.df)
    result = result + special_math_ops.lbeta(lbeta_input)
    result = result + math_ops.log(self.sigma)
    return result
def _entropy(self):
    """Return the entropy computed from `self.df` and `self.sigma`.

    Computes
      log|sigma| + 0.5 log(df) + lbeta([df, 1] / 2)
        + 0.5 (df + 1) (digamma(0.5 (df + 1)) - digamma(0.5 df))
    """
    # Ones with the batch shape plus a trailing axis of size one; df is
    # broadcast against it so both halves of the lbeta argument line up.
    ones_column = array_ops.ones(
        self.batch_shape(), dtype=self.dtype)[..., None]
    df_column = ones_column * self.df[..., None]
    lbeta_input = array_ops.concat([df_column, ones_column], -1) / 2.

    result = math_ops.log(math_ops.abs(self.sigma))
    result = result + 0.5 * math_ops.log(self.df)
    result = result + special_math_ops.lbeta(lbeta_input)
    result = result + 0.5 * (self.df + 1.) * (
        math_ops.digamma(0.5 * (self.df + 1.))
        - math_ops.digamma(0.5 * self.df))
    return result
def _entropy(self):
    """Return the entropy computed from `self.df` and `self.scale`.

    Computes
      log|scale| + 0.5 log(df) + lbeta([df, 1] / 2)
        + 0.5 (df + 1) (digamma(0.5 (df + 1)) - digamma(0.5 df))
    """
    # Ones with the batch shape plus a trailing axis of size one; df is
    # broadcast against it so both halves of the lbeta argument line up.
    ones_column = array_ops.ones(
        self.batch_shape_tensor(), dtype=self.dtype)[..., None]
    df_column = ones_column * self.df[..., None]
    lbeta_input = array_ops.concat([df_column, ones_column], -1) / 2.

    result = math_ops.log(math_ops.abs(self.scale))
    result = result + 0.5 * math_ops.log(self.df)
    result = result + special_math_ops.lbeta(lbeta_input)
    result = result + 0.5 * (self.df + 1.) * (
        math_ops.digamma(0.5 * (self.df + 1.))
        - math_ops.digamma(0.5 * self.df))
    return result
def entropy(self, name="entropy"):
    """Entropy of the distribution in nats.

    Built from `self._a`, `self._b` and `self._a_b_sum` as three grouped
    terms: one per parameter and one for their stored sum.

    Args:
      name: Name for the inner name scope.

    Returns:
      The sum of the three terms.
    """
    with ops.name_scope(self.name), ops.name_scope(
            name, values=[self._a, self._b, self._a_b_sum]):
        a, b, total = self._a, self._b, self._a_b_sum

        term_a = math_ops.lgamma(a) - (a - 1) * math_ops.digamma(a)
        term_b = math_ops.lgamma(b) - (b - 1) * math_ops.digamma(b)
        term_total = -math_ops.lgamma(total) + (
            total - 2) * math_ops.digamma(total)

        return term_a + term_b + term_total
def entropy(self, name="entropy"):
    """Entropy of the distribution in nats.

    Built from `self._a`, `self._b` and `self._a_b_sum` as three grouped
    terms: one per parameter and one for their stored sum.

    Args:
      name: Name passed to the inner `op_scope`.

    Returns:
      The sum of the three terms.
    """
    with ops.name_scope(self.name), ops.op_scope(
            [self._a, self._b, self._a_b_sum], name):
        a, b, total = self._a, self._b, self._a_b_sum

        term_a = math_ops.lgamma(a) - (a - 1) * math_ops.digamma(a)
        term_b = math_ops.lgamma(b) - (b - 1) * math_ops.digamma(b)
        term_total = -math_ops.lgamma(total) + (
            total - 2) * math_ops.digamma(total)

        return term_a + term_b + term_total
def _multi_digamma(self, a, p, name='multi_digamma'):
    """Computes the multivariate digamma function; Psi_p(a).

    Applies digamma elementwise to `self._multi_gamma_sequence(a, p)` and sums
    the result over the last axis.
    """
    with ops.name_scope(self.name), ops.name_scope(name, values=[a, p]):
        digamma_of_seq = math_ops.digamma(self._multi_gamma_sequence(a, p))
        return math_ops.reduce_sum(digamma_of_seq, reduction_indices=(-1,))
def _kl_gamma_gamma(g0, g1, name=None):
    """Compute the batched KL divergence KL(g0 || g1) between two Gammas.

    Args:
      g0: instance of a Gamma distribution object.
      g1: instance of a Gamma distribution object.
      name: (optional) Name to use for created operations.
        Default is "kl_gamma_gamma".

    Returns:
      kl_gamma_gamma: `Tensor`. The batchwise KL(g0 || g1).
    """
    scope_inputs = [g0.concentration, g0.rate, g1.concentration, g1.rate]
    with ops.name_scope(name, "kl_gamma_gamma", values=scope_inputs):
        # Closed form taken from:
        #   http://www.fil.ion.ucl.ac.uk/~wpenny/publications/densities.ps
        # Derivation:
        #   http://stats.stackexchange.com/questions/11646/kullback-leibler-divergence-between-two-gamma-distributions  pylint: disable=line-too-long
        kl = ((g0.concentration - g1.concentration)
              * math_ops.digamma(g0.concentration))
        kl = kl + math_ops.lgamma(g1.concentration)
        kl = kl - math_ops.lgamma(g0.concentration)
        kl = kl + g1.concentration * math_ops.log(g0.rate)
        kl = kl - g1.concentration * math_ops.log(g1.rate)
        kl = kl + g0.concentration * (g1.rate / g0.rate - 1.)
        return kl
def _harmonic_number(x):
    """Compute the harmonic number from its analytic continuation.

    Evaluated as digamma(x + 1) - digamma(1). Derivation from [here](
    https://en.wikipedia.org/wiki/Digamma_function#Relation_to_harmonic_numbers)
    and [Euler's constant](
    https://en.wikipedia.org/wiki/Euler%E2%80%93Mascheroni_constant).

    Args:
      x: input float.

    Returns:
      z: The analytic continuation of the harmonic number for the input.
    """
    # Scalar 1 in the input's dtype so both digamma calls share that dtype.
    unit = array_ops.ones([], dtype=x.dtype)
    digamma_shifted = math_ops.digamma(x + unit)
    digamma_at_one = math_ops.digamma(unit)
    return digamma_shifted - digamma_at_one
def _chain_gets_correct_expectations(self, x, independent_chain_ndims, sess,
                                     feed_dict=None):
    """Run an HMC chain and check sample moments and acceptance rates.

    Builds `hmc.sample_chain` for the log-gamma target, then asserts that:
      * the target log-prob function was called exactly twice while the
        chain graph was being built (checked before `sess.run`);
      * the sample mean of x is within 2e-2 of
        digamma(shape_param) - log(rate_param);
      * the sample mean of exp(x) is within 2e-2 of shape_param / rate_param;
      * every acceptance probability lies in (0.5, 1].

    Args:
      x: Initial chain state passed as `current_state`.
      independent_chain_ndims: Number of leading dimensions of `x` that index
        independent chains; the remaining dimensions are passed to
        `self._log_gamma_log_prob` as event dimensions.
      sess: Session used to evaluate the chain.
      feed_dict: Optional extra feeds. It is copied, so the caller's mapping
        is left unmodified.
    """
    counter = collections.Counter()

    def log_gamma_log_prob(x):
        counter["target_calls"] += 1
        event_dims = math_ops.range(independent_chain_ndims,
                                    array_ops.rank(x))
        return self._log_gamma_log_prob(x, event_dims)

    num_results = array_ops.placeholder(np.int32, [], name="num_results")
    step_size = array_ops.placeholder(np.float32, [], name="step_size")
    num_leapfrog_steps = array_ops.placeholder(np.int32, [],
                                               name="num_leapfrog_steps")

    # Copy before adding our feeds so the caller's dict is not mutated.
    feed_dict = {} if feed_dict is None else dict(feed_dict)
    feed_dict.update({
        num_results: 150,
        step_size: 0.05,
        num_leapfrog_steps: 2,
    })

    samples, kernel_results = hmc.sample_chain(
        num_results=num_results,
        target_log_prob_fn=log_gamma_log_prob,
        current_state=x,
        step_size=step_size,
        num_leapfrog_steps=num_leapfrog_steps,
        num_burnin_steps=150,
        seed=42)

    self.assertAllEqual(dict(target_calls=2), counter)

    expected_x = (math_ops.digamma(self._shape_param)
                  - np.log(self._rate_param))
    expected_exp_x = self._shape_param / self._rate_param

    log_accept_ratio_, samples_, expected_x_ = sess.run(
        [kernel_results.log_accept_ratio, samples, expected_x],
        feed_dict)

    actual_x = samples_.mean()
    actual_exp_x = np.exp(samples_).mean()
    # Clamp the log ratio at 0 so the probability never exceeds 1.
    acceptance_probs = np.exp(np.minimum(log_accept_ratio_, 0.))

    logging_ops.vlog(
        1, "True      E[x, exp(x)]: {}\t{}".format(
            expected_x_, expected_exp_x))
    logging_ops.vlog(
        1, "Estimated E[x, exp(x)]: {}\t{}".format(
            actual_x, actual_exp_x))
    self.assertNear(actual_x, expected_x_, 2e-2)
    self.assertNear(actual_exp_x, expected_exp_x, 2e-2)

    # np.bool_ rather than the np.bool alias, which NumPy 1.24 removed.
    all_true = np.ones_like(acceptance_probs, np.bool_)
    self.assertAllEqual(all_true, acceptance_probs > 0.5)
    self.assertAllEqual(all_true, acceptance_probs <= 1.)
def _entropy(self):
    """Return the entropy computed from `self.alpha` and `self.beta`.

    Computes
      alpha + log(beta) + lgamma(alpha) - (1 + alpha) digamma(alpha)
    NOTE(review): this has the form of the inverse-gamma entropy with shape
    alpha and scale beta; the enclosing class is not visible here, so
    confirm.
    """
    shape = self.alpha

    result = shape + math_ops.log(self.beta)
    result = result + math_ops.lgamma(shape)
    result = result - (1.0 + shape) * math_ops.digamma(shape)
    return result
def entropy(self, name="entropy"):
    """Entropy of the distribution in nats.

    Built from `self._alpha` and `self._alpha_0` as
      lbeta(alpha) + (alpha_0 - k) digamma(alpha_0)
        - sum_i (alpha[i] - 1) digamma(alpha[i])
    where k is the first entry of `self.event_shape()` cast to `self.dtype`
    and the sum runs over the last axis.

    Args:
      name: Name for the inner name scope.

    Returns:
      The entropy tensor.
    """
    with ops.name_scope(self.name), ops.name_scope(
            name, values=[self._alpha, self._alpha_0]):
        alpha = self._alpha
        alpha_0 = self._alpha_0

        log_beta = special_math_ops.lbeta(alpha)

        event_size = math_ops.cast(self.event_shape()[0], self.dtype)
        total_term = (alpha_0 - event_size) * math_ops.digamma(alpha_0)

        per_class_term = -math_ops.reduce_sum(
            (alpha - 1) * math_ops.digamma(alpha),
            reduction_indices=[-1],
            keep_dims=False)

        return log_beta + total_term + per_class_term
def entropy(self, name="entropy"):
    """Entropy of the distribution in nats.

    Built from `self._alpha` and `self._alpha_0` as
      lbeta(alpha) + (alpha_0 - k) digamma(alpha_0)
        - sum_i (alpha[i] - 1) digamma(alpha[i])
    where k is the first entry of `self.event_shape()` cast to `self.dtype`
    and the sum runs over the last axis.

    Args:
      name: Name passed to the inner `op_scope`.

    Returns:
      The entropy tensor.
    """
    with ops.name_scope(self.name), ops.op_scope(
            [self._alpha, self._alpha_0], name):
        alpha = self._alpha
        alpha_0 = self._alpha_0

        log_beta = special_math_ops.lbeta(alpha)

        event_size = math_ops.cast(self.event_shape()[0], self.dtype)
        total_term = (alpha_0 - event_size) * math_ops.digamma(alpha_0)

        per_class_term = -math_ops.reduce_sum(
            (alpha - 1) * math_ops.digamma(alpha),
            reduction_indices=[-1],
            keep_dims=False)

        return log_beta + total_term + per_class_term
def _kl_gamma_gamma(g0, g1, name=None):
    """Batched KL divergence KL(g0 || g1) where g0 and g1 are both Gamma.

    Args:
      g0: instance of a Gamma distribution object.
      g1: instance of a Gamma distribution object.
      name: (optional) Name to use for created operations.
        Default is "kl_gamma_gamma".

    Returns:
      kl_gamma_gamma: `Tensor`. The batchwise KL(g0 || g1).
    """
    with ops.name_scope(
            name, "kl_gamma_gamma",
            values=[g0.concentration, g0.rate, g1.concentration, g1.rate]):
        # Formula source:
        #   http://www.fil.ion.ucl.ac.uk/~wpenny/publications/densities.ps
        # Step-by-step derivation:
        #   http://stats.stackexchange.com/questions/11646/kullback-leibler-divergence-between-two-gamma-distributions  pylint: disable=line-too-long
        digamma_term = ((g0.concentration - g1.concentration)
                        * math_ops.digamma(g0.concentration))
        lgamma_g1 = math_ops.lgamma(g1.concentration)
        lgamma_g0 = math_ops.lgamma(g0.concentration)
        log_rate_g0_term = g1.concentration * math_ops.log(g0.rate)
        log_rate_g1_term = g1.concentration * math_ops.log(g1.rate)
        rate_ratio_term = g0.concentration * (g1.rate / g0.rate - 1.)

        # Summed left to right, matching the published expression's order.
        return (digamma_term
                + lgamma_g1
                - lgamma_g0
                + log_rate_g0_term
                - log_rate_g1_term
                + rate_ratio_term)
def _chain_gets_correct_expectations(self, x, independent_chain_ndims, sess,
                                     feed_dict=None):
    """Run an HMC chain and check sample moments and acceptance rates.

    Builds `hmc.sample_chain` for the log-gamma target, then asserts that:
      * the target log-prob function was called exactly twice while the
        chain graph was being built (checked before `sess.run`);
      * the sample mean of x is within 2e-2 of
        digamma(shape_param) - log(rate_param);
      * the sample mean of exp(x) is within 2e-2 of shape_param / rate_param;
      * every acceptance probability lies in (0.5, 1].

    Args:
      x: Initial chain state passed as `current_state`.
      independent_chain_ndims: Number of leading dimensions of `x` that index
        independent chains; the remaining dimensions are passed to
        `self._log_gamma_log_prob` as event dimensions.
      sess: Session used to evaluate the chain.
      feed_dict: Optional extra feeds. It is copied, so the caller's mapping
        is left unmodified.
    """
    counter = collections.Counter()

    def log_gamma_log_prob(x):
        counter["target_calls"] += 1
        event_dims = math_ops.range(independent_chain_ndims,
                                    array_ops.rank(x))
        return self._log_gamma_log_prob(x, event_dims)

    num_results = array_ops.placeholder(
        np.int32, [], name="num_results")
    step_size = array_ops.placeholder(
        np.float32, [], name="step_size")
    num_leapfrog_steps = array_ops.placeholder(
        np.int32, [], name="num_leapfrog_steps")

    # Copy before adding our feeds so the caller's dict is not mutated.
    feed_dict = {} if feed_dict is None else dict(feed_dict)
    feed_dict.update({num_results: 150,
                      step_size: 0.05,
                      num_leapfrog_steps: 2})

    samples, kernel_results = hmc.sample_chain(
        num_results=num_results,
        target_log_prob_fn=log_gamma_log_prob,
        current_state=x,
        step_size=step_size,
        num_leapfrog_steps=num_leapfrog_steps,
        num_burnin_steps=150,
        seed=42)

    self.assertAllEqual(dict(target_calls=2), counter)

    expected_x = (math_ops.digamma(self._shape_param)
                  - np.log(self._rate_param))
    expected_exp_x = self._shape_param / self._rate_param

    log_accept_ratio_, samples_, expected_x_ = sess.run(
        [kernel_results.log_accept_ratio, samples, expected_x],
        feed_dict)

    actual_x = samples_.mean()
    actual_exp_x = np.exp(samples_).mean()
    # Clamp the log ratio at 0 so the probability never exceeds 1.
    acceptance_probs = np.exp(np.minimum(log_accept_ratio_, 0.))

    logging_ops.vlog(1, "True      E[x, exp(x)]: {}\t{}".format(
        expected_x_, expected_exp_x))
    logging_ops.vlog(1, "Estimated E[x, exp(x)]: {}\t{}".format(
        actual_x, actual_exp_x))
    self.assertNear(actual_x, expected_x_, 2e-2)
    self.assertNear(actual_exp_x, expected_exp_x, 2e-2)

    # np.bool_ rather than the np.bool alias, which NumPy 1.24 removed.
    all_true = np.ones_like(acceptance_probs, np.bool_)
    self.assertAllEqual(all_true, acceptance_probs > 0.5)
    self.assertAllEqual(all_true, acceptance_probs <= 1.)
def entropy(self, name="entropy"):
    """The entropy of Student t distribution(s).

    Built from `self._df` and `self._sigma` as
      (df + 1) / 2 * (digamma((df + 1) / 2) - digamma(df / 2))
        + log(df) / 2 + lbeta([df, 1] / 2) + log(sigma)

    Args:
      name: The name to give this op.

    Returns:
      entropy: tensor of dtype `dtype`, the entropy.
    """
    with ops.name_scope(self.name), ops.op_scope(
            [self._df, self._sigma], name):
        # Adding zeros broadcasts df before the trailing axis is appended.
        df_column = array_ops.expand_dims(self._df + self._zeros(), -1)
        ones_column = array_ops.expand_dims(self._ones(), -1)
        trailing_axis = len(df_column.get_shape()) - 1
        lbeta_input = array_ops.concat(
            trailing_axis, [df_column, ones_column]) / 2

        result = (self._df + 1) / 2 * (
            math_ops.digamma((self._df + 1) / 2)
            - math_ops.digamma(self._df / 2))
        result = result + math_ops.log(self._df) / 2
        result = result + special_math_ops.lbeta(lbeta_input)
        result = result + math_ops.log(self._sigma)
        return result
def _chain_gets_correct_expectations(self, x, independent_chain_ndims, sess,
                                     feed_dict=None):
    """Run an HMC chain and check sample moments and acceptance rates.

    Builds `hmc.sample_chain` for the log-gamma target, then asserts that:
      * the sample mean of x is within 2e-2 of
        digamma(shape_param) - log(rate_param);
      * the sample mean of exp(x) is within 2e-2 of shape_param / rate_param;
      * every acceptance probability lies in (0.5, 1].

    Args:
      x: Initial chain state passed as `current_state`.
      independent_chain_ndims: Number of leading dimensions of `x` that index
        independent chains; the remaining dimensions are passed to
        `self._log_gamma_log_prob` as event dimensions.
      sess: Session used to evaluate the chain.
      feed_dict: Optional extra feeds. It is copied, so the caller's mapping
        is left unmodified.
    """
    def log_gamma_log_prob(x):
        event_dims = math_ops.range(independent_chain_ndims,
                                    array_ops.rank(x))
        return self._log_gamma_log_prob(x, event_dims)

    num_results = array_ops.placeholder(np.int32, [], name="num_results")
    step_size = array_ops.placeholder(np.float32, [], name="step_size")
    num_leapfrog_steps = array_ops.placeholder(np.int32, [],
                                               name="num_leapfrog_steps")

    # Copy before adding our feeds so the caller's dict is not mutated.
    feed_dict = {} if feed_dict is None else dict(feed_dict)
    feed_dict.update({
        num_results: 150,
        step_size: 0.1,
        num_leapfrog_steps: 2,
    })

    samples, kernel_results = hmc.sample_chain(
        num_results=num_results,
        target_log_prob_fn=log_gamma_log_prob,
        current_state=x,
        step_size=step_size,
        num_leapfrog_steps=num_leapfrog_steps,
        num_burnin_steps=150,
        seed=42)

    expected_x = (math_ops.digamma(self._shape_param)
                  - np.log(self._rate_param))
    expected_exp_x = self._shape_param / self._rate_param

    acceptance_probs_, samples_, expected_x_ = sess.run(
        [kernel_results.acceptance_probs, samples, expected_x],
        feed_dict)

    actual_x = samples_.mean()
    actual_exp_x = np.exp(samples_).mean()

    logging_ops.vlog(
        1, "True      E[x, exp(x)]: {}\t{}".format(
            expected_x_, expected_exp_x))
    logging_ops.vlog(
        1, "Estimated E[x, exp(x)]: {}\t{}".format(
            actual_x, actual_exp_x))
    self.assertNear(actual_x, expected_x_, 2e-2)
    self.assertNear(actual_exp_x, expected_exp_x, 2e-2)
    self.assertTrue((acceptance_probs_ > 0.5).all())
    self.assertTrue((acceptance_probs_ <= 1.0).all())
def _kl_beta_beta(d1, d2, name=None):
    """Calculate the batched KL divergence KL(d1 || d2) with d1 and d2 Beta.

    With d1 = Beta(a, b) and d2 = Beta(a', b') the result is
      ln(B(a', b') / B(a, b))
        + (a - a') psi(a) + (b - b') psi(b) + (a' - a + b' - b) psi(a + b)
    where the `a_b_sum` attributes are used for a + b and a' + b'.

    Args:
      d1: instance of a Beta distribution object.
      d2: instance of a Beta distribution object.
      name: (optional) Name to use for created operations.
        default is "kl_beta_beta".

    Returns:
      Batchwise KL(d1 || d2)
    """
    # All six tensors read below are passed as the scope's values, including
    # d2.a and d2.b.
    inputs = [d1.a, d1.b, d1.a_b_sum, d2.a, d2.b, d2.a_b_sum]
    with ops.name_scope(name, "kl_beta_beta", inputs):
        # ln(B(a', b') / B(a, b))
        log_betas = (math_ops.lgamma(d2.a) + math_ops.lgamma(d2.b)
                     - math_ops.lgamma(d2.a_b_sum)
                     + math_ops.lgamma(d1.a_b_sum)
                     - math_ops.lgamma(d1.a) - math_ops.lgamma(d1.b))
        # (a - a')*psi(a) + (b - b')*psi(b) + (a' - a + b' - b)*psi(a + b)
        digammas = ((d1.a - d2.a) * math_ops.digamma(d1.a)
                    + (d1.b - d2.b) * math_ops.digamma(d1.b)
                    + (d2.a_b_sum - d1.a_b_sum)
                    * math_ops.digamma(d1.a_b_sum))
        return log_betas + digammas
def _chain_gets_correct_expectations(self, x, independent_chain_ndims, sess,
                                     feed_dict=None):
    """Run an HMC chain and check sample moments and acceptance rates.

    Builds `hmc.sample_chain` for the log-gamma target, then asserts that:
      * the sample mean of x is within 2e-2 of
        digamma(shape_param) - log(rate_param);
      * the sample mean of exp(x) is within 2e-2 of shape_param / rate_param;
      * every acceptance probability lies in (0.5, 1].

    Args:
      x: Initial chain state passed as `current_state`.
      independent_chain_ndims: Number of leading dimensions of `x` that index
        independent chains; the remaining dimensions are passed to
        `self._log_gamma_log_prob` as event dimensions.
      sess: Session used to evaluate the chain.
      feed_dict: Optional extra feeds. It is copied, so the caller's mapping
        is left unmodified.
    """
    def log_gamma_log_prob(x):
        event_dims = math_ops.range(independent_chain_ndims,
                                    array_ops.rank(x))
        return self._log_gamma_log_prob(x, event_dims)

    num_results = array_ops.placeholder(
        np.int32, [], name="num_results")
    step_size = array_ops.placeholder(
        np.float32, [], name="step_size")
    num_leapfrog_steps = array_ops.placeholder(
        np.int32, [], name="num_leapfrog_steps")

    # Copy before adding our feeds so the caller's dict is not mutated.
    feed_dict = {} if feed_dict is None else dict(feed_dict)
    feed_dict.update({num_results: 150,
                      step_size: 0.1,
                      num_leapfrog_steps: 2})

    samples, kernel_results = hmc.sample_chain(
        num_results=num_results,
        target_log_prob_fn=log_gamma_log_prob,
        current_state=x,
        step_size=step_size,
        num_leapfrog_steps=num_leapfrog_steps,
        num_burnin_steps=150,
        seed=42)

    expected_x = (math_ops.digamma(self._shape_param)
                  - np.log(self._rate_param))
    expected_exp_x = self._shape_param / self._rate_param

    acceptance_probs_, samples_, expected_x_ = sess.run(
        [kernel_results.acceptance_probs, samples, expected_x],
        feed_dict)

    actual_x = samples_.mean()
    actual_exp_x = np.exp(samples_).mean()

    logging_ops.vlog(1, "True      E[x, exp(x)]: {}\t{}".format(
        expected_x_, expected_exp_x))
    logging_ops.vlog(1, "Estimated E[x, exp(x)]: {}\t{}".format(
        actual_x, actual_exp_x))
    self.assertNear(actual_x, expected_x_, 2e-2)
    self.assertNear(actual_exp_x, expected_exp_x, 2e-2)
    self.assertTrue((acceptance_probs_ > 0.5).all())
    self.assertTrue((acceptance_probs_ <= 1.0).all())
def entropy(self, name="entropy"):
    """The entropy of Gamma distribution(s).

    This is defined to be

    ```entropy = alpha - log(beta) + log(Gamma(alpha))
                 + (1-alpha)digamma(alpha)```

    where digamma(alpha) is the digamma function.

    Args:
      name: The name to give this op.

    Returns:
      entropy: tensor of dtype `dtype`, the entropy.
    """
    with ops.op_scope([self.alpha, self._beta], self.name), \
            ops.name_scope(name):
        shape = self._alpha
        inverse_scale = self._beta

        result = shape - math_ops.log(inverse_scale)
        result = result + math_ops.lgamma(shape)
        result = result + (1 - shape) * math_ops.digamma(shape)
        return result
def _entropy(self):
    """Return the entropy from `self.concentration` and `self.rate`.

    Computes
      concentration + log(rate) + lgamma(concentration)
        - (1 + concentration) digamma(concentration)
    """
    conc = self.concentration

    result = conc + math_ops.log(self.rate)
    result = result + math_ops.lgamma(conc)
    result = result - ((1. + conc) * math_ops.digamma(conc))
    return result
def _multi_digamma(self, a, p, name="multi_digamma"):
    """Computes the multivariate digamma function; Psi_p(a).

    Applies digamma elementwise to `self._multi_gamma_sequence(a, p)` and sums
    the result over the last axis.
    """
    with self._name_scope(name, values=[a, p]):
        digamma_of_seq = math_ops.digamma(self._multi_gamma_sequence(a, p))
        return math_ops.reduce_sum(digamma_of_seq, axis=[-1])
def _kl_dirichlet_dirichlet(d1, d2, name=None):
    """Batchwise KL divergence KL(d1 || d2) with d1 and d2 Dirichlet.

    Args:
      d1: instance of a Dirichlet distribution object.
      d2: instance of a Dirichlet distribution object.
      name: (optional) Name to use for created operations.
        default is "kl_dirichlet_dirichlet".

    Returns:
      Batchwise KL(d1 || d2)
    """
    with ops.name_scope(name, "kl_dirichlet_dirichlet", values=[
            d1.concentration, d2.concentration]):
        # Derivation of the closed form.
        #
        # Density and its normalizer (the multivariate Beta function):
        #
        #   Dir(x; a) = 1 / B(a) * prod_i[x[i]^(a[i] - 1)]
        #   B(a) = Gamma(a[1]) * ... * Gamma(a[n]) / Gamma(a[1] + ... + a[n])
        #
        # The KL is an expectation of a log density ratio:
        #
        #   KL(Dir(x; a), Dir(x; b)) = E_Dir(x; a){log(Dir(x; a) / Dir(x; b))}
        #   log(Dir(x; a)) = sum_i[(a[i] - 1) log(x[i])] - log B(a)
        #
        # Only log(x[i]) depends on x, so we need E{log(x[i])} under
        # Dir(x; a). Two exponential-family facts give it:
        #   1. log(x[i]) is a sufficient statistic
        #   2. expected sufficient statistics (of any exp family
        #      distribution) equal the derivatives of the log normalizer with
        #      respect to the corresponding natural parameters:
        #      E{T[i](x)} = dA/d(eta[i])
        #
        # In exponential family form,
        #
        #   Dir(x; a) = exp{eta(a) . T(x) - A(a)}
        #
        # where '.' is the dot product of vectors eta and T, and A is a
        # scalar:
        #
        #   eta[i](a) = a[i] - 1
        #   T[i](x) = log(x[i])
        #   A(a) = log B(a)
        #
        # Fact (2) then yields
        #
        #   E_Dir(x; a)[log(x[i])]
        #     = dA(a) / da[i]
        #     = d/da[i] log B(a)
        #     = d/da[i] [(sum_j lgamma(a[j])) - lgamma(sum_j a[j])]
        #     = digamma(a[i]) - digamma(sum_j a[j])
        #
        # Putting it all together:
        #
        #   KL[Dir(x; a) || Dir(x; b)]
        #     = E_Dir(x; a){log(Dir(x; a) / Dir(x; b))}
        #     = E_Dir(x; a){sum_i[(a[i] - b[i]) log(x[i])]}
        #         - (lbeta(a) - lbeta(b))
        #     = sum_i[(a[i] - b[i]) * E_Dir(x; a){log(x[i])}]
        #         - lbeta(a) + lbeta(b)
        #     = sum_i[(a[i] - b[i]) * (digamma(a[i]) - digamma(sum_j a[j]))]
        #         - lbeta(a) + lbeta(b)

        # keepdims=True keeps a trailing axis of size one so the total
        # broadcasts against the per-component digammas.
        d1_total = math_ops.reduce_sum(
            d1.concentration, axis=-1, keepdims=True)
        digamma_of_total = math_ops.digamma(d1_total)
        expected_log_x = math_ops.digamma(d1.concentration) - digamma_of_total
        concentration_gap = d1.concentration - d2.concentration

        kl = math_ops.reduce_sum(concentration_gap * expected_log_x, axis=-1)
        kl = kl - special_math_ops.lbeta(d1.concentration)
        kl = kl + special_math_ops.lbeta(d2.concentration)
        return kl
def _entropy(self):
    """Return the entropy computed from `self.alpha` and `self.beta`.

    Computes
      alpha - log(beta) + lgamma(alpha) + (1 - alpha) digamma(alpha)
    """
    shape = self.alpha

    result = shape - math_ops.log(self.beta)
    result = result + math_ops.lgamma(shape)
    result = result + (1. - shape) * math_ops.digamma(shape)
    return result
def _LgammaGrad(op, grad):
    """Returns grad * digamma(x).

    `x` is the first input of `op`. The product is built under a control
    dependency on `grad.op`.
    """
    operand = op.inputs[0]
    with ops.control_dependencies([grad.op]):
        scaled_grad = grad * math_ops.digamma(operand)
    return scaled_grad
def _entropy(self):
    """Return the entropy from `self.concentration` and `self.rate`.

    Computes
      concentration + log(rate) + lgamma(concentration)
        - (1 + concentration) digamma(concentration)
    """
    concentration = self.concentration
    log_rate = math_ops.log(self.rate)

    partial = concentration + log_rate
    partial = partial + math_ops.lgamma(concentration)
    digamma_term = (1. + concentration) * math_ops.digamma(concentration)
    return partial - digamma_term
def _LgammaGrad(op, grad):
    """Returns grad * digamma(x).

    `x` is the first input of `op`; it is conjugated before digamma is
    applied. Both the conjugate and the product are built under a control
    dependency on `grad`.
    """
    operand = op.inputs[0]
    with ops.control_dependencies([grad]):
        conjugated = math_ops.conj(operand)
        scaled_grad = grad * math_ops.digamma(conjugated)
    return scaled_grad