from jax.scipy.special import digamma, gammaln


def _kl_dirichlet_dirichlet(p, q):
    # Closed-form KL(p || q) between two Dirichlet distributions.
    # From http://bariskurt.com/kullback-leibler-divergence-between-two-dirichlet-and-beta-distributions/
    sum_p_concentration = p.concentration.sum(-1)
    sum_q_concentration = q.concentration.sum(-1)
    t1 = gammaln(sum_p_concentration) - gammaln(sum_q_concentration)
    t2 = (gammaln(p.concentration) - gammaln(q.concentration)).sum(-1)
    t3 = p.concentration - q.concentration
    t4 = digamma(p.concentration) - digamma(sum_p_concentration)[..., None]
    return t1 - t2 + (t3 * t4).sum(-1)
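
# Quick sanity check (not from the original source): `p` and `q` only need a
# `.concentration` array, so SimpleNamespace stands in for a real distribution
# object here. KL(p || p) must be 0 and KL(p || q) strictly positive.
from types import SimpleNamespace

import jax.numpy as jnp

_p = SimpleNamespace(concentration=jnp.array([1.0, 2.0, 3.0]))
_q = SimpleNamespace(concentration=jnp.array([2.0, 2.0, 2.0]))
assert jnp.allclose(_kl_dirichlet_dirichlet(_p, _p), 0.0, atol=1e-6)
assert _kl_dirichlet_dirichlet(_p, _q) > 0.0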

from jax.scipy.special import betaln, digamma


def kl_divergence(p, q):
    # Closed-form KL between two Beta distributions.
    # From https://en.wikipedia.org/wiki/Beta_distribution#Quantities_of_information_(entropy)
    a, b = p.concentration1, p.concentration0
    alpha, beta = q.concentration1, q.concentration0
    a_diff = alpha - a
    b_diff = beta - b
    t1 = betaln(alpha, beta) - betaln(a, b)
    t2 = a_diff * digamma(a) + b_diff * digamma(b)
    t3 = (a_diff + b_diff) * digamma(a + b)
    return t1 - t2 + t3
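
# Hedged Monte Carlo check (illustrative scaffolding, not part of the source):
# SimpleNamespace mimics an object with `.concentration1`/`.concentration0`,
# and the sample mean of log p(x) - log q(x) under p should approach the
# closed-form KL.
from types import SimpleNamespace

import numpy as np
from scipy.stats import beta as beta_dist

_p = SimpleNamespace(concentration1=2.0, concentration0=3.0)
_q = SimpleNamespace(concentration1=4.0, concentration0=5.0)
_x = beta_dist(2.0, 3.0).rvs(200_000, random_state=0)
_mc = np.mean(beta_dist(2.0, 3.0).logpdf(_x) - beta_dist(4.0, 5.0).logpdf(_x))
assert np.isclose(float(kl_divergence(_p, _q)), _mc, atol=0.01)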

import jax.numpy as jnp
from jax.numpy import einsum, log
from jax.scipy.special import digamma


def par_efe(p_c, params, U):
    # expected free energy of each action, given Dirichlet outcome counts
    # `params`, a belief `p_c` over contexts, and outcome preferences `U`
    p_aco = params / params.sum(-1, keepdims=True)  # mean outcome probabilities
    q_ao = einsum('...aco,...c->...ao', p_aco, p_c)  # predicted outcomes per action
    # risk: E_q[log q(o) - U(o)] between predicted and preferred outcomes
    KL_a = -jnp.sum(q_ao * U, -1) + jnp.sum(q_ao * log(q_ao), -1)
    # ambiguity: expected entropy of outcomes under the Dirichlet counts
    H_ac = -(p_aco * digamma(params)).sum(-1) + digamma(params.sum(-1) + 1)
    H_a = einsum('...c,...ac->...a', p_c, H_ac)
    return KL_a + H_a
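
# Minimal usage sketch; the shapes below are assumptions for illustration only:
# `params` holds Dirichlet counts of shape (actions, contexts, outcomes), `p_c`
# is a belief over contexts, and `U` log-preferences over outcomes. The action
# with the lowest expected free energy is the one to pick.
import jax

_key = jax.random.PRNGKey(0)
_params = jax.random.uniform(_key, (2, 3, 4), minval=0.5, maxval=5.0)
_p_c = jnp.array([0.2, 0.5, 0.3])
_U = jnp.log(jnp.array([0.7, 0.1, 0.1, 0.1]))  # prefer the first outcome
_G = par_efe(_p_c, _params, _U)
print(_G.shape, int(_G.argmin()))  # (2,) and the index of the preferred action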

import jax.numpy as jnp
from jax.scipy import special


def _compute_e_log_lambda(self, X, eta, W):
    """
    Compute the expected value of every log det Lambda_k, i.e.,
    compute ∀k. E[log|Lambda_k|]
    """
    _, M = X.shape
    # E[log|Lambda_k|] = sum_{i=1}^{M} psi((eta_k + 1 - i) / 2) + M log 2 + log|W_k|
    i = jnp.arange(1, M + 1)
    digamma_sum = special.digamma((jnp.asarray(eta)[..., None] + 1 - i) / 2).sum(-1)
    # slogdet gives the required log-determinant of W
    return digamma_sum + M * jnp.log(2.0) + jnp.linalg.slogdet(W)[1]
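
# Hedged Monte Carlo check (pure scaffolding around the method): draw Wishart
# samples with scipy and compare the sample mean of log|Lambda| to the closed
# form; `self` is passed as None because the method never uses it, and X only
# supplies the dimensionality M.
import numpy as np
from scipy.stats import wishart

_M, _eta = 3, 7.0
_W = np.eye(_M) * 0.5
_S = wishart(df=_eta, scale=_W).rvs(50_000, random_state=0)
_mc = np.mean([np.linalg.slogdet(S)[1] for S in _S])
_closed = _compute_e_log_lambda(None, np.zeros((1, _M)), jnp.asarray(_eta), jnp.asarray(_W))
assert np.isclose(_mc, float(_closed), atol=0.02)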

from jax.scipy import special as jax_special


def kl_beta_beta(dist1: Beta, dist2: Beta):
    a1 = dist1.alpha
    a2 = dist2.alpha
    b1 = dist1.beta
    b2 = dist2.beta
    s1 = a1 + b1
    s2 = a2 + b2
    # Closed-form KL(Beta(a1, b1) || Beta(a2, b2)); note the digamma differences
    # are (psi(a1) - psi(s1)) and (psi(b1) - psi(s1)).
    kl_div = (
        jax_special.gammaln(s1)
        - jax_special.gammaln(a1)
        - jax_special.gammaln(b1)
        - jax_special.gammaln(s2)
        + jax_special.gammaln(a2)
        + jax_special.gammaln(b2)
        + (a1 - a2) * (jax_special.digamma(a1) - jax_special.digamma(s1))
        + (b1 - b2) * (jax_special.digamma(b1) - jax_special.digamma(s1))
    )
    return kl_div
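
# Hedged sanity check (the real `Beta` class lives elsewhere in the project, so
# a duck-typed SimpleNamespace with `.alpha`/`.beta` stands in): the KL of a
# distribution with itself is zero, and a Monte Carlo estimate should agree
# with the closed form for distinct parameters.
from types import SimpleNamespace

import numpy as np
from scipy.stats import beta as beta_dist

_d1 = SimpleNamespace(alpha=2.0, beta=5.0)
_d2 = SimpleNamespace(alpha=3.0, beta=3.0)
assert np.isclose(float(kl_beta_beta(_d1, _d1)), 0.0, atol=1e-6)
_x = beta_dist(2.0, 5.0).rvs(200_000, random_state=1)
_mc = np.mean(beta_dist(2.0, 5.0).logpdf(_x) - beta_dist(3.0, 3.0).logpdf(_x))
assert np.isclose(float(kl_beta_beta(_d1, _d2)), _mc, atol=0.01)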

import jax.numpy as jnp
from jax.scipy.special import digamma, gammaln


def kl_divergence(p, q):
    # Closed-form KL between two Gamma distributions (shape/rate parameterization).
    # From https://en.wikipedia.org/wiki/Gamma_distribution#Kullback%E2%80%93Leibler_divergence
    a, b = p.concentration, p.rate
    alpha, beta = q.concentration, q.rate
    b_ratio = beta / b
    t1 = gammaln(alpha) - gammaln(a)
    t2 = (a - alpha) * digamma(a)
    t3 = alpha * jnp.log(b_ratio)
    t4 = a * (b_ratio - 1)
    return t1 + t2 - t3 + t4
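
# Hedged Monte Carlo check (scaffolding only): SimpleNamespace mimics an object
# with `.concentration` (shape) and `.rate`; scipy parameterizes the gamma by
# scale, so the rate is inverted when sampling.
from types import SimpleNamespace

import numpy as np
from scipy.stats import gamma as gamma_dist

_p = SimpleNamespace(concentration=3.0, rate=2.0)
_q = SimpleNamespace(concentration=5.0, rate=1.0)
_x = gamma_dist(a=3.0, scale=1 / 2.0).rvs(200_000, random_state=0)
_mc = np.mean(gamma_dist(a=3.0, scale=1 / 2.0).logpdf(_x)
              - gamma_dist(a=5.0, scale=1 / 1.0).logpdf(_x))
assert np.isclose(float(kl_divergence(_p, _q)), _mc, atol=0.02)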

import jax.numpy as jnp
from jax.scipy.special import betaln, digamma


def kl_divergence(p, q):
    # KL(Kumaraswamy || Beta), truncating the infinite series at a fixed
    # Taylor order. From https://arxiv.org/abs/1605.06197 Formula (12)
    a, b = p.concentration1, p.concentration0
    alpha, beta = q.concentration1, q.concentration0
    b_reciprocal = jnp.reciprocal(b)
    a_b = a * b
    t1 = (alpha / a - 1) * (jnp.euler_gamma + digamma(b) + b_reciprocal)
    t2 = jnp.log(a_b) + betaln(alpha, beta) + (b_reciprocal - 1)
    a_ = jnp.expand_dims(a, -1)
    b_ = jnp.expand_dims(b, -1)
    a_b_ = jnp.expand_dims(a_b, -1)
    m = jnp.arange(1, p.KL_KUMARASWAMY_BETA_TAYLOR_ORDER + 1)
    t3 = (beta - 1) * b * (jnp.exp(betaln(m / a_, b_)) / (m + a_b_)).sum(-1)
    return t1 + t2 + t3
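
# Hedged Monte Carlo check (scaffolding only; the Taylor-order attribute is set
# high so truncation error is negligible): sample Kumaraswamy(2, 3) via its
# inverse CDF and compare the average log-density ratio against the truncated
# closed form.
from types import SimpleNamespace

import numpy as np
from scipy.stats import beta as beta_dist

_p = SimpleNamespace(concentration1=2.0, concentration0=3.0,
                     KL_KUMARASWAMY_BETA_TAYLOR_ORDER=1000)
_q = SimpleNamespace(concentration1=2.5, concentration0=2.0)
_u = np.random.default_rng(0).uniform(size=200_000)
_x = (1.0 - (1.0 - _u) ** (1.0 / 3.0)) ** (1.0 / 2.0)  # inverse-CDF samples
_log_p = np.log(2.0) + np.log(3.0) + np.log(_x) + 2.0 * np.log1p(-_x**2)
_mc = np.mean(_log_p - beta_dist(2.5, 2.0).logpdf(_x))
assert np.isclose(float(kl_divergence(_p, _q)), _mc, atol=0.02)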

import numpy as np
import scipy.special as spsp


def conditional_expectations(nonconjugate_params, conjugate_params, data, **kwargs):
    """Compute expectations under the conditional distribution over the
    auxiliary variables.
    """
    df, = nonconjugate_params
    loc, variance = conjugate_params

    # The auxiliary precision \tau is conditionally gamma distributed.
    alpha = 0.5 * (df + 1)
    beta = 0.5 * (df + (data - loc)**2 / variance)

    # Compute gamma expectations
    E_tau = alpha / beta
    E_log_tau = spsp.digamma(alpha) - np.log(beta)
    return E_tau, E_log_tau
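
# Hedged usage sketch: with Student's t written as a scale mixture of
# Gaussians, these are exactly the E-step quantities; an outlying observation
# should get a smaller expected precision E[tau] and hence less weight.
_data = np.array([0.1, -0.3, 8.0])  # the last point is an outlier
_E_tau, _E_log_tau = conditional_expectations((4.0,), (0.0, 1.0), _data)
assert _E_tau[2] == _E_tau.min()  # the outlier's expected precision is smallest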

import numpy as np
import scipy.special as spsp


def conditional_expectations(nonconjugate_params, conjugate_params, data, **kwargs):
    """Compute expectations under the conditional distribution over the
    auxiliary variables.
    """
    df, = nonconjugate_params
    loc, covariance_matrix = conjugate_params
    scale = np.linalg.cholesky(covariance_matrix)
    dim = loc.shape[-1]

    # The auxiliary precision is conditionally gamma distributed.
    alpha = 0.5 * (df + dim)
    # Whitening the residuals with the Cholesky factor gives the Mahalanobis distance.
    tmp = np.linalg.solve(scale, (data - loc).T).T
    beta = 0.5 * (df + np.sum(tmp**2, axis=1))

    # Compute gamma expectations
    E_tau = alpha / beta
    E_log_tau = spsp.digamma(alpha) - np.log(beta)
    return E_tau, E_log_tau
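
# Hedged usage sketch for the multivariate case (all numbers invented): four
# points in two dimensions yield one gamma expectation per data point, with the
# outlier again receiving the smallest E[tau].
_loc = np.zeros(2)
_cov = np.array([[2.0, 0.3], [0.3, 1.0]])
_data = np.array([[0.1, 0.2], [-0.5, 0.4], [6.0, -6.0], [0.0, 0.0]])
_E_tau, _ = conditional_expectations((5.0,), (_loc, _cov), _data)
assert _E_tau.shape == (4,) and _E_tau[2] == _E_tau.min()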

import numpy as np
from scipy.special import digamma


def entropy(self, alpha):
    alpha0 = np.sum(alpha, axis=-1)
    lnB = _lnB(alpha)  # log multivariate beta function, assumed defined alongside
    K = alpha.shape[-1]
    # H = ln B(alpha) + (alpha_0 - K) psi(alpha_0) - sum_k (alpha_k - 1) psi(alpha_k)
    return lnB + (alpha0 - K) * digamma(alpha0) - np.sum(
        (alpha - 1) * digamma(alpha), axis=-1)
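
# Hedged check against scipy's own Dirichlet entropy, with a dummy `self` and a
# minimal stand-in for `_lnB` (presumably defined next to the original method).
from scipy.special import gammaln
from scipy.stats import dirichlet


def _lnB(alpha):
    return np.sum(gammaln(alpha), axis=-1) - gammaln(np.sum(alpha, axis=-1))


_alpha = np.array([2.0, 3.0, 4.0])
assert np.isclose(entropy(None, _alpha), dirichlet(_alpha).entropy())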

def to_exp(self) -> ChiSquareEP:
    k_over_two = self.k_over_two_minus_one + 1.0
    # E[log x] = psi(k / 2) + log 2 = psi(k / 2) - log(1 / 2) for x ~ chi^2_k
    return ChiSquareEP(jss.digamma(k_over_two) - jnp.log(0.5))

from scipy.special import digamma, gammaln


def _entropy(self, a):
    # Differential entropy of Gamma(shape=a, scale=1):
    # H = (1 - a) psi(a) + a + log Gamma(a)
    return digamma(a) * (1 - a) + a + gammaln(a)
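
# Hedged check against scipy's gamma entropy (dummy `self`, unit scale).
import numpy as np
from scipy.stats import gamma as gamma_dist

for _a in (0.5, 1.0, 3.7):
    assert np.isclose(_entropy(None, _a), gamma_dist(_a).entropy())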

import numpy as np
from scipy.special import digamma


def multigammaln_derivative(a, p):
    # d/da log Gamma_p(a) = sum_{j=1}^{p} psi(a + (1 - j) / 2)
    return np.sum(digamma(a + (1 - np.arange(1, p + 1)) / 2))
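
# Hedged finite-difference check: differentiating scipy's multigammaln with
# respect to `a` numerically should reproduce the digamma sum.
from scipy.special import multigammaln

_a, _p, _h = 5.0, 3, 1e-6
_fd = (multigammaln(_a + _h, _p) - multigammaln(_a - _h, _p)) / (2 * _h)
assert np.isclose(multigammaln_derivative(_a, _p), _fd, atol=1e-5)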

def digamma(x):
    return spec.digamma(x)

def expected_carrier_measure(self) -> RealArray:
    q = self.to_nat()
    k_over_two = q.k_over_two_minus_one + 1.0
    return -1.0 * k_over_two + 0.5 * jss.digamma(k_over_two) + 1.5 * jnp.log(2.0)

from jax.scipy import special


def _compute_e_log_pi(self, alpha):
    """
    Compute the expected value of every log pi_k, i.e.,
    compute ∀k. E[log pi_k]
    """
    return special.digamma(alpha) - special.digamma(alpha.sum())
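
# Hedged Monte Carlo check with a dummy `self`: sampling pi ~ Dirichlet(alpha)
# and averaging log pi_k should approach digamma(alpha_k) - digamma(alpha_0).
import jax.numpy as jnp
import numpy as np

_alpha = jnp.array([2.0, 3.0, 4.0])
_pi = np.random.default_rng(0).dirichlet(np.asarray(_alpha), size=100_000)
assert np.allclose(np.log(_pi).mean(0),
                   np.asarray(_compute_e_log_pi(None, _alpha)), atol=0.01)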