import numpy as np
from numpy.testing import assert_equal
from scipy.stats import dirichlet


def test_frozen_dirichlet():
    np.random.seed(2846)

    n = np.random.randint(1, 32)
    alpha = np.random.uniform(10e-10, 100, n)

    d = dirichlet(alpha)

    assert_equal(d.var(), dirichlet.var(alpha))
    assert_equal(d.mean(), dirichlet.mean(alpha))
    assert_equal(d.entropy(), dirichlet.entropy(alpha))

    num_tests = 10
    for i in range(num_tests):
        x = np.random.uniform(10e-10, 100, n)
        x /= np.sum(x)
        assert_equal(d.pdf(x[:-1]), dirichlet.pdf(x[:-1], alpha))
        assert_equal(d.logpdf(x[:-1]), dirichlet.logpdf(x[:-1], alpha))
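# A minimal cross-check of the quantity the frozen/unfrozen equality above
# exercises: the Dirichlet differential entropy has the closed form
#   H = ln B(alpha) + (alpha_0 - K) * psi(alpha_0) - sum_j (alpha_j - 1) * psi(alpha_j)
# with alpha_0 = sum_j alpha_j. The helper below is an illustrative sketch,
# not part of the test suite.
from scipy.special import digamma, gammaln


def dirichlet_entropy_manual(alpha):
    alpha = np.asarray(alpha, dtype=float)
    alpha0 = alpha.sum()
    log_B = gammaln(alpha).sum() - gammaln(alpha0)  # log multivariate beta fn
    return (log_B
            + (alpha0 - alpha.size) * digamma(alpha0)
            - ((alpha - 1.0) * digamma(alpha)).sum())


assert np.isclose(dirichlet_entropy_manual([2.0, 3.0, 4.0]),
                  dirichlet.entropy(np.array([2.0, 3.0, 4.0])))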
# imports inferred from usage; the original module may bind these differently
from math import log

from scipy.special import comb, loggamma
from scipy.stats import beta, dirichlet


def objective_function():
    global a, b, phi, alpha
    sum1 = 0.0
    sum6 = 0.0
    for i in range(0, N):
        xi = x[i]
        for j in range(0, K):
            sum1 += phi[i][j] * (log(comb(20, xi))
                                 + xi * E_beta_ln(a[j], b[j])
                                 + (20 - xi) * E_beta_ln_m(a[j], b[j])
                                 + E_Dir_ln_j(alpha, j))
            sum6 += phi[i][j] * log(phi[i][j])

    sum2 = loggamma(sum(alpha_p)) - sum(loggamma(alpha_p))
    sum3 = 0.0
    # sum4 = Entropy_Dir(alpha)
    sum4 = dirichlet.entropy(alpha)
    sum5 = 0.0
    for j in range(0, K):
        sum2 += (alpha_p[0] - 1) * E_Dir_ln_j(alpha, j)
        sum3 += (loggamma(a_p + b_p) - loggamma(a_p) - loggamma(b_p)
                 + (a_p - 1) * E_beta_ln(a[j], b[j])
                 + (b_p - 1) * E_beta_ln_m(a[j], b[j]))
        sum5 += beta.entropy(a[j], b[j])

    return sum1 + sum2 + sum3 + sum4 + sum5 - sum6
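# The expectation helpers used above are not defined in this snippet. Under
# the standard mean-field identities they would plausibly be the following
# (assumed sketch, not the original author's code):
from scipy.special import digamma


def E_beta_ln(a_j, b_j):
    # E[ln theta] for theta ~ Beta(a_j, b_j)
    return digamma(a_j) - digamma(a_j + b_j)


def E_beta_ln_m(a_j, b_j):
    # E[ln(1 - theta)] for theta ~ Beta(a_j, b_j)
    return digamma(b_j) - digamma(a_j + b_j)


def E_Dir_ln_j(alpha, j):
    # E[ln pi_j] for pi ~ Dirichlet(alpha)
    return digamma(alpha[j]) - digamma(sum(alpha))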
import numpy as np
import scipy.sparse as ssp
from scipy.special import digamma, gammaln
from scipy.stats import dirichlet, entropy


def ebcc_vb(tuples, num_groups=10, a_pi=0.1, alpha=1, a_v=4, b_v=1,
            seed=1234, max_iter=500, empirical_prior=False):
    num_items, num_workers, num_classes = tuples.max(axis=0) + 1
    num_labels = tuples.shape[0]

    # per-class sparse indicator matrices, item-by-worker and worker-by-item
    y_is_one_lij = []
    y_is_one_lji = []
    for k in range(num_classes):
        selected = (tuples[:, 2] == k)
        # np.bool was removed in NumPy 1.24; plain bool is equivalent here
        coo_ij = ssp.coo_matrix((np.ones(selected.sum()), tuples[selected, :2].T),
                                shape=(num_items, num_workers), dtype=bool)
        y_is_one_lij.append(coo_ij.tocsr())
        y_is_one_lji.append(coo_ij.T.tocsr())

    beta_kl = np.eye(num_classes) * (a_v - b_v) + b_v

    # initialize z_ik, zg_ikm
    z_ik = np.zeros((num_items, num_classes))
    for l in range(num_classes):
        z_ik[:, [l]] += y_is_one_lij[l].sum(axis=-1)
    z_ik /= z_ik.sum(axis=-1, keepdims=True)

    if empirical_prior:
        alpha = z_ik.sum(axis=0)

    np.random.seed(seed)
    zg_ikm = np.random.dirichlet(np.ones(num_groups), z_ik.shape) * z_ik[:, :, None]

    for it in range(max_iter):
        # variational M-step: update posterior parameters
        eta_km = a_pi / num_groups + zg_ikm.sum(axis=0)
        nu_k = alpha + z_ik.sum(axis=0)
        mu_jkml = np.zeros((num_workers, num_classes, num_groups, num_classes)) \
                + beta_kl[None, :, None, :]
        for l in range(num_classes):
            for k in range(num_classes):
                mu_jkml[:, k, :, l] += y_is_one_lji[l].dot(zg_ikm[:, k, :])

        # expected log-parameters under the current posteriors
        Eq_log_pi_km = digamma(eta_km) - digamma(eta_km.sum(axis=-1, keepdims=True))
        Eq_log_tau_k = digamma(nu_k) - digamma(nu_k.sum())
        Eq_log_v_jkml = digamma(mu_jkml) - digamma(mu_jkml.sum(axis=-1, keepdims=True))

        # variational E-step: update assignment responsibilities
        zg_ikm[:] = Eq_log_pi_km[None, :, :] + Eq_log_tau_k[None, :, None]
        for l in range(num_classes):
            for k in range(num_classes):
                zg_ikm[:, k, :] += y_is_one_lij[l].dot(Eq_log_v_jkml[:, k, :, l])
        zg_ikm = np.exp(zg_ikm)
        zg_ikm /= zg_ikm.reshape(num_items, -1).sum(axis=-1)[:, None, None]

        last_z_ik = z_ik
        z_ik = zg_ikm.sum(axis=-1)
        if np.allclose(last_z_ik, z_ik, atol=1e-3):
            break

    ELBO = ((eta_km - 1) * Eq_log_pi_km).sum() \
         + ((nu_k - 1) * Eq_log_tau_k).sum() \
         + ((mu_jkml - 1) * Eq_log_v_jkml).sum()
    ELBO += dirichlet.entropy(nu_k)
    for k in range(num_classes):
        ELBO += dirichlet.entropy(eta_km[k])
    ELBO += (gammaln(mu_jkml) - (mu_jkml - 1) * digamma(mu_jkml)).sum()
    alpha0_jkm = mu_jkml.sum(axis=-1)
    ELBO += ((alpha0_jkm - num_classes) * digamma(alpha0_jkm) - gammaln(alpha0_jkm)).sum()
    ELBO += entropy(zg_ikm.reshape(num_items, -1).T).sum()

    return z_ik, ELBO
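# Hypothetical usage sketch: `tuples` is assumed to be an integer array of
# (item_id, worker_id, label) rows, one per crowdsourced annotation.
tuples = np.array([
    [0, 0, 1], [0, 1, 1],   # item 0: both workers say class 1
    [1, 0, 0], [1, 1, 0],   # item 1: both workers say class 0
    [2, 0, 1], [2, 1, 0],   # item 2: workers disagree
])
z_ik, elbo = ebcc_vb(tuples, num_groups=2, max_iter=100)
print(z_ik.argmax(axis=-1))  # MAP class per item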
import numpy as np
from scipy import stats
from scipy.special import digamma, gamma
from scipy.stats import dirichlet, wishart
from scipy.stats import multivariate_normal as mvn  # assumed binding for mvn


def get_cost(X, K, cluster_assignments, phi, alphas, mu_means, mu_covs, a, B,
             orig_alphas, orig_c, orig_a, orig_B):
    N, D = X.shape
    total = 0
    ln2pi = np.log(2 * np.pi)

    # calculate B inverse since we will need it
    Binv = np.empty((K, D, D))
    for j in range(K):
        Binv[j] = np.linalg.inv(B[j])

    # calculate expectations first
    Elnpi = digamma(alphas) - digamma(alphas.sum())  # E[ln(pi)]

    Elambda = np.empty((K, D, D))
    Elnlambda = np.empty(K)
    for j in range(K):
        Elambda[j] = a[j] * Binv[j]
        Elnlambda[j] = D * np.log(2) - np.log(np.linalg.det(B[j]))
        for d in range(D):
            Elnlambda[j] += digamma(a[j] / 2.0 + (1 - d) / 2.0)

    # now calculate the log joint likelihood
    # Gaussian part
    # total -= N*D*ln2pi
    # total += 0.5*Elnlambda.sum()
    # for j in range(K):
    #     # total += 0.5*Elnlambda[j]  # vectorized
    #     for i in range(N):
    #         if cluster_assignments[i] == j:
    #             diff_ij = X[i] - mu_means[j]
    #             total -= 0.5*( diff_ij.dot(Elambda[j]).dot(diff_ij) + np.trace(Elambda[j].dot(mu_covs[j])) )

    # mixture coefficient part
    # total += Elnpi.sum()

    # use phi instead
    for j in range(K):
        for i in range(N):
            diff_ij = X[i] - mu_means[j]
            inside = Elnlambda[j] - D * ln2pi
            inside += -diff_ij.dot(Elambda[j]).dot(diff_ij) - np.trace(Elambda[j].dot(mu_covs[j]))
            # inside += Elnpi[j]
            total += phi[i, j] * (0.5 * inside + Elnpi[j])

    # E{lnp(mu)} - based on original prior
    for j in range(K):
        E_mu_dot_mu = np.trace(mu_covs[j]) + mu_means[j].dot(mu_means[j])
        total += -0.5 * D * np.log(2 * np.pi * orig_c) - 0.5 * E_mu_dot_mu / orig_c
    # print("total:", total)

    # E{lnp(lambda)} - based on original prior
    for j in range(K):
        total += (orig_a[j] - D - 1) / 2.0 * Elnlambda[j] - 0.5 * np.trace(orig_B[j].dot(Elambda[j]))
        # print("total 1:", total)
        total += -orig_a[j] * D / 2.0 * np.log(2) + 0.5 * orig_a[j] * np.log(np.linalg.det(orig_B[j]))
        # print("total 2:", total)
        total -= D * (D - 1) / 4.0 * np.log(np.pi)
        # print("total 3:", total)
        for d in range(D):
            total -= np.log(gamma(orig_a[j] / 2.0 + (1 - d) / 2.0))

    # E{lnp(pi)} - based on original prior
    # - lnB(orig_alpha) + sum[j]{ orig_alpha[j] - 1 }*E[lnpi_j]
    total += np.log(gamma(orig_alphas.sum())) - np.log(gamma(orig_alphas)).sum()
    total += ((orig_alphas - 1) * Elnpi).sum()  # should be 0 since orig_alpha = 1

    # calculate entropies of the q distributions
    # q(c)
    for i in range(N):
        total += stats.entropy(phi[i])  # categorical entropy
    # q(pi)
    total += dirichlet.entropy(alphas)
    # q(mu)
    for j in range(K):
        total += mvn.entropy(cov=mu_covs[j])
    # q(lambda)
    for j in range(K):
        total += wishart.entropy(df=a[j], scale=Binv[j])

    return total
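# Illustrative call with tiny random inputs (shapes inferred from the body
# above; `cluster_assignments` is only used in the commented-out block, so
# None is passed here; the values are arbitrary, not a meaningful fit):
np.random.seed(0)
N, D, K = 5, 2, 3
X = np.random.randn(N, D)
phi = np.random.dirichlet(np.ones(K), size=N)   # q(c) responsibilities
cost = get_cost(X, K, None, phi,
                alphas=np.full(K, 2.0),                 # q(pi) Dirichlet params
                mu_means=np.random.randn(K, D),
                mu_covs=np.array([np.eye(D)] * K),
                a=np.full(K, D + 2.0),                  # Wishart degrees of freedom
                B=np.array([np.eye(D)] * K),
                orig_alphas=np.ones(K), orig_c=1.0,
                orig_a=np.full(K, D + 2.0),
                orig_B=np.array([np.eye(D)] * K))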
def entropy(self) -> np.ndarray:
    # the 1e-6 jitter keeps every concentration strictly positive,
    # since scipy requires alpha > 0
    return np.array([
        dirichlet.entropy(self._alphas[i] + 1e-6)
        for i in range(self._k)
    ])
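# Minimal context sketch (class name and attributes are assumptions): a bank
# of k independent Dirichlet posteriors whose entropies are reported as a
# vector, one entry per posterior.
import numpy as np
from scipy.stats import dirichlet


class DirichletBank:
    def __init__(self, alphas: np.ndarray):
        self._alphas = alphas        # shape (k, num_categories)
        self._k = alphas.shape[0]

    def entropy(self) -> np.ndarray:
        return np.array([
            dirichlet.entropy(self._alphas[i] + 1e-6)
            for i in range(self._k)
        ])


bank = DirichletBank(np.array([[1.0, 1.0], [5.0, 1.0]]))
print(bank.entropy())  # the uniform posterior has the higher entropy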
import numpy as np
import matplotlib.pyplot as plt

counts = np.array(list(count_obs.values()), dtype=int)
dirichlet_prior = np.ones_like(counts)  # uninformative prior based on pseudo-counts
dirichlet_posterior = dirichlet_prior + counts

prior_samples = get_samples(dirichlet_prior)
posterior_samples = get_samples(dirichlet_posterior)

print('prior means: %s' % str(dlt.mean(dirichlet_prior)))
PoM = dlt.mean(dirichlet_posterior)
print('posterior means: %s' % str(PoM))
PoV = dlt.var(dirichlet_posterior)
print('posterior variances: %s' % str(PoV))
# expected from value counts plus assumed prior counts
print('naive posterior means: %s' % ((counts + 1) / np.sum(counts + 1)))
print('Entropy DLT prior:', dlt.entropy(dirichlet_prior))
print('Entropy DLT posterior:', dlt.entropy(dirichlet_posterior))

if plot_priors:
    plt.figure(figsize=(9, 6))
    for i, label in enumerate(count_obs.keys()):
        ax = plt.hist(prior_samples[:, i], bins=50, density=True, alpha=.35,
                      label=label, histtype='stepfilled')
        print('sampled', i, ': ', np.mean(prior_samples[:, i]))
        # if i == 0: plt.plot(np.linspace(0, 1, 1000), DLT_[:, 1], 'k-', alpha=.7, label=label)
    plt.legend(fontsize=15)
    plt.title('Prior Probs', fontsize=16)
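# `dlt` and `get_samples` are undefined in this snippet; plausible bindings
# consistent with the calls above (assumptions, not the original code):
from scipy.stats import dirichlet as dlt


def get_samples(alpha, n=10000, seed=42):
    # one row per draw, one column per category
    rng = np.random.default_rng(seed)
    return rng.dirichlet(alpha, size=n)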
def entropy_dir(self):
    return dirichlet.entropy(self.alpha)