Example #1
import numpy as np
from numpy.testing import assert_equal
from scipy.stats import dirichlet


def test_frozen_dirichlet():
    np.random.seed(2846)

    n = np.random.randint(1, 32)
    alpha = np.random.uniform(10e-10, 100, n)

    d = dirichlet(alpha)

    assert_equal(d.var(), dirichlet.var(alpha))
    assert_equal(d.mean(), dirichlet.mean(alpha))
    assert_equal(d.entropy(), dirichlet.entropy(alpha))
    num_tests = 10
    for i in range(num_tests):
        x = np.random.uniform(10e-10, 100, n)
        x /= np.sum(x)
        assert_equal(d.pdf(x[:-1]), dirichlet.pdf(x[:-1], alpha))
        assert_equal(d.logpdf(x[:-1]), dirichlet.logpdf(x[:-1], alpha))
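For reference, the value returned by dirichlet.entropy matches the closed-form Dirichlet entropy; the check below is a small sketch (the alpha vector is arbitrary).

# Sketch: verify dirichlet.entropy against the closed-form expression
#   H = ln B(alpha) + (alpha0 - K) * psi(alpha0) - sum_j (alpha_j - 1) * psi(alpha_j)
# where alpha0 = sum_j alpha_j and ln B(alpha) = sum_j lnGamma(alpha_j) - lnGamma(alpha0).
import numpy as np
from scipy.special import digamma, gammaln
from scipy.stats import dirichlet

alpha = np.array([0.5, 2.0, 3.0])   # arbitrary example concentrations
alpha0 = alpha.sum()
log_B = gammaln(alpha).sum() - gammaln(alpha0)
H = log_B + (alpha0 - alpha.size) * digamma(alpha0) - ((alpha - 1) * digamma(alpha)).sum()
assert np.isclose(H, dirichlet.entropy(alpha))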
from math import log
from scipy.special import comb, loggamma
from scipy.stats import beta, dirichlet


def objective_function():
    # Evidence lower bound for a variational Beta-Binomial mixture with a
    # Dirichlet prior on the mixing weights; relies on module-level
    # parameters (N, K, x, phi, a, b, alpha, alpha_p, a_p, b_p).
    global a, b, phi, alpha
    sum1 = 0.0
    sum6 = 0.0
    for i in range(0, N):
        xi = x[i]
        for j in range(0, K):
            sum1 += phi[i][j] * (
                log(comb(20, xi)) + xi * E_beta_ln(a[j], b[j]) +
                (20 - xi) * E_beta_ln_m(a[j], b[j]) + E_Dir_ln_j(alpha, j))
            sum6 += phi[i][j] * log(phi[i][j])
    sum2 = loggamma(sum(alpha_p)) - sum(loggamma(alpha_p))
    sum3 = 0.0
    #sum4 = Entropy_Dir(alpha)
    sum4 = dirichlet.entropy(alpha)
    sum5 = 0.0
    for j in range(0, K):
        sum2 += (alpha_p[0] - 1) * E_Dir_ln_j(alpha, j)
        sum3 += loggamma(a_p + b_p) - loggamma(a_p) - loggamma(
            b_p) + (a_p - 1) * E_beta_ln(a[j], b[j]) + (b_p - 1) * E_beta_ln_m(
                a[j], b[j])
        sum5 += beta.entropy(a[j], b[j])

    return sum1 + sum2 + sum3 + sum4 + sum5 - sum6
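The expectation helpers E_beta_ln, E_beta_ln_m and E_Dir_ln_j (and the commented-out Entropy_Dir) are defined elsewhere in the original script. Judging from their names, they are most likely the standard exponential-family identities; the definitions below are an assumption, not code from the source.

# Assumed definitions of the expectation helpers used in objective_function().
from scipy.special import digamma

def E_beta_ln(a, b):
    # E[ln theta] for theta ~ Beta(a, b)
    return digamma(a) - digamma(a + b)

def E_beta_ln_m(a, b):
    # E[ln(1 - theta)] for theta ~ Beta(a, b)
    return digamma(b) - digamma(a + b)

def E_Dir_ln_j(alpha, j):
    # E[ln pi_j] for pi ~ Dirichlet(alpha)
    return digamma(alpha[j]) - digamma(sum(alpha))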
import numpy as np
import scipy.sparse as ssp
from scipy.special import digamma, gammaln
from scipy.stats import dirichlet, entropy


def ebcc_vb(tuples, num_groups=10, a_pi=0.1, alpha=1, a_v=4, b_v=1, seed=1234, max_iter=500, empirical_prior=False):
    # Mean-field variational updates for a crowdsourced-labeling model:
    # rows of `tuples` are (item, worker, label) index triples; returns the
    # per-item class probabilities z_ik and the final ELBO.
    num_items, num_workers, num_classes = tuples.max(axis=0) + 1
    num_labels = tuples.shape[0]
    
    y_is_one_lij = []
    y_is_one_lji = []
    for k in range(num_classes):
        selected = (tuples[:, 2] == k)
        coo_ij = ssp.coo_matrix((np.ones(selected.sum()), tuples[selected, :2].T), shape=(num_items, num_workers), dtype=bool)
        y_is_one_lij.append(coo_ij.tocsr())
        y_is_one_lji.append(coo_ij.T.tocsr())
    
    beta_kl = np.eye(num_classes)*(a_v-b_v) + b_v
    
    # initialize z_ik, zg_ikm
    z_ik = np.zeros((num_items, num_classes))
    for l in range(num_classes):
        z_ik[:, [l]] += y_is_one_lij[l].sum(axis=-1)
    z_ik /= z_ik.sum(axis=-1, keepdims=True)
    
    if empirical_prior:
        alpha = z_ik.sum(axis=0)
    
    np.random.seed(seed)
    zg_ikm = np.random.dirichlet(np.ones(num_groups), z_ik.shape) * z_ik[:, :, None]
    for it in range(max_iter):
        eta_km = a_pi/num_groups + zg_ikm.sum(axis=0)
        nu_k   = alpha + z_ik.sum(axis=0)
        
        mu_jkml = np.zeros((num_workers, num_classes, num_groups, num_classes)) + beta_kl[None, :, None, :]
        for l in range(num_classes):
            for k in range(num_classes):
                mu_jkml[:, k, :, l] += y_is_one_lji[l].dot(zg_ikm[:, k, :])
            
        Eq_log_pi_km = digamma(eta_km) - digamma(eta_km.sum(axis=-1, keepdims=True))
        Eq_log_tau_k = digamma(nu_k) - digamma(nu_k.sum())
        Eq_log_v_jkml = digamma(mu_jkml) - digamma(mu_jkml.sum(axis=-1, keepdims=True))
        
        zg_ikm[:] = Eq_log_pi_km[None, :, :] + Eq_log_tau_k[None, :, None]
        for l in range(num_classes):
            for k in range(num_classes):
                zg_ikm[:, k, :] += y_is_one_lij[l].dot(Eq_log_v_jkml[:, k, :, l])
            
        zg_ikm = np.exp(zg_ikm)
        zg_ikm /= zg_ikm.reshape(num_items, -1).sum(axis=-1)[:, None, None]
        
        last_z_ik = z_ik
        z_ik = zg_ikm.sum(axis=-1)
        
        if np.allclose(last_z_ik, z_ik, atol=1e-3):
            break

    ELBO = ((eta_km-1)*Eq_log_pi_km).sum() + ((nu_k-1)*Eq_log_tau_k).sum() + ((mu_jkml-1)*Eq_log_v_jkml).sum()
    ELBO += dirichlet.entropy(nu_k)
    for k in range(num_classes):
        ELBO += dirichlet.entropy(eta_km[k])
    ELBO += (gammaln(mu_jkml) - (mu_jkml-1)*digamma(mu_jkml)).sum()
    alpha0_jkm = mu_jkml.sum(axis=-1)
    ELBO += ((alpha0_jkm-num_classes)*digamma(alpha0_jkm) - gammaln(alpha0_jkm)).sum()
    ELBO += entropy(zg_ikm.reshape(num_items, -1).T).sum()
    return z_ik, ELBO
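A minimal, hypothetical smoke test, assuming each row of tuples is (item_id, worker_id, label_id) as the indexing in the function implies; the toy data is illustrative only.

# Hypothetical usage of ebcc_vb on a tiny crowdsourcing dataset:
# rows are (item_id, worker_id, label_id).
import numpy as np

toy_tuples = np.array([
    [0, 0, 1], [0, 1, 1], [0, 2, 0],
    [1, 0, 0], [1, 1, 0],
    [2, 1, 1], [2, 2, 1],
])
z_ik, elbo = ebcc_vb(toy_tuples, num_groups=2, max_iter=50)
print(z_ik.argmax(axis=1))   # most probable class per item
print(elbo)                  # variational lower bound at the stopping point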
import numpy as np
from scipy import stats
from scipy.special import digamma, gamma
from scipy.stats import dirichlet, multivariate_normal as mvn, wishart


def get_cost(X, K, cluster_assignments, phi, alphas, mu_means, mu_covs, a, B,
             orig_alphas, orig_c, orig_a, orig_B):
    N, D = X.shape
    total = 0
    ln2pi = np.log(2 * np.pi)

    # calculate B inverse since we will need it
    Binv = np.empty((K, D, D))
    for j in range(K):
        Binv[j] = np.linalg.inv(B[j])

    # calculate expectations first
    Elnpi = digamma(alphas) - digamma(alphas.sum())  # E[ln(pi)]
    Elambda = np.empty((K, D, D))
    Elnlambda = np.empty(K)
    for j in range(K):
        Elambda[j] = a[j] * Binv[j]
        Elnlambda[j] = D * np.log(2) - np.log(np.linalg.det(B[j]))
        for d in range(D):
            Elnlambda[j] += digamma(a[j] / 2.0 + (1 - d) / 2.0)

    # now calculate the log joint likelihood
    # Gaussian part
    # total -= N*D*ln2pi
    # total += 0.5*Elnlambda.sum()
    # for j in range(K):
    #   # total += 0.5*Elnlambda[j] # vectorized
    #   for i in range(N):
    #     if cluster_assignments[i] == j:
    #       diff_ij = X[i] - mu_means[j]
    #       total -= 0.5*( diff_ij.dot(Elambda[j]).dot(diff_ij) + np.trace(Elambda[j].dot(mu_covs[j])) )

    # mixture coefficient part
    # total += Elnpi.sum()

    # use phi instead
    for j in range(K):
        for i in range(N):
            diff_ij = X[i] - mu_means[j]
            inside = Elnlambda[j] - D * ln2pi
            inside += -diff_ij.dot(Elambda[j]).dot(diff_ij) - np.trace(
                Elambda[j].dot(mu_covs[j]))
            # inside += Elnpi[j]
            total += phi[i, j] * (0.5 * inside + Elnpi[j])

    # E{lnp(mu)} - based on original prior
    for j in range(K):
        E_mu_dot_mu = np.trace(mu_covs[j]) + mu_means[j].dot(mu_means[j])
        total += -0.5 * D * np.log(
            2 * np.pi * orig_c) - 0.5 * E_mu_dot_mu / orig_c

    # print("total:", total)

    # E{lnp(lambda)} - based on original prior
    for j in range(K):
        total += (orig_a[j] - D - 1) / 2.0 * Elnlambda[j] - 0.5 * np.trace(
            orig_B[j].dot(Elambda[j]))
        # print("total 1:", total)
        total += -orig_a[j] * D / 2.0 * np.log(2) + 0.5 * orig_a[j] * np.log(
            np.linalg.det(orig_B[j]))
        # print("total 2:", total)
        total -= D * (D - 1) / 4.0 * np.log(np.pi)
        # print("total 3:", total)
        for d in range(D):
            total -= np.log(gamma(orig_a[j] / 2.0 + (1 - d) / 2.0))

    # E{lnp(pi)} - based on original prior
    # - lnB(orig_alpha) + sum[j]{ orig_alpha[j] - 1}*E[lnpi_j]
    total += np.log(gamma(orig_alphas.sum())) - np.log(
        gamma(orig_alphas)).sum()
    total += ((orig_alphas - 1) *
              Elnpi).sum()  # should be 0 since orig_alpha = 1

    # calculate entropies of the q distributions
    # q(c)
    for i in range(N):
        total += stats.entropy(phi[i])  # categorical entropy

    # q(pi)
    total += dirichlet.entropy(alphas)

    # q(mu)
    for j in range(K):
        total += mvn.entropy(cov=mu_covs[j])

    # q(lambda)
    for j in range(K):
        total += wishart.entropy(df=a[j], scale=Binv[j])

    return total
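A hypothetical smoke test for get_cost; all argument shapes are inferred from the function body and the toy values below are arbitrary.

# Hypothetical smoke test for get_cost; shapes inferred from the function body.
import numpy as np

np.random.seed(0)
N, D, K = 20, 2, 3
X = np.random.randn(N, D)
phi = np.random.dirichlet(np.ones(K), size=N)   # q(c_i) responsibilities, shape (N, K)
cluster_assignments = phi.argmax(axis=1)        # hard assignments (only used in the commented-out path)
alphas = np.ones(K) + phi.sum(axis=0)           # q(pi) Dirichlet parameters
mu_means = np.random.randn(K, D)                # q(mu_j) means
mu_covs = np.stack([np.eye(D)] * K)             # q(mu_j) covariances
a = np.full(K, D + 2.0)                         # q(lambda_j) Wishart degrees of freedom
B = np.stack([np.eye(D)] * K)                   # q(lambda_j) Wishart scale (inverted inside)
orig_alphas = np.ones(K)                        # prior Dirichlet parameters
orig_c, orig_a, orig_B = 10.0, np.full(K, D + 1.0), np.stack([np.eye(D)] * K)

print(get_cost(X, K, cluster_assignments, phi, alphas, mu_means, mu_covs,
               a, B, orig_alphas, orig_c, orig_a, orig_B))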
Example #5
def entropy(self) -> np.ndarray:
    return np.array([
        dirichlet.entropy(self._alphas[i] + 1e-6) for i in range(self._k)
    ])
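For context, a hypothetical container class around this method; the _alphas layout (one concentration vector per component) and the purpose of the 1e-6 offset (guarding against zero concentrations, where the entropy is undefined) are assumptions based on the snippet.

# Hypothetical context for the method above.
import numpy as np
from scipy.stats import dirichlet

class DirichletMixtureParams:
    def __init__(self, alphas: np.ndarray):
        self._alphas = alphas            # shape (k, d), one concentration vector per component
        self._k = alphas.shape[0]

    def entropy(self) -> np.ndarray:
        return np.array([
            dirichlet.entropy(self._alphas[i] + 1e-6) for i in range(self._k)
        ])

print(DirichletMixtureParams(np.array([[1.0, 1.0], [0.5, 2.0]])).entropy())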
Example #7
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import dirichlet as dlt

counts = np.array(list(count_obs.values()), dtype=int)

dirichlet_prior = np.ones_like(counts)  # uninformative prior based on pseudo-counts
dirichlet_posterior = dirichlet_prior + counts
prior_samples = get_samples(dirichlet_prior)
posterior_samples = get_samples(dirichlet_posterior)

print('prior means: %s' % (str(dlt.mean(dirichlet_prior))))
PoM = dlt.mean(dirichlet_posterior)
print('posterior means: %s' % (str(PoM)))
PoV = dlt.var(dirichlet_posterior)
print('posterior variances: %s' % (str(PoV)))
# expected from value counts plus assumed prior counts
print('naive posterior means: %s' % ((counts + 1) / np.sum(counts + 1)))
print('Entropy DLT prior:', dlt.entropy(dirichlet_prior))
print('Entropy DLT posterior:', dlt.entropy(dirichlet_posterior))

if plot_priors:
    plt.figure(figsize=(9, 6))
    for i, label in enumerate(count_obs.keys()):
        ax = plt.hist(prior_samples[:, i],
                      bins=50,
                      density=True,
                      alpha=.35,
                      label=label,
                      histtype='stepfilled')
        print('sampled', i, ':  ', np.mean(prior_samples[:, i]))
        #if i==0: plt.plot(np.linspace(0,1,1000), DLT_[:,1], 'k-', alpha=.7, label=label)
    plt.legend(fontsize=15)
    plt.title('Prior Probs', fontsize=16)
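The snippet above depends on a few names defined earlier in the original script: count_obs, get_samples and plot_priors. Plausible stand-ins (assumptions, not source code) would be:

# Assumed context for the snippet above.
import numpy as np

count_obs = {'A': 12, 'B': 7, 'C': 3}   # hypothetical observed category counts
plot_priors = True

def get_samples(alpha, n=10000):
    # draw Monte Carlo samples from a Dirichlet with concentration vector `alpha`
    return np.random.dirichlet(alpha, size=n)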
Example #8
def entropy_dir(self):
    return dirichlet.entropy(self.alpha)