def test_frozen(self):
    # Test that the frozen and non-frozen Wishart gives the same answers

    # Construct an arbitrary positive definite scale matrix
    dim = 4
    scale = np.diag(np.arange(dim) + 1)
    scale[np.tril_indices(dim, k=-1)] = np.arange(dim * (dim - 1) // 2)
    scale = np.dot(scale.T, scale)

    # Construct a collection of positive definite matrices to test the PDF
    X = []
    for i in range(5):
        x = np.diag(np.arange(dim) + (i + 1)**2)
        x[np.tril_indices(dim, k=-1)] = np.arange(dim * (dim - 1) // 2)
        x = np.dot(x.T, x)
        X.append(x)
    X = np.array(X).T

    # Construct a 1D and 2D set of parameters
    parameters = [
        (10, 1, np.linspace(0.1, 10, 5)),  # 1D case
        (10, scale, X)
    ]

    for (df, scale, x) in parameters:
        w = wishart(df, scale)
        assert_equal(w.var(), wishart.var(df, scale))
        assert_equal(w.mean(), wishart.mean(df, scale))
        assert_equal(w.mode(), wishart.mode(df, scale))
        assert_equal(w.entropy(), wishart.entropy(df, scale))
        assert_equal(w.pdf(x), wishart.pdf(x, df, scale))
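# A minimal standalone sketch (an illustrative addition, not part of the test suite above)
# showing the frozen-vs-functional equivalence that test_frozen checks; the imports and the
# example parameters below are assumptions chosen for illustration.
import numpy as np
from scipy.stats import wishart

df, scale = 10, np.eye(3)      # degrees of freedom and a positive definite scale matrix
x = 2.0 * np.eye(3)            # a positive definite matrix at which to evaluate the PDF

frozen = wishart(df, scale)    # freeze df and scale once, then reuse
assert np.allclose(frozen.pdf(x), wishart.pdf(x, df, scale))
assert np.allclose(frozen.mean(), wishart.mean(df, scale))
assert np.allclose(frozen.entropy(), wishart.entropy(df, scale))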
import numpy as np
from scipy import stats
from scipy.special import digamma, gamma
from scipy.stats import dirichlet, multivariate_normal as mvn, wishart


def get_cost(X, K, cluster_assignments, phi, alphas, mu_means, mu_covs, a, B,
             orig_alphas, orig_c, orig_a, orig_B):
    N, D = X.shape
    total = 0
    ln2pi = np.log(2 * np.pi)

    # calculate B inverse since we will need it
    Binv = np.empty((K, D, D))
    for j in range(K):
        Binv[j] = np.linalg.inv(B[j])

    # calculate expectations first
    Elnpi = digamma(alphas) - digamma(alphas.sum())  # E[ln(pi)]
    Elambda = np.empty((K, D, D))
    Elnlambda = np.empty(K)
    for j in range(K):
        Elambda[j] = a[j] * Binv[j]
        # E[ln|lambda_j|] = sum_{d=1..D} psi((a_j + 1 - d)/2) + D*ln(2) - ln|B_j|
        Elnlambda[j] = D * np.log(2) - np.log(np.linalg.det(B[j]))
        for d in range(1, D + 1):
            Elnlambda[j] += digamma((a[j] + 1 - d) / 2.0)

    # now calculate the log joint likelihood

    # Gaussian part
    # total -= N*D*ln2pi
    # total += 0.5*Elnlambda.sum()
    # for j in range(K):
    #     # total += 0.5*Elnlambda[j]  # vectorized
    #     for i in range(N):
    #         if cluster_assignments[i] == j:
    #             diff_ij = X[i] - mu_means[j]
    #             total -= 0.5*(diff_ij.dot(Elambda[j]).dot(diff_ij) +
    #                           np.trace(Elambda[j].dot(mu_covs[j])))

    # mixture coefficient part
    # total += Elnpi.sum()

    # use phi instead
    for j in range(K):
        for i in range(N):
            diff_ij = X[i] - mu_means[j]
            inside = Elnlambda[j] - D * ln2pi
            inside += -diff_ij.dot(Elambda[j]).dot(diff_ij) \
                - np.trace(Elambda[j].dot(mu_covs[j]))
            # inside += Elnpi[j]
            total += phi[i, j] * (0.5 * inside + Elnpi[j])

    # E{lnp(mu)} - based on original prior
    for j in range(K):
        E_mu_dot_mu = np.trace(mu_covs[j]) + mu_means[j].dot(mu_means[j])
        total += -0.5 * D * np.log(2 * np.pi * orig_c) - 0.5 * E_mu_dot_mu / orig_c
    # print("total:", total)

    # E{lnp(lambda)} - based on original prior
    for j in range(K):
        total += (orig_a[j] - D - 1) / 2.0 * Elnlambda[j] \
            - 0.5 * np.trace(orig_B[j].dot(Elambda[j]))
        # print("total 1:", total)
        total += -orig_a[j] * D / 2.0 * np.log(2) \
            + 0.5 * orig_a[j] * np.log(np.linalg.det(orig_B[j]))
        # print("total 2:", total)
        # -ln Gamma_D(orig_a_j / 2), the multivariate gamma normalizer
        total -= D * (D - 1) / 4.0 * np.log(np.pi)
        # print("total 3:", total)
        for d in range(1, D + 1):
            total -= np.log(gamma((orig_a[j] + 1 - d) / 2.0))

    # E{lnp(pi)} - based on original prior
    # -lnB(orig_alpha) + sum[j]{ orig_alpha[j] - 1 }*E[lnpi_j]
    total += np.log(gamma(orig_alphas.sum())) - np.log(gamma(orig_alphas)).sum()
    total += ((orig_alphas - 1) * Elnpi).sum()  # should be 0 since orig_alpha = 1

    # calculate entropies of the q distributions

    # q(c)
    for i in range(N):
        total += stats.entropy(phi[i])  # categorical entropy

    # q(pi)
    total += dirichlet.entropy(alphas)

    # q(mu)
    for j in range(K):
        total += mvn.entropy(cov=mu_covs[j])

    # q(lambda)
    for j in range(K):
        total += wishart.entropy(df=a[j], scale=Binv[j])

    return total
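# A small self-contained sanity check (an illustrative addition; D, a_j, B_j, and the sample
# count below are arbitrary choices, not taken from the model code): under
# q(lambda_j) = Wishart(df=a_j, scale=B_j^-1), the closed-form expectations used in get_cost,
# E[lambda_j] = a_j * B_j^-1 and E[ln|lambda_j|] = sum_{d=1..D} psi((a_j + 1 - d)/2)
# + D*ln(2) - ln|B_j|, can be verified against Monte Carlo draws.
import numpy as np
from scipy.special import digamma
from scipy.stats import wishart

D, a_j = 3, 10.0
B_j = np.eye(D) + 0.1                          # an arbitrary positive definite "B" matrix
Binv_j = np.linalg.inv(B_j)

samples = wishart.rvs(df=a_j, scale=Binv_j, size=200000, random_state=42)

E_lambda = a_j * Binv_j
E_lnlambda = (D * np.log(2) - np.log(np.linalg.det(B_j))
              + sum(digamma((a_j + 1 - d) / 2.0) for d in range(1, D + 1)))

print(np.abs(samples.mean(axis=0) - E_lambda).max())       # close to 0
print(np.log(np.linalg.det(samples)).mean() - E_lnlambda)  # close to 0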