def test_log_post_pred():

    # Data generated with np.random.seed(2); np.random.rand(11, 4)
    X = np.array([
        [0.4359949, 0.02592623, 0.54966248, 0.43532239],
        [0.4203678, 0.33033482, 0.20464863, 0.61927097],
        [0.29965467, 0.26682728, 0.62113383, 0.52914209],
        [0.13457995, 0.51357812, 0.18443987, 0.78533515],
        [0.85397529, 0.49423684, 0.84656149, 0.07964548],
        [0.50524609, 0.0652865, 0.42812233, 0.09653092],
        [0.12715997, 0.59674531, 0.226012, 0.10694568],
        [0.22030621, 0.34982629, 0.46778748, 0.20174323],
        [0.64040673, 0.48306984, 0.50523672, 0.38689265],
        [0.79363745, 0.58000418, 0.1622986, 0.70075235],
        [0.96455108, 0.50000836, 0.88952006, 0.34161365]
        ])
    N, D = X.shape

    # Setup densities
    m_0 = X.mean(axis=0)
    k_0 = 0.05
    v_0 = D + 10
    S_0 = 0.5 * np.ones(D)
    prior = NIW(m_0, k_0, v_0, S_0)
    gmm = GaussianComponentsDiag(X, prior, [0, 0, 0, 1, 0, 1, 3, 4, 3, 2, -1])

    expected_log_post_pred = log_post_pred_unvectorized(gmm, 10)
    npt.assert_almost_equal(gmm.log_post_pred(10), expected_log_post_pred)
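# `log_post_pred_unvectorized` is defined elsewhere in the test utilities. A
# minimal sketch of what the comparison above assumes it does (a hypothetical
# implementation; the attribute `K` for the number of components is an
# assumption):
def log_post_pred_unvectorized(gmm, i):
    """Return the log posterior predictive of data vector `i` under each of
    the components, computed one component at a time."""
    return np.array([gmm.log_post_pred_k(i, k) for k in range(gmm.K)])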
def test_log_marg_k():

    np.random.seed(1)

    # Generate data
    D = 10
    N_1 = 10
    X_1 = 5 * np.random.rand(N_1, D) - 1

    # Prior
    m_0 = 5 * np.random.rand(D) - 2
    k_0 = np.random.randint(15)
    v_0 = D + np.random.randint(5)
    S_0 = 2 * np.random.rand(D) + 3
    prior = NIW(m_0=m_0, k_0=k_0, v_0=v_0, S_0=S_0)

    # Setup GMM
    assignments = np.zeros(N_1)
    gmm = GaussianComponentsDiag(X_1, prior, assignments=assignments)

    # Calculate marginal for the component by hand
    k_N = k_0 + N_1
    v_N = v_0 + N_1
    m_N = (k_0 * m_0 + N_1 * X_1.mean(axis=0)) / k_N
    S_N = (
        S_0 + np.square(X_1).sum(axis=0) + k_0 * np.square(m_0)
        - k_N * np.square(m_N)
        )
    expected_log_marg = (
        -N_1 * D / 2. * math.log(np.pi)
        + D / 2. * math.log(k_0) - D / 2. * math.log(k_N)
        + v_0 / 2. * np.log(S_0).sum() - v_N / 2. * np.log(S_N).sum()
        + D * (gammaln(v_N / 2.) - gammaln(v_0 / 2.))
        )

    npt.assert_almost_equal(gmm.log_marg_k(0), expected_log_marg)
def test_3component_with_delete_post_pred_k():

    np.random.seed(1)

    # Generate data
    D = 10
    N_1 = 10
    N_2 = 5
    N_3 = 5
    X = 5 * np.random.rand(N_1 + N_2 + N_3, D) - 1
    X_1 = X[:N_1]
    X_2 = X[N_1:N_1 + N_2]
    X_3 = X[N_1 + N_2:]

    # Prior
    m_0 = 5 * np.random.rand(D) - 2
    k_0 = np.random.randint(15)
    v_0 = D + np.random.randint(5)
    S_0 = 2 * np.random.rand(D) + 3
    prior = NIW(m_0=m_0, k_0=k_0, v_0=v_0, S_0=S_0)

    # Setup GMM
    assignments = np.concatenate(
        [np.zeros(N_1), np.ones(N_2), 2 * np.ones(N_3)]
        )
    gmm = GaussianComponentsDiag(X, prior, assignments=assignments)

    # Remove everything in the second component (label 1); the remaining
    # third component then shifts down to label 1
    for i in range(N_1, N_1 + N_2):
        gmm.del_item(i)

    # Calculate posterior for first component by hand
    x_1 = X_1[0]
    k_N = k_0 + N_1
    v_N = v_0 + N_1
    m_N = (k_0 * m_0 + N_1 * X_1.mean(axis=0)) / k_N
    S_N = (
        S_0 + np.square(X_1).sum(axis=0) + k_0 * np.square(m_0)
        - k_N * np.square(m_N)
        )
    var = S_N * (k_N + 1) / (k_N * v_N)
    expected_posterior = np.sum([
        students_t(x_1[i], m_N[i], var[i], v_N) for i in range(len(x_1))
        ])
    npt.assert_almost_equal(gmm.log_post_pred_k(0, 0), expected_posterior)

    # Calculate posterior for what is now the second component (the X_3
    # data) by hand
    x_1 = X_3[0]
    k_N = k_0 + N_3
    v_N = v_0 + N_3
    m_N = (k_0 * m_0 + N_3 * X_3.mean(axis=0)) / k_N
    S_N = (
        S_0 + np.square(X_3).sum(axis=0) + k_0 * np.square(m_0)
        - k_N * np.square(m_N)
        )
    var = S_N * (k_N + 1) / (k_N * v_N)
    expected_posterior = np.sum([
        students_t(x_1[i], m_N[i], var[i], v_N) for i in range(len(x_1))
        ])
    npt.assert_almost_equal(gmm.log_post_pred_k(N_1 + N_2, 1), expected_posterior)
def test_log_post_pred_k():

    np.random.seed(1)

    # Prior
    D = 10
    m_0 = 5 * np.random.rand(D) - 2
    k_0 = np.random.randint(15)
    v_0 = D + np.random.randint(5)
    S_0 = 2 * np.random.rand(D) + 3
    prior = NIW(m_0=m_0, k_0=k_0, v_0=v_0, S_0=S_0)

    # Data
    N = 12
    X = 5 * np.random.rand(N, D) - 1

    # Setup GMM
    gmm = GaussianComponentsDiag(X, prior)
    for i in range(N):
        gmm.add_item(i, 0)

    # Calculate posterior by hand
    x = X[0]
    k_N = k_0 + N
    v_N = v_0 + N
    m_N = (k_0 * m_0 + N * X.mean(axis=0)) / k_N
    S_N = (
        S_0 + np.square(X).sum(axis=0) + k_0 * np.square(m_0)
        - k_N * np.square(m_N)
        )
    var = S_N * (k_N + 1) / (k_N * v_N)
    expected_posterior = np.sum([
        students_t(x[i], m_N[i], var[i], v_N) for i in range(len(x))
        ])

    npt.assert_almost_equal(gmm.log_post_pred_k(0, 0), expected_posterior)
def test_2component_post_pred_k():

    np.random.seed(1)

    # Generate data
    D = 10
    N_1 = 10
    N_2 = 5
    X = 5 * np.random.rand(N_1 + N_2, D) - 1
    X_1 = X[:N_1]
    X_2 = X[N_1:]

    # Prior
    m_0 = 5 * np.random.rand(D) - 2
    k_0 = np.random.randint(15)
    v_0 = D + np.random.randint(5)
    S_0 = 2 * np.random.rand(D) + 3
    prior = NIW(m_0=m_0, k_0=k_0, v_0=v_0, S_0=S_0)

    # Setup GMM
    assignments = np.concatenate([np.zeros(N_1), np.ones(N_2)])
    gmm = GaussianComponentsDiag(X, prior, assignments=assignments)

    # Remove one item (as an additional check)
    gmm.del_item(N_1 + N_2 - 1)
    X_2 = X_2[:-1]
    N_2 -= 1

    # Calculate posterior for first component by hand
    x_1 = X_1[0]
    k_N = k_0 + N_1
    v_N = v_0 + N_1
    m_N = (k_0 * m_0 + N_1 * X_1.mean(axis=0)) / k_N
    S_N = (
        S_0 + np.square(X_1).sum(axis=0) + k_0 * np.square(m_0)
        - k_N * np.square(m_N)
        )
    var = S_N * (k_N + 1) / (k_N * v_N)
    expected_posterior = np.sum([
        students_t(x_1[i], m_N[i], var[i], v_N) for i in range(len(x_1))
        ])
    npt.assert_almost_equal(gmm.log_post_pred_k(0, 0), expected_posterior)

    # Calculate posterior for second component by hand
    x_1 = X_2[0]
    k_N = k_0 + N_2
    v_N = v_0 + N_2
    m_N = (k_0 * m_0 + N_2 * X_2.mean(axis=0)) / k_N
    S_N = (
        S_0 + np.square(X_2).sum(axis=0) + k_0 * np.square(m_0)
        - k_N * np.square(m_N)
        )
    var = S_N * (k_N + 1) / (k_N * v_N)
    expected_posterior = np.sum([
        students_t(x_1[i], m_N[i], var[i], v_N) for i in range(len(x_1))
        ])
    npt.assert_almost_equal(gmm.log_post_pred_k(N_1, 1), expected_posterior)
def main():

    # Load data
    X = pickle.load(open(data_fn, "rb"))
    N, D = X.shape

    # Model parameters
    alpha = 1.
    K = 4  # number of components
    mu_scale = 3.0
    covar_scale = 1.0

    # Sampling parameters
    n_runs = 2
    n_iter = 12

    # Initialize prior
    m_0 = np.zeros(D)
    k_0 = covar_scale**2 / mu_scale**2
    v_0 = D + 3
    S_0 = covar_scale**2 * v_0 * np.eye(D)
    prior = NIW(m_0, k_0, v_0, S_0)

    # Initialize component assignments; this is not random for testing
    # purposes
    z = np.array([i * np.ones(N // K) for i in range(K)], dtype=int).flatten()

    # Setup FBGMM
    fbgmm = FBGMM(X, prior, alpha, K, assignments=z)
    print("Initial log marginal prob:", fbgmm.log_marg())

    # Perform several Gibbs sampling runs and average the log marginals
    log_margs = np.zeros(n_iter)
    for _ in range(n_runs):
        # Perform Gibbs sampling
        record = fbgmm.gibbs_sample(n_iter)
        log_margs += record["log_marg"]
    log_margs /= n_runs

    # Plot results
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plot_mixture_model(ax, fbgmm)
    for k in range(fbgmm.components.K):
        mu, sigma = fbgmm.components.rand_k(k)
        plot_ellipse(ax, mu, sigma)

    # Plot log probability
    plt.figure()
    plt.plot(range(n_iter), log_margs)
    plt.xlabel("Iterations")
    plt.ylabel("Log prob")
    plt.show()
def gmm(X,
        K=4,
        n_iter=100,
        alpha=1.0,
        mu_scale=4.0,
        var_scale=0.5,
        covar_scale=0.7,
        posterior_predictive_check=False):

    N, D = X.shape

    # Initialize prior
    m_0 = np.zeros(D)
    k_0 = covar_scale**2 / mu_scale**2
    v_0 = D + 3
    S_0 = covar_scale**2 * v_0 * np.ones(D)
    prior = NIW(m_0, k_0, v_0, S_0)

    # Setup FBGMM
    fbgmm = FBGMM(X, prior, alpha, K, "rand")

    # Perform Gibbs sampling
    record = fbgmm.gibbs_sample(n_iter)

    # Draw component means and (diagonal) covariances from the posterior
    K = fbgmm.components.K
    mus = np.zeros((K, D))
    covars = [np.zeros((D, D)) for _ in range(K)]
    for k in range(K):
        mu, var = fbgmm.components.rand_k(k)
        mus[k, :] = mu
        covars[k] = np.diag(var)

    # Generate the same number of new points as N for a posterior predictive
    # check
    if posterior_predictive_check:
        np.random.seed(1)
        rstate = 1
        alphas = (alpha / K) + fbgmm.components.counts
        pis = dirichlet.rvs(alphas, random_state=rstate)[0]
        Z = np.zeros(N, dtype=np.uint32)
        X = np.zeros((N, D))
        for n in range(N):
            Z[n] = np.argmax(multinomial(1, pis))
            X[n] = multivariate_normal.rvs(mean=mus[Z[n]], cov=covars[Z[n]])
        return fbgmm.components.assignments, mus, (X, Z)

    return fbgmm.components.assignments, mus
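# A minimal usage sketch for the `gmm` wrapper above, on hypothetical random
# data (assumes the module-level imports used in the function body, e.g.
# FBGMM, NIW, dirichlet, multinomial and multivariate_normal, are in scope):
if __name__ == "__main__":
    np.random.seed(0)
    X_demo = np.random.randn(50, 2)  # 50 hypothetical two-dimensional points
    assignments, means = gmm(X_demo, K=3, n_iter=20)
    print(assignments.shape)  # (50,)
    print(means.shape)        # (3, 2)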
def test_log_post_pred_k():

    # Setup densities
    prior = NIW(m_0=np.array([0.0, 0.0]), k_0=2., v_0=5., S_0=5. * np.eye(2))
    gmm = GaussianComponents(
        np.array([[1.2, 0.9], [-0.1, 0.8], [0.5, 0.4]]), prior
        )

    # Add data vectors to a single component
    gmm.add_item(0, 0)
    gmm.add_item(1, 0)

    # Calculate log predictive
    lp = gmm.log_post_pred_k(2, 0)
    lp_expected = -2.07325364088
    npt.assert_almost_equal(lp, lp_expected)
def main():

    # Data parameters
    D = 2           # dimensions
    N = 100         # number of points to generate
    K_true = 4      # the true number of components

    # Model parameters
    alpha = 1.
    K = 3           # initial number of components
    n_iter = 20

    # Generate data
    mu_scale = 4.0
    covar_scale = 0.7
    z_true = np.random.randint(0, K_true, N)
    mu = np.random.randn(D, K_true) * mu_scale
    X = mu[:, z_true] + np.random.randn(D, N) * covar_scale
    X = X.T

    # Initialize prior
    m_0 = np.zeros(D)
    k_0 = covar_scale**2 / mu_scale**2
    v_0 = D + 3
    S_0 = covar_scale**2 * v_0 * np.ones(D)
    prior = NIW(m_0, k_0, v_0, S_0)

    # Setup IGMM
    igmm = IGMM(
        X, prior, alpha, assignments="rand", K=K, covariance_type="diag"
        )
    # igmm = IGMM(X, prior, alpha, assignments="one-by-one", K=K)

    # Perform Gibbs sampling
    record = igmm.gibbs_sample(n_iter)

    # Plot results
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plot_mixture_model(ax, igmm)
    for k in range(igmm.components.K):
        mu, sigma = igmm.components.rand_k(k)
        plot_ellipse(ax, mu, np.diag(sigma))
    plt.show()
def test_sampling_2d_assignments():

    random.seed(1)
    np.random.seed(1)

    # Data parameters
    D = 2           # dimensions
    N = 100         # number of points to generate
    K_true = 4      # the true number of components

    # Model parameters
    alpha = 1.
    K = 3           # number of components
    n_iter = 10

    # Generate data
    mu_scale = 4.0
    covar_scale = 0.7
    z_true = np.random.randint(0, K_true, N)
    mu = np.random.randn(D, K_true) * mu_scale
    X = mu[:, z_true] + np.random.randn(D, N) * covar_scale
    X = X.T

    # Initialize prior
    m_0 = np.zeros(D)
    k_0 = covar_scale**2 / mu_scale**2
    v_0 = D + 3
    S_0 = covar_scale**2 * v_0 * np.eye(D)
    prior = NIW(m_0, k_0, v_0, S_0)

    # Setup FBGMM
    fbgmm = FBGMM(X, prior, alpha, K, "rand")

    # Perform Gibbs sampling
    record = fbgmm.gibbs_sample(n_iter)

    assignments_expected = np.array([
        0, 2, 0, 0, 2, 0, 2, 2, 2, 0, 0, 0, 0, 2, 0, 0, 1, 0, 1, 0, 1, 0, 1,
        0, 2, 0, 1, 0, 2, 1, 1, 0, 2, 2, 0, 0, 2, 1, 0, 1, 0, 0, 0, 2, 2, 0,
        1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 1, 0, 0, 1, 2, 2, 1, 0, 0, 0, 2, 0,
        0, 0, 2, 0, 1, 0, 0, 0, 2, 2, 1, 2, 0, 0, 0, 2, 1, 2, 2, 1, 0, 0, 1,
        0, 2, 2, 1, 2, 0, 0, 2
        ])
    assignments = fbgmm.components.assignments
    npt.assert_array_equal(assignments, assignments_expected)
def test_del_item():

    np.random.seed(1)

    # Prior
    D = 10
    m_0 = 5 * np.random.rand(D) - 2
    k_0 = np.random.randint(15)
    v_0 = D + np.random.randint(5)
    S_0 = 2 * np.random.rand(D) + 3
    prior = NIW(m_0=m_0, k_0=k_0, v_0=v_0, S_0=S_0)

    # Data
    N = 12
    X = 5 * np.random.rand(N, D) - 1

    # Setup GMM
    gmm = GaussianComponentsDiag(X, prior)
    for i in range(N):
        gmm.add_item(i, 0)

    # Remove up to 5 random items (duplicates in the random draw are only
    # removed once)
    del_items = set(np.random.randint(1, N, size=5))
    for i in del_items:
        gmm.del_item(i)
    indices = list(set(range(N)).difference(del_items))

    # Calculate posterior over the remaining items by hand
    X = X[indices]
    N, _ = X.shape
    x = X[0]
    k_N = k_0 + N
    v_N = v_0 + N
    m_N = (k_0 * m_0 + N * X.mean(axis=0)) / k_N
    S_N = (
        S_0 + np.square(X).sum(axis=0) + k_0 * np.square(m_0)
        - k_N * np.square(m_N)
        )
    var = S_N * (k_N + 1) / (k_N * v_N)
    expected_posterior = np.sum([
        students_t(x[i], m_N[i], var[i], v_N) for i in range(len(x))
        ])

    npt.assert_almost_equal(gmm.log_post_pred_k(0, 0), expected_posterior)
def test_log_prior_3d():

    # Data
    X = np.array([[-0.3406, -0.0593, -0.0686]])
    N, D = X.shape

    # Setup densities
    m_0 = np.zeros(D)
    k_0 = 0.05
    v_0 = D + 1
    S_0 = 0.001 * np.eye(D)
    prior = NIW(m_0, k_0, v_0, S_0)
    gmm = GaussianComponents(X, prior)

    # Calculate log predictive under the prior alone
    lp = gmm.log_prior(0)
    lp_expected = -0.472067277015
    npt.assert_almost_equal(lp, lp_expected)
def test_sampling_2d_log_marg_deleted_components():

    random.seed(1)
    np.random.seed(1)

    # Data parameters
    D = 2           # dimensions
    N = 10          # number of points to generate
    K_true = 4      # the true number of components

    # Model parameters
    alpha = 1.
    K = 6           # number of components
    n_iter = 1

    # Generate data
    mu_scale = 4.0
    covar_scale = 0.7
    z_true = np.random.randint(0, K_true, N)
    mu = np.random.randn(D, K_true) * mu_scale
    X = mu[:, z_true] + np.random.randn(D, N) * covar_scale
    X = X.T

    # Initialize prior
    m_0 = np.zeros(D)
    k_0 = covar_scale**2 / mu_scale**2
    v_0 = D + 3
    S_0 = covar_scale**2 * v_0 * np.eye(D)
    prior = NIW(m_0, k_0, v_0, S_0)

    # Setup FBGMM
    fbgmm = FBGMM(X, prior, alpha, K, "rand")

    # Perform Gibbs sampling
    record = fbgmm.gibbs_sample(n_iter)

    expected_log_marg = -60.1448630929
    log_marg = fbgmm.log_marg()
    print(fbgmm.components.assignments)
    npt.assert_almost_equal(log_marg, expected_log_marg)
def main():

    # Data parameters
    D = 2           # dimensions
    N = 100         # number of points to generate
    K_true = 4      # the true number of components

    # Model parameters
    alpha = 1.
    K = 4           # number of components
    n_iter = 20

    # Generate data
    mu_scale = 4.0
    covar_scale = 0.7
    z_true = np.random.randint(0, K_true, N)
    mu = np.random.randn(D, K_true) * mu_scale
    X = mu[:, z_true] + np.random.randn(D, N) * covar_scale
    X = X.T

    # Initialize prior
    m_0 = np.zeros(D)
    k_0 = covar_scale**2 / mu_scale**2
    v_0 = D + 3
    S_0 = covar_scale**2 * v_0 * np.eye(D)
    prior = NIW(m_0, k_0, v_0, S_0)

    # Setup FBGMM
    fbgmm = FBGMM(X, prior, alpha, K, "rand")

    # Perform Gibbs sampling
    record = fbgmm.gibbs_sample(n_iter)

    # Plot results
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plot_mixture_model(ax, fbgmm)
    for k in range(fbgmm.components.K):
        # mu, sigma = fbgmm.components.map(k)
        mu, sigma = fbgmm.components.rand_k(k)
        plot_ellipse(ax, mu, sigma)
    plt.show()
def test_sampling_2d_log_marg_deleted_components():

    random.seed(2)
    np.random.seed(2)

    # Data parameters
    D = 2           # dimensions
    N = 5           # number of points to generate
    K_true = 4      # the true number of components

    # Model parameters
    alpha = 1.
    K = 3           # initial number of components
    n_iter = 1

    # Generate data
    mu_scale = 4.0
    covar_scale = 0.7
    z_true = np.random.randint(0, K_true, N)
    mu = np.random.randn(D, K_true) * mu_scale
    X = mu[:, z_true] + np.random.randn(D, N) * covar_scale
    X = X.T
    print(X)

    # Initialize prior
    m_0 = np.zeros(D)
    k_0 = covar_scale**2 / mu_scale**2
    v_0 = D + 3
    S_0 = covar_scale**2 * v_0 * np.eye(D)
    prior = NIW(m_0, k_0, v_0, S_0)

    # Setup IGMM
    igmm = IGMM(X, prior, alpha, assignments="each-in-own")

    # Perform Gibbs sampling
    record = igmm.gibbs_sample(n_iter)
    print(igmm.components.assignments)

    expected_log_marg = -30.771535771
    log_marg = igmm.log_marg()
    npt.assert_almost_equal(log_marg, expected_log_marg)
def test_map():

    # Setup densities
    prior = NIW(
        m_0=np.array([0.0, 0.0]), k_0=2.0, v_0=5.0, S_0=5.0 * np.eye(2)
        )
    gmm = GaussianComponents(np.array([[1.2, 0.9], [-0.1, 0.8]]), prior)
    gmm.add_item(0, 0)
    gmm.add_item(1, 0)

    mu_expected = np.array([0.275, 0.425])
    sigma_expected = np.array([
        [0.55886364, 0.04840909],
        [0.04840909, 0.52068182]
        ])

    # Calculate the posterior MAP of the parameters
    mu, sigma = gmm.map(0)
    npt.assert_almost_equal(mu, mu_expected)
    npt.assert_almost_equal(sigma, sigma_expected)
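# The expected values in `test_map` above are consistent with the standard
# NIW posterior update followed by the MAP estimates mu = m_N and
# sigma = S_N / (v_N + D + 2). A sketch of the hand calculation that
# reproduces mu_expected and sigma_expected; the formula for `map` itself is
# an assumption inferred from these numbers:
def test_map_by_hand():
    X = np.array([[1.2, 0.9], [-0.1, 0.8]])
    N, D = X.shape
    m_0, k_0, v_0, S_0 = np.zeros(D), 2.0, 5.0, 5.0 * np.eye(D)
    k_N = k_0 + N
    v_N = v_0 + N
    m_N = (k_0 * m_0 + N * X.mean(axis=0)) / k_N
    S_N = S_0 + X.T @ X + k_0 * np.outer(m_0, m_0) - k_N * np.outer(m_N, m_N)
    npt.assert_almost_equal(m_N, np.array([0.275, 0.425]))
    npt.assert_almost_equal(
        S_N / (v_N + D + 2),
        np.array([[0.55886364, 0.04840909], [0.04840909, 0.52068182]])
        )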
def test_sampling_2d_assignments_deleted_components():

    random.seed(1)
    np.random.seed(1)

    # Data parameters
    D = 2           # dimensions
    N = 20          # number of points to generate
    K_true = 4      # the true number of components

    # Model parameters
    alpha = 1.
    K = 3           # initial number of components
    n_iter = 1

    # Generate data
    mu_scale = 4.0
    covar_scale = 0.7
    z_true = np.random.randint(0, K_true, N)
    mu = np.random.randn(D, K_true) * mu_scale
    X = mu[:, z_true] + np.random.randn(D, N) * covar_scale
    X = X.T

    # Initialize prior
    m_0 = np.zeros(D)
    k_0 = covar_scale**2 / mu_scale**2
    v_0 = 5
    S_0 = covar_scale**2 * v_0 * np.eye(D)
    prior = NIW(m_0, k_0, v_0, S_0)

    # Setup IGMM
    igmm = IGMM(X, prior, alpha, assignments="each-in-own")

    # Perform Gibbs sampling
    record = igmm.gibbs_sample(n_iter)

    assignments_expected = np.array(
        [5, 2, 4, 3, 2, 7, 2, 7, 1, 0, 4, 6, 4, 1, 6, 4, 1, 7, 1, 0]
        )
    assignments = igmm.components.assignments
    npt.assert_array_equal(assignments, assignments_expected)
def test_log_marg_k():

    # Data
    X = np.array([
        [-0.3406, -0.3593, -0.0686],
        [-0.3381, 0.2993, 0.925],
        [-0.5, -0.101, 0.75]
        ])
    N, D = X.shape

    # Setup densities
    m_0 = np.zeros(D)
    k_0 = 0.05
    v_0 = D + 3
    S_0 = 0.5 * np.eye(D)
    prior = NIW(m_0, k_0, v_0, S_0)
    gmm = GaussianComponents(X, prior, [0, 0, 0])

    # Calculate log marginal of the data in component 0
    log_marg_expected = -8.42365141729
    log_marg = gmm.log_marg_k(0)
    npt.assert_almost_equal(log_marg, log_marg_expected)
def test_sampling_2d_log_marg():

    random.seed(1)
    np.random.seed(1)

    # Data parameters
    D = 2           # dimensions
    N = 100         # number of points to generate
    K_true = 4      # the true number of components

    # Model parameters
    alpha = 1.
    K = 3           # initial number of components
    n_iter = 10

    # Generate data
    mu_scale = 4.0
    covar_scale = 0.7
    z_true = np.random.randint(0, K_true, N)
    mu = np.random.randn(D, K_true) * mu_scale
    X = mu[:, z_true] + np.random.randn(D, N) * covar_scale
    X = X.T

    # Initialize prior
    m_0 = np.zeros(D)
    k_0 = covar_scale**2 / mu_scale**2
    v_0 = 5
    S_0 = covar_scale**2 * v_0 * np.eye(D)
    prior = NIW(m_0, k_0, v_0, S_0)

    # Setup IGMM
    igmm = IGMM(X, prior, alpha, K=K)

    # Perform Gibbs sampling
    record = igmm.gibbs_sample(n_iter)

    expected_log_marg = -411.811711231
    log_marg = igmm.log_marg()
    npt.assert_almost_equal(log_marg, expected_log_marg)
def test_log_prod_students_t():

    np.random.seed(1)

    # Prior
    D = 10
    m_0 = 5 * np.random.rand(D) - 2
    k_0 = np.random.randint(15)
    v_0 = D + np.random.randint(5)
    S_0 = 2 * np.random.rand(D) + 3
    prior = NIW(m_0=m_0, k_0=k_0, v_0=v_0, S_0=S_0)

    # GMM used to access `_log_prod_students_t` (via `log_prior`)
    x = 3 * np.random.rand(D) + 4
    gmm = GaussianComponentsDiag(np.array([x]), prior)

    # Under the prior, the log predictive is a sum of univariate Student's t
    # log densities, one per dimension
    expected_prior = np.sum([
        students_t(x[i], m_0[i], S_0[i] * (k_0 + 1) / (k_0 * v_0), v_0)
        for i in range(len(x))
        ])
    npt.assert_almost_equal(gmm.log_prior(0), expected_prior)
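# The `students_t` helper used throughout these tests is defined elsewhere. A
# minimal sketch consistent with how the tests call it, i.e. the log density
# of a univariate Student's t with location `mu`, squared scale `var` and `v`
# degrees of freedom (an assumption, not necessarily the project's exact
# implementation):
import math

from scipy.special import gammaln

def students_t(x, mu, var, v):
    """Return the log density of a univariate Student's t distribution."""
    return (
        gammaln((v + 1) / 2.) - gammaln(v / 2.)
        - 0.5 * math.log(v * math.pi * var)
        - (v + 1) / 2. * math.log(1 + (x - mu)**2 / (v * var))
        )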