def test_lower_bound(self):
    """
    Test the Wishart VB lower bound
    """

    # By having the Wishart node as the only latent node, VB gives exact
    # results, so the VB lower bound equals the true marginal log
    # likelihood, which is a multivariate Student-t distribution. Check
    # that the two agree.

    np.random.seed(42)
    D = 3
    n = (D-1) + np.random.uniform(0.1, 0.5)
    V = random.covariance(D)
    Lambda = Wishart(n, V)
    mu = np.random.randn(D)
    Y = Gaussian(mu, Lambda)
    y = np.random.randn(D)
    Y.observe(y)
    Lambda.update()
    L = Y.lower_bound_contribution() + Lambda.lower_bound_contribution()
    nu = n + 1 - D
    Cov = V / nu
    self.assertAllClose(L, _student_logpdf(y, mu, Cov, nu))
    pass
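# The helper `_student_logpdf` used above is assumed to be defined elsewhere
# in the test module. A minimal sketch of the quantity it computes (the
# log-density of a multivariate Student-t with location mu, scale matrix Cov
# and nu degrees of freedom) could look like this:
import numpy as np
from scipy import special

def _student_logpdf(y, mu, Cov, nu):
    """Multivariate Student-t log-pdf (sketch, not the library version)."""
    D = len(mu)
    z = y - mu
    # Mahalanobis distance under the scale matrix
    maha = np.dot(z, np.linalg.solve(Cov, z))
    return (special.gammaln((nu + D) / 2)
            - special.gammaln(nu / 2)
            - 0.5 * D * np.log(nu * np.pi)
            - 0.5 * np.linalg.slogdet(Cov)[1]
            - 0.5 * (nu + D) * np.log(1 + maha / nu))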
def test_moments(self):
    """
    Test the moments of Wishart node
    """

    np.random.seed(42)

    # Test prior moments
    D = 3
    n = (D-1) + np.random.uniform(0.1, 2)
    V = random.covariance(D)
    Lambda = Wishart(n, V)
    Lambda.update()
    u = Lambda.get_moments()
    self.assertAllClose(u[0],
                        n*np.linalg.inv(V),
                        msg='Mean incorrect')
    self.assertAllClose(u[1],
                        (np.sum(special.digamma((n - np.arange(D))/2))
                         + D*np.log(2)
                         - np.linalg.slogdet(V)[1]),
                        msg='Log determinant incorrect')

    # Test posterior moments
    D = 3
    n = (D-1) + np.random.uniform(0.1, 2)
    V = random.covariance(D)
    Lambda = Wishart(n, V)
    mu = np.random.randn(D)
    Y = Gaussian(mu, Lambda)
    y = np.random.randn(D)
    Y.observe(y)
    Lambda.update()
    u = Lambda.get_moments()
    n = n + 1
    V = V + np.outer(y-mu, y-mu)
    self.assertAllClose(u[0],
                        n*np.linalg.inv(V),
                        msg='Mean incorrect')
    self.assertAllClose(u[1],
                        (np.sum(special.digamma((n - np.arange(D))/2))
                         + D*np.log(2)
                         - np.linalg.slogdet(V)[1]),
                        msg='Log determinant incorrect')

    pass
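# The closed-form moments checked above can be sanity-checked by Monte Carlo,
# assuming SciPy is available. Note the parameterization: for Wishart(n, V)
# in BayesPy, E[Lambda] = n * inv(V), whereas scipy.stats.wishart is
# parameterized by the scale matrix, i.e. inv(V).
import numpy as np
from scipy import stats

D = 3
n = D + 1.5
V = np.diag([1.0, 2.0, 3.0])
samples = stats.wishart(df=n, scale=np.linalg.inv(V)).rvs(size=100000)
print(np.mean(samples, axis=0))                 # approximately n * inv(V)
print(n * np.linalg.inv(V))
print(np.mean(np.linalg.slogdet(samples)[1]))   # approximately E[log|Lambda|]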
def test_gaussian_mixture_plot():
    """
    Test the gaussian_mixture plotting function.

    The code is from http://www.bayespy.org/examples/gmm.html
    """
    np.random.seed(1)
    y0 = np.random.multivariate_normal([0, 0], [[1, 0], [0, 0.02]], size=50)
    y1 = np.random.multivariate_normal([0, 0], [[0.02, 0], [0, 1]], size=50)
    y2 = np.random.multivariate_normal([2, 2], [[1, -0.9], [-0.9, 1]], size=50)
    y3 = np.random.multivariate_normal([-2, -2], [[0.1, 0], [0, 0.1]], size=50)
    y = np.vstack([y0, y1, y2, y3])

    bpplt.pyplot.plot(y[:, 0], y[:, 1], 'rx')

    N = 200
    D = 2
    K = 10

    alpha = Dirichlet(1e-5 * np.ones(K), name='alpha')
    Z = Categorical(alpha, plates=(N,), name='z')
    mu = Gaussian(np.zeros(D), 1e-5 * np.identity(D), plates=(K,), name='mu')
    Lambda = Wishart(D, 1e-5 * np.identity(D), plates=(K,), name='Lambda')
    Y = Mixture(Z, Gaussian, mu, Lambda, name='Y')
    Z.initialize_from_random()

    Q = VB(Y, mu, Lambda, Z, alpha)
    Y.observe(y)
    Q.update(repeat=1000)

    bpplt.gaussian_mixture_2d(Y, scale=2)
def test_moments(self):
    np.random.seed(42)

    N = 4
    D1 = 2
    D2 = 3
    X1 = Gaussian(np.random.randn(N, D1), random.covariance(D1))
    X2 = Gaussian(np.random.randn(N, D2), random.covariance(D2))
    Z = ConcatGaussian(X1, X2)

    u = Z._message_to_child()

    # First moment
    self.assertAllClose(u[0][..., :D1], X1.u[0])
    self.assertAllClose(u[0][..., D1:], X2.u[0])

    # Second moment: diagonal blocks are the parents' second moments and the
    # cross blocks are outer products of the means (the parents are
    # independent in the posterior approximation)
    self.assertAllClose(u[1][..., :D1, :D1], X1.u[1])
    self.assertAllClose(u[1][..., D1:, D1:], X2.u[1])
    self.assertAllClose(u[1][..., :D1, D1:],
                        X1.u[0][..., :, None] * X2.u[0][..., None, :])
    self.assertAllClose(u[1][..., D1:, :D1],
                        X2.u[0][..., :, None] * X1.u[0][..., None, :])

    pass
def test_message_to_parents(self):
    """
    Check the messages (gradients) sent to the parent nodes
    """
    D = 3

    X = Gaussian(np.random.randn(D), random.covariance(D))
    V = Wishart(D + np.random.rand(), random.covariance(D))
    Y = Gaussian(X, V)

    self.assert_moments(
        Y,
        lambda u: [u[0], u[1] + u[1].T]
    )

    Y.observe(np.random.randn(D))

    self.assert_message_to_parent(Y, X)
    #self.assert_message_to_parent(Y, V)
    pass
def test_message_to_parents(self):
    np.random.seed(42)

    N = 5
    D1 = 3
    D2 = 4
    D3 = 2

    X1 = Gaussian(np.random.randn(N, D1), random.covariance(D1))
    X2 = Gaussian(np.random.randn(N, D2), random.covariance(D2))
    X3 = np.random.randn(N, D3)

    Z = ConcatGaussian(X1, X2, X3)
    Y = Gaussian(Z, random.covariance(D1 + D2 + D3))

    Y.observe(np.random.randn(*(Y.plates + Y.dims[0])))

    self.assert_message_to_parent(Y, X1, eps=1e-7, rtol=1e-5, atol=1e-5)
    self.assert_message_to_parent(Y, X2, eps=1e-7, rtol=1e-5, atol=1e-5)
    pass
def test_rotate_plates(self):

    # Basic test for Gaussian vectors
    X = GaussianARD(np.random.randn(3, 2),
                    np.random.rand(3, 2),
                    shape=(2,),
                    plates=(3,))
    (u0, u1) = X.get_moments()
    Cov = u1 - linalg.outer(u0, u0, ndim=1)
    Q = np.random.randn(3, 3)
    Qu0 = np.einsum('ik,kj->ij', Q, u0)
    QCov = np.einsum('k,kij->kij', np.sum(Q, axis=0)**2, Cov)
    Qu1 = QCov + linalg.outer(Qu0, Qu0, ndim=1)
    X.rotate_plates(Q, plate_axis=-1)
    (u0, u1) = X.get_moments()
    self.assertAllClose(u0, Qu0)
    self.assertAllClose(u1, Qu1)

    # Test full covariance, that is, with observations
    X = GaussianARD(np.random.randn(3, 2),
                    np.random.rand(3, 2),
                    shape=(2,),
                    plates=(3,))
    Y = Gaussian(X, [[2.0, 1.5], [1.5, 3.0]], plates=(3,))
    Y.observe(np.random.randn(3, 2))
    X.update()
    (u0, u1) = X.get_moments()
    Cov = u1 - linalg.outer(u0, u0, ndim=1)
    Q = np.random.randn(3, 3)
    Qu0 = np.einsum('ik,kj->ij', Q, u0)
    QCov = np.einsum('k,kij->kij', np.sum(Q, axis=0)**2, Cov)
    Qu1 = QCov + linalg.outer(Qu0, Qu0, ndim=1)
    X.rotate_plates(Q, plate_axis=-1)
    (u0, u1) = X.get_moments()
    self.assertAllClose(u0, Qu0)
    self.assertAllClose(u1, Qu1)

    pass
def test_message_to_parents(self):
    """
    Check the messages (gradients) sent to the parent nodes
    """
    D = 3

    X = Gaussian(np.random.randn(D), random.covariance(D))
    a = Gamma(np.random.rand(D), np.random.rand(D))
    Y = GaussianARD(X, a)
    Y.observe(np.random.randn(D))

    self.assert_message_to_parent(Y, X)
    self.assert_message_to_parent(Y, a)
    pass
def test_message_to_parent_alpha(self):
    """
    Test the message from GaussianARD to the 2nd parent (alpha).
    """

    # Check formula with uncertain parent mu
    mu = GaussianARD(1, 1)
    tau = Gamma(0.5*1e10, 1e10)
    X = GaussianARD(mu, tau)
    X.observe(3)
    (m0, m1) = tau._message_from_children()
    self.assertAllClose(m0,
                        -0.5*(3**2 - 2*3*1 + 1**2 + 1))
    self.assertAllClose(m1,
                        0.5)

    # Check formula with uncertain node
    tau = Gamma(1e10, 1e10)
    X = GaussianARD(2, tau)
    Y = GaussianARD(X, 1)
    Y.observe(5)
    X.update()
    (m0, m1) = tau._message_from_children()
    self.assertAllClose(m0,
                        -0.5*(1/(1+1) + 3.5**2 - 2*3.5*2 + 2**2))
    self.assertAllClose(m1,
                        0.5)

    # Check alpha larger than mu
    alpha = Gamma(np.ones((3, 2, 3))*1e10, 1e10)
    X = GaussianARD(np.ones((2, 3)), alpha, ndim=3)
    X.observe(2*np.ones((3, 2, 3)))
    (m0, m1) = alpha._message_from_children()
    self.assertAllClose(m0 * np.ones((3, 2, 3)),
                        -0.5*(2**2 - 2*2*1 + 1**2) * np.ones((3, 2, 3)))
    self.assertAllClose(m1 * np.ones((3, 2, 3)),
                        0.5 * np.ones((3, 2, 3)))

    # Check mu larger than alpha
    tau = Gamma(np.ones((2, 3))*1e10, 1e10)
    X = GaussianARD(np.ones((3, 2, 3)), tau, ndim=3)
    X.observe(2*np.ones((3, 2, 3)))
    (m0, m1) = tau._message_from_children()
    self.assertAllClose(m0,
                        -0.5*(2**2 - 2*2*1 + 1**2) * 3 * np.ones((2, 3)))
    self.assertAllClose(m1 * np.ones((2, 3)),
                        0.5 * 3 * np.ones((2, 3)))

    # Check node larger than mu and alpha
    tau = Gamma(np.ones((3,))*1e10, 1e10)
    X = GaussianARD(np.ones((2, 3)), tau, shape=(3, 2, 3))
    X.observe(2*np.ones((3, 2, 3)))
    (m0, m1) = tau._message_from_children()
    self.assertAllClose(m0 * np.ones(3),
                        -0.5*(2**2 - 2*2*1 + 1**2) * 6 * np.ones((3,)))
    self.assertAllClose(m1 * np.ones(3),
                        0.5 * 6 * np.ones(3))

    # Check plates for smaller mu than node
    tau = Gamma(np.ones((4, 1, 2, 3))*1e10, 1e10)
    X = GaussianARD(GaussianARD(1, 1,
                                shape=(3,),
                                plates=(4, 1, 1)),
                    tau,
                    shape=(2, 3),
                    plates=(4, 5))
    X.observe(2*np.ones((4, 5, 2, 3)))
    (m0, m1) = tau._message_from_children()
    self.assertAllClose(m0 * np.ones((4, 1, 2, 3)),
                        (-0.5*(2**2 - 2*2*1 + 1**2 + 1)
                         * 5*np.ones((4, 1, 2, 3))))
    self.assertAllClose(m1 * np.ones((4, 1, 2, 3)),
                        5*0.5 * np.ones((4, 1, 2, 3)))

    # Check mask
    tau = Gamma(np.ones((4, 3))*1e10, 1e10)
    X = GaussianARD(np.ones(3),
                    tau,
                    shape=(3,),
                    plates=(2, 4))
    X.observe(2*np.ones((2, 4, 3)),
              mask=[[True, False, True, False],
                    [False, True, True, False]])
    (m0, m1) = tau._message_from_children()
    self.assertAllClose(m0 * np.ones((4, 3)),
                        (-0.5*(2**2 - 2*2*1 + 1**2)
                         * np.ones((4, 3))
                         * np.array([[1], [1], [2], [0]])))
    self.assertAllClose(m1 * np.ones((4, 3)),
                        0.5 * np.array([[1], [1], [2], [0]]) * np.ones((4, 3)))

    # Check non-ARD Gaussian child
    mu = np.array([1, 2])
    alpha = np.array([3, 4])
    Alpha = Gamma(alpha*1e10, 1e10)
    Lambda = np.array([[1, 0.5],
                       [0.5, 1]])
    X = GaussianARD(mu, Alpha, ndim=1)
    Y = Gaussian(X, Lambda)
    y = np.array([5, 6])
    Y.observe(y)
    X.update()
    (m0, m1) = Alpha._message_from_children()
    Cov = np.linalg.inv(np.diag(alpha) + Lambda)
    mean = np.dot(Cov, np.dot(np.diag(alpha), mu) + np.dot(Lambda, y))
    self.assertAllClose(m0 * np.ones(2),
                        -0.5 * np.diag(np.outer(mean, mean)
                                       + Cov
                                       - np.outer(mean, mu)
                                       - np.outer(mu, mean)
                                       + np.outer(mu, mu)))
    self.assertAllClose(m1 * np.ones(2),
                        0.5 * np.ones(2))

    pass
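# The expected values above follow from the general form of the message a
# Gaussian(ARD) child sends to its Gamma precision parent. In natural
# parameters the message is, per element and summed over observed plates,
#
#     m0 = -0.5 * <(x - mu)^2> = -0.5 * (<x^2> - 2 <x><mu> + <mu^2>)
#     m1 = 0.5
#
# where <.> denotes posterior expectations. Uncertainty in mu or x adds the
# corresponding variance to the second moment, which is where the "+ 1" and
# "1/(1+1)" terms in the first two checks come from.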
def test_message_to_parent_mu(self):
    """
    Test that GaussianARD computes the message to the 1st parent correctly.
    """

    # Check formula with uncertain parent alpha
    mu = GaussianARD(0, 1)
    alpha = Gamma(2, 1)
    X = GaussianARD(mu, alpha)
    X.observe(3)
    (m0, m1) = mu._message_from_children()
    #(m0, m1) = X._message_to_parent(0)
    self.assertAllClose(m0, 2*3)
    self.assertAllClose(m1, -0.5*2)

    # Check formula with uncertain node
    mu = GaussianARD(1, 1e10)
    X = GaussianARD(mu, 2)
    Y = GaussianARD(X, 1)
    Y.observe(5)
    X.update()
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0, 2 * 1/(2+1) * (2*1 + 1*5))
    self.assertAllClose(m1, -0.5*2)

    # Check alpha larger than mu
    mu = GaussianARD(np.zeros((2, 3)), 1e10, shape=(2, 3))
    X = GaussianARD(mu, 2*np.ones((3, 2, 3)))
    X.observe(3*np.ones((3, 2, 3)))
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0, 2*3 * 3 * np.ones((2, 3)))
    self.assertAllClose(m1, -0.5 * 3 * 2*misc.identity(2, 3))

    # Check mu larger than alpha
    mu = GaussianARD(np.zeros((3, 2, 3)), 1e10, shape=(3, 2, 3))
    X = GaussianARD(mu, 2*np.ones((2, 3)))
    X.observe(3*np.ones((3, 2, 3)))
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0, 2*3 * np.ones((3, 2, 3)))
    self.assertAllClose(m1, -0.5 * 2*misc.identity(3, 2, 3))

    # Check node larger than mu and alpha
    mu = GaussianARD(np.zeros((2, 3)), 1e10, shape=(2, 3))
    X = GaussianARD(mu, 2*np.ones((3,)), shape=(3, 2, 3))
    X.observe(3*np.ones((3, 2, 3)))
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0, 2*3 * 3*np.ones((2, 3)))
    self.assertAllClose(m1, -0.5 * 2 * 3*misc.identity(2, 3))

    # Check broadcasting of dimensions
    mu = GaussianARD(np.zeros((2, 1)), 1e10, shape=(2, 1))
    X = GaussianARD(mu, 2*np.ones((2, 3)), shape=(2, 3))
    X.observe(3*np.ones((2, 3)))
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0, 2*3 * 3*np.ones((2, 1)))
    self.assertAllClose(m1, -0.5 * 2 * 3*misc.identity(2, 1))

    # Check plates for smaller mu than node
    mu = GaussianARD(0, 1, shape=(3,), plates=(4, 1, 1))
    X = GaussianARD(mu, 2*np.ones((3,)), shape=(2, 3), plates=(4, 5))
    X.observe(3*np.ones((4, 5, 2, 3)))
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0 * np.ones((4, 1, 1, 3)),
                        2*3 * 5*2*np.ones((4, 1, 1, 3)))
    self.assertAllClose(m1 * np.ones((4, 1, 1, 3, 3)),
                        (-0.5 * 2 * 5*2*misc.identity(3)
                         * np.ones((4, 1, 1, 3, 3))))

    # Check mask
    mu = GaussianARD(np.zeros((2, 1, 3)), 1e10, shape=(3,))
    X = GaussianARD(mu,
                    2*np.ones((2, 4, 3)),
                    shape=(3,),
                    plates=(2, 4))
    X.observe(3*np.ones((2, 4, 3)),
              mask=[[True, True, True, False],
                    [False, True, False, True]])
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0,
                        (2*3 * np.ones((2, 1, 3))
                         * np.array([[[3]], [[2]]])))
    self.assertAllClose(m1,
                        (-0.5*2 * misc.identity(3)
                         * np.ones((2, 1, 1, 1))
                         * np.array([[[[3]]], [[[2]]]])))

    # Check mask with different shapes
    mu = GaussianARD(np.zeros((2, 1, 3)), 1e10, shape=())
    X = GaussianARD(mu,
                    2*np.ones((2, 4, 3)),
                    shape=(3,),
                    plates=(2, 4))
    mask = np.array([[True, True, True, False],
                     [False, True, False, True]])
    X.observe(3*np.ones((2, 4, 3)), mask=mask)
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0,
                        2*3 * np.sum(np.ones((2, 4, 3)) * mask[..., None],
                                     axis=-2,
                                     keepdims=True))
    self.assertAllClose(m1,
                        (-0.5*2 * np.sum(np.ones((2, 4, 3)) * mask[..., None],
                                         axis=-2,
                                         keepdims=True)))

    # Check non-ARD Gaussian child
    mu = np.array([1, 2])
    Mu = GaussianARD(mu, 1e10, shape=(2,))
    alpha = np.array([3, 4])
    Lambda = np.array([[1, 0.5],
                       [0.5, 1]])
    X = GaussianARD(Mu, alpha)
    Y = Gaussian(X, Lambda)
    y = np.array([5, 6])
    Y.observe(y)
    X.update()
    (m0, m1) = Mu._message_from_children()
    mean = np.dot(np.linalg.inv(np.diag(alpha) + Lambda),
                  np.dot(np.diag(alpha), mu) + np.dot(Lambda, y))
    self.assertAllClose(m0, np.dot(np.diag(alpha), mean))
    self.assertAllClose(m1, -0.5*np.diag(alpha))

    # Check broadcasted variable axes
    mu = GaussianARD(np.zeros(1), 1e10, shape=(1,))
    X = GaussianARD(mu, 2, shape=(3,))
    X.observe(3*np.ones(3))
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0,
                        2*3 * np.sum(np.ones(3), axis=-1, keepdims=True))
    self.assertAllClose(m1,
                        -0.5*2 * np.sum(np.identity(3),
                                        axis=(-1, -2),
                                        keepdims=True))

    pass
def test_message_to_child(self):
    """
    Test the message to child of GaussianGamma node.
    """

    # Simple test
    mu = np.array([1, 2, 3])
    Lambda = np.identity(3)
    a = 2
    b = 10
    X_alpha = GaussianGamma(mu, Lambda, a, b)
    u = X_alpha._message_to_child()
    self.assertEqual(len(u), 4)
    tau = np.array(a/b)
    self.assertAllClose(u[0],
                        tau[..., None] * mu)
    self.assertAllClose(u[1],
                        (linalg.inv(Lambda)
                         + tau[..., None, None] * linalg.outer(mu, mu)))
    self.assertAllClose(u[2],
                        tau)
    self.assertAllClose(u[3],
                        -np.log(b) + special.psi(a))

    # Test with unknown parents
    mu = Gaussian(np.arange(3), 10*np.identity(3))
    Lambda = Wishart(10, np.identity(3))
    a = 2
    b = Gamma(3, 15)
    X_alpha = GaussianGamma(mu, Lambda, a, b)
    u = X_alpha._message_to_child()
    (mu, mumu) = mu._message_to_child()
    Cov_mu = mumu - linalg.outer(mu, mu)
    (Lambda, _) = Lambda._message_to_child()
    (b, _) = b._message_to_child()
    (tau, logtau) = Gamma(a,
                          b + 0.5*np.sum(Lambda*Cov_mu))._message_to_child()
    self.assertAllClose(u[0],
                        tau[..., None] * mu)
    self.assertAllClose(u[1],
                        (linalg.inv(Lambda)
                         + tau[..., None, None] * linalg.outer(mu, mu)))
    self.assertAllClose(u[2],
                        tau)
    self.assertAllClose(u[3],
                        logtau)

    # Test with plates
    mu = Gaussian(np.reshape(np.arange(3*4), (4, 3)),
                  10*np.identity(3),
                  plates=(4,))
    Lambda = Wishart(10, np.identity(3))
    a = 2
    b = Gamma(3, 15)
    X_alpha = GaussianGamma(mu, Lambda, a, b, plates=(4,))
    u = X_alpha._message_to_child()
    (mu, mumu) = mu._message_to_child()
    Cov_mu = mumu - linalg.outer(mu, mu)
    (Lambda, _) = Lambda._message_to_child()
    (b, _) = b._message_to_child()
    (tau, logtau) = Gamma(
        a,
        b + 0.5*np.sum(Lambda*Cov_mu, axis=(-1, -2))
    )._message_to_child()
    self.assertAllClose(u[0] * np.ones((4, 1)),
                        np.ones((4, 1)) * tau[..., None] * mu)
    self.assertAllClose(u[1] * np.ones((4, 1, 1)),
                        np.ones((4, 1, 1)) * (linalg.inv(Lambda)
                                              + tau[..., None, None]
                                              * linalg.outer(mu, mu)))
    self.assertAllClose(u[2] * np.ones(4),
                        np.ones(4) * tau)
    self.assertAllClose(u[3] * np.ones(4),
                        np.ones(4) * logtau)

    pass
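# For reference, the four GaussianGamma moments checked above are the
# expectations of (tau*x, tau*x*x^T, tau, log tau) under the joint posterior
# q(x, tau):
#
#     <tau x>      = <tau> mu
#     <tau x x^T>  = inv(Lambda) + <tau> mu mu^T
#     <tau>        = a / b
#     <log tau>    = psi(a) - log(b)
#
# The inv(Lambda) term appears without a <tau> factor because the
# conditional covariance of x given tau is inv(tau*Lambda), so tau cancels.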
def test_lowerbound(self):
    """
    Test the variational Bayesian lower bound term for GaussianARD.
    """

    # Test vector formula with full noise covariance
    m = np.random.randn(2)
    alpha = np.random.rand(2)
    y = np.random.randn(2)
    X = GaussianARD(m, alpha, ndim=1)
    V = np.array([[3, 1], [1, 3]])
    Y = Gaussian(X, V)
    Y.observe(y)
    X.update()
    Cov = np.linalg.inv(np.diag(alpha) + V)
    mu = np.dot(Cov, np.dot(V, y) + alpha*m)
    x2 = np.outer(mu, mu) + Cov
    logH_X = (+ 2*0.5*(1 + np.log(2*np.pi))
              + 0.5*np.log(np.linalg.det(Cov)))
    logp_X = (- 2*0.5*np.log(2*np.pi)
              + 0.5*np.log(np.linalg.det(np.diag(alpha)))
              - 0.5*np.sum(np.diag(alpha) * (x2
                                             - np.outer(mu, m)
                                             - np.outer(m, mu)
                                             + np.outer(m, m))))
    self.assertAllClose(logp_X + logH_X,
                        X.lower_bound_contribution())

    def check_lower_bound(shape_mu, shape_alpha, plates_mu=(), **kwargs):
        M = GaussianARD(np.ones(plates_mu + shape_mu),
                        np.ones(plates_mu + shape_mu),
                        shape=shape_mu,
                        plates=plates_mu)
        if not ('ndim' in kwargs or 'shape' in kwargs):
            kwargs['ndim'] = len(shape_mu)
        X = GaussianARD(M,
                        2*np.ones(shape_alpha),
                        **kwargs)
        Y = GaussianARD(X,
                        3*np.ones(X.get_shape(0)),
                        **kwargs)
        Y.observe(4*np.ones(Y.get_shape(0)))
        X.update()
        Cov = 1/(2+3)
        mu = Cov * (2*1 + 3*4)
        x2 = mu**2 + Cov
        logH_X = (+ 0.5*(1 + np.log(2*np.pi))
                  + 0.5*np.log(Cov))
        logp_X = (- 0.5*np.log(2*np.pi)
                  + 0.5*np.log(2)
                  - 0.5*2*(x2 - 2*mu*1 + 1**2 + 1))
        r = np.prod(X.get_shape(0))
        self.assertAllClose(r * (logp_X + logH_X),
                            X.lower_bound_contribution())

    # Test scalar formula
    check_lower_bound((), ())

    # Test array formula
    check_lower_bound((2, 3), (2, 3))

    # Test dim-broadcasting of mu
    check_lower_bound((3, 1), (2, 3, 4))

    # Test dim-broadcasting of alpha
    check_lower_bound((2, 3, 4), (3, 1))

    # Test dim-broadcasting of mu and alpha
    check_lower_bound((3, 1), (3, 1), shape=(2, 3, 4))

    # Test dim-broadcasting of mu with plates
    check_lower_bound((), (),
                      plates_mu=(),
                      shape=(),
                      plates=(5,))

    # BUG: Scalar parents for array variable caused einsum error
    check_lower_bound((), (), shape=(3,))

    # BUG: Log-det was summed over plates
    check_lower_bound((), (), shape=(3,), plates=(4,))

    pass
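# The scalar expectations used inside check_lower_bound come from the
# standard Gaussian VB update with prior mean 1, prior precision 2, noise
# precision 3 and observation 4:
#
#     Cov = 1 / (2 + 3)        = 0.2
#     mu  = Cov * (2*1 + 3*4)  = 2.8
#
# and the "+ 1" inside logp_X is the unit variance of the uncertain prior
# mean M.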
def run(N=100000, N_batch=50, seed=42, maxiter=100, plot=True):
    """
    Run stochastic variational inference demo for a Gaussian mixture model.
    """

    if seed is not None:
        np.random.seed(seed)

    # Number of clusters in the model
    K = 20

    # Dimensionality of the data
    D = 5

    # Generate data
    K_true = 10
    spread = 5
    means = spread * np.random.randn(K_true, D)
    z = random.categorical(np.ones(K_true), size=N)
    data = np.empty((N, D))
    for n in range(N):
        data[n] = means[z[n]] + np.random.randn(D)

    #
    # Standard VB-EM algorithm
    #

    # Full model
    mu = Gaussian(np.zeros(D), np.identity(D), plates=(K,), name='means')
    alpha = Dirichlet(np.ones(K), name='class probabilities')
    Z = Categorical(alpha, plates=(N,), name='classes')
    Y = Mixture(Z, Gaussian, mu, np.identity(D), name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Put the data in
    Y.observe(data)

    # Run inference
    Q = VB(Y, Z, mu, alpha)
    Q.save(mu)
    Q.update(repeat=maxiter)
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')
    max_cputime = np.sum(Q.cputime[~np.isnan(Q.cputime)])

    #
    # Stochastic variational inference
    #

    # Construct a smaller model (the size of one mini-batch)
    mu = Gaussian(np.zeros(D), np.identity(D), plates=(K,), name='means')
    alpha = Dirichlet(np.ones(K), name='class probabilities')
    Z = Categorical(alpha,
                    plates=(N_batch,),
                    plates_multiplier=(N/N_batch,),
                    name='classes')
    Y = Mixture(Z, Gaussian, mu, np.identity(D), name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Inference engine
    Q = VB(Y, Z, mu, alpha, autosave_filename=Q.autosave_filename)
    Q.load(mu)

    # Because we use mini-batches, messages need to be multiplied
    # appropriately (see plates_multiplier above)
    print("Stochastic variational inference...")
    Q.ignore_bound_checks = True

    maxiter *= int(N/N_batch)
    delay = 1
    forgetting_rate = 0.7
    for n in range(maxiter):

        # Observe a mini-batch
        subset = np.random.choice(N, N_batch)
        Y.observe(data[subset, :])

        # Learn intermediate variables
        Q.update(Z)

        # Set step length
        step = (n + delay)**(-forgetting_rate)

        # Stochastic gradient for the global variables
        Q.gradient_step(mu, alpha, scale=step)

        if np.sum(Q.cputime[:n]) > max_cputime:
            break

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')
        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Stochastic inference'],
                            loc='lower right')
        bpplt.pyplot.title('VB for Gaussian mixture model')

    return
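# A note on the step sizes used above: step = (n + delay)**(-forgetting_rate)
# is a Robbins-Monro schedule. Stochastic approximation converges when the
# steps satisfy sum(step) = inf and sum(step**2) < inf, which for this
# power-law schedule requires 0.5 < forgetting_rate <= 1. A quick look at
# the resulting decay:
import numpy as np

delay = 1
forgetting_rate = 0.7
steps = (np.arange(100) + delay)**(-forgetting_rate)
print(steps[:4])   # decaying step lengths: 1.0, 0.62, 0.46, 0.38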
def test_gradient(self):
    """Test standard gradient of a Gaussian node."""
    D = 3

    np.random.seed(42)

    #
    # Without observations
    #

    # Construct model
    mu = np.random.randn(D)
    Lambda = random.covariance(D)
    X = Gaussian(mu, Lambda)
    # Random initialization
    mu0 = np.random.randn(D)
    Lambda0 = random.covariance(D)
    X.initialize_from_parameters(mu0, Lambda0)
    Q = VB(X)
    # Initial parameters
    phi0 = X.phi
    # Gradient
    rg = X.get_riemannian_gradient()
    g = X.get_gradient(rg)
    # Numerical gradient
    eps = 1e-6
    p0 = X.get_parameters()
    l0 = Q.compute_lowerbound(ignore_masked=False)
    g_num = [np.zeros(D), np.zeros((D, D))]
    for i in range(D):
        e = np.zeros(D)
        e[i] = eps
        p1 = p0[0] + e
        X.set_parameters([p1, p0[1]])
        l1 = Q.compute_lowerbound(ignore_masked=False)
        g_num[0][i] = (l1 - l0) / eps
    for i in range(D):
        for j in range(i+1):
            e = np.zeros((D, D))
            e[i, j] += eps
            e[j, i] += eps
            p1 = p0[1] + e
            X.set_parameters([p0[0], p1])
            l1 = Q.compute_lowerbound(ignore_masked=False)
            g_num[1][i, j] = (l1 - l0) / (2*eps)
            g_num[1][j, i] = (l1 - l0) / (2*eps)
    # Check
    self.assertAllClose(g[0], g_num[0])
    self.assertAllClose(g[1], g_num[1])

    #
    # With observations
    #

    # Construct model
    mu = np.random.randn(D)
    Lambda = random.covariance(D)
    X = Gaussian(mu, Lambda)
    # Random initialization
    mu0 = np.random.randn(D)
    Lambda0 = random.covariance(D)
    X.initialize_from_parameters(mu0, Lambda0)
    V = random.covariance(D)
    Y = Gaussian(X, V)
    Y.observe(np.random.randn(D))
    Q = VB(Y, X)
    # Initial parameters
    phi0 = X.phi
    # Gradient
    rg = X.get_riemannian_gradient()
    g = X.get_gradient(rg)
    # Numerical gradient
    eps = 1e-6
    p0 = X.get_parameters()
    l0 = Q.compute_lowerbound()
    g_num = [np.zeros(D), np.zeros((D, D))]
    for i in range(D):
        e = np.zeros(D)
        e[i] = eps
        p1 = p0[0] + e
        X.set_parameters([p1, p0[1]])
        l1 = Q.compute_lowerbound()
        g_num[0][i] = (l1 - l0) / eps
    for i in range(D):
        for j in range(i+1):
            e = np.zeros((D, D))
            e[i, j] += eps
            e[j, i] += eps
            p1 = p0[1] + e
            X.set_parameters([p0[0], p1])
            l1 = Q.compute_lowerbound()
            g_num[1][i, j] = (l1 - l0) / (2*eps)
            g_num[1][j, i] = (l1 - l0) / (2*eps)
    # Check
    self.assertAllClose(g[0], g_num[0])
    self.assertAllClose(g[1], g_num[1])

    #
    # With plates
    #

    # Construct model
    K = D + 1
    mu = np.random.randn(D)
    Lambda = random.covariance(D)
    X = Gaussian(mu, Lambda, plates=(K,))
    V = random.covariance(D, size=(K,))
    Y = Gaussian(X, V)
    Y.observe(np.random.randn(K, D))
    Q = VB(Y, X)
    # Random initialization
    mu0 = np.random.randn(*(X.get_shape(0)))
    Lambda0 = random.covariance(D, size=X.plates)
    X.initialize_from_parameters(mu0, Lambda0)
    # Initial parameters
    phi0 = X.phi
    # Gradient
    rg = X.get_riemannian_gradient()
    g = X.get_gradient(rg)
    # Numerical gradient
    eps = 1e-6
    p0 = X.get_parameters()
    l0 = Q.compute_lowerbound()
    g_num = [np.zeros(X.get_shape(0)), np.zeros(X.get_shape(1))]
    for k in range(K):
        for i in range(D):
            e = np.zeros(X.get_shape(0))
            e[k, i] = eps
            p1 = p0[0] + e
            X.set_parameters([p1, p0[1]])
            l1 = Q.compute_lowerbound()
            g_num[0][k, i] = (l1 - l0) / eps
        for i in range(D):
            for j in range(i+1):
                e = np.zeros(X.get_shape(1))
                e[k, i, j] += eps
                e[k, j, i] += eps
                p1 = p0[1] + e
                X.set_parameters([p0[0], p1])
                l1 = Q.compute_lowerbound()
                g_num[1][k, i, j] = (l1 - l0) / (2*eps)
                g_num[1][k, j, i] = (l1 - l0) / (2*eps)
    # Check
    self.assertAllClose(g[0], g_num[0])
    self.assertAllClose(g[1], g_num[1])

    pass
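# A hedged aside: the checks above use one-sided (forward) differences,
# (l1 - l0) / eps, which have O(eps) truncation error. A central-difference
# checker, sketched below with hypothetical get_loss/set_params callables,
# reduces this to O(eps**2) at the cost of two evaluations per coordinate:
import numpy as np

def central_diff_gradient(get_loss, params, set_params, eps=1e-6):
    """Central-difference numerical gradient of a scalar loss."""
    g = np.zeros_like(params, dtype=float)
    flat = g.ravel()
    for i in range(params.size):
        for sign in (+1, -1):
            # Perturb one coordinate up and down
            p = params.astype(float)
            p.ravel()[i] += sign * eps
            set_params(p)
            flat[i] += sign * get_loss() / (2 * eps)
    set_params(params)  # restore the original parameters
    return g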
def test_message_to_child(self):
    """
    Test the message to child of GaussianGammaISO node.
    """

    # Simple test
    mu = np.array([1, 2, 3])
    Lambda = np.identity(3)
    a = 2
    b = 10
    X_alpha = GaussianGammaISO(mu, Lambda, a, b)
    u = X_alpha._message_to_child()
    self.assertEqual(len(u), 4)
    tau = np.array(a/b)
    self.assertAllClose(u[0],
                        tau[..., None] * mu)
    self.assertAllClose(u[1],
                        (linalg.inv(Lambda)
                         + tau[..., None, None] * linalg.outer(mu, mu)))
    self.assertAllClose(u[2],
                        tau)
    self.assertAllClose(u[3],
                        -np.log(b) + special.psi(a))

    # Test with unknown parents
    mu = Gaussian(np.arange(3), 10*np.identity(3))
    Lambda = Wishart(10, np.identity(3))
    a = 2
    b = Gamma(3, 15)
    X_alpha = GaussianGammaISO(mu, Lambda, a, b)
    u = X_alpha._message_to_child()
    (mu, mumu) = mu._message_to_child()
    Cov_mu = mumu - linalg.outer(mu, mu)
    (Lambda, _) = Lambda._message_to_child()
    (b, _) = b._message_to_child()
    (tau, logtau) = Gamma(a,
                          b + 0.5*np.sum(Lambda*Cov_mu))._message_to_child()
    self.assertAllClose(u[0],
                        tau[..., None] * mu)
    self.assertAllClose(u[1],
                        (linalg.inv(Lambda)
                         + tau[..., None, None] * linalg.outer(mu, mu)))
    self.assertAllClose(u[2],
                        tau)
    self.assertAllClose(u[3],
                        logtau)

    # Test with plates
    mu = Gaussian(np.reshape(np.arange(3*4), (4, 3)),
                  10*np.identity(3),
                  plates=(4,))
    Lambda = Wishart(10, np.identity(3))
    a = 2
    b = Gamma(3, 15)
    X_alpha = GaussianGammaISO(mu, Lambda, a, b, plates=(4,))
    u = X_alpha._message_to_child()
    (mu, mumu) = mu._message_to_child()
    Cov_mu = mumu - linalg.outer(mu, mu)
    (Lambda, _) = Lambda._message_to_child()
    (b, _) = b._message_to_child()
    (tau, logtau) = Gamma(
        a,
        b + 0.5*np.sum(Lambda*Cov_mu, axis=(-1, -2))
    )._message_to_child()
    self.assertAllClose(u[0] * np.ones((4, 1)),
                        np.ones((4, 1)) * tau[..., None] * mu)
    self.assertAllClose(u[1] * np.ones((4, 1, 1)),
                        np.ones((4, 1, 1)) * (linalg.inv(Lambda)
                                              + tau[..., None, None]
                                              * linalg.outer(mu, mu)))
    self.assertAllClose(u[2] * np.ones(4),
                        np.ones(4) * tau)
    self.assertAllClose(u[3] * np.ones(4),
                        np.ones(4) * logtau)

    pass
def test_messages(self):

    D = 2
    M = 3

    np.random.seed(42)

    def check(mu, Lambda, alpha, beta, ndim):
        X = GaussianGamma(
            mu,
            (
                Lambda if isinstance(Lambda._moments, WishartMoments) else
                Lambda.as_wishart(ndim=ndim)
            ),
            alpha,
            beta,
            ndim=ndim
        )
        self.assert_moments(
            X,
            postprocess=lambda u: [
                u[0],
                u[1] + linalg.transpose(u[1], ndim=ndim),
                u[2],
                u[3]
            ],
            rtol=1e-5,
            atol=1e-6,
            eps=1e-8
        )
        X.observe(
            (
                np.random.randn(*(X.plates + X.dims[0])),
                np.random.rand(*X.plates)
            )
        )
        self.assert_message_to_parent(X, mu)
        self.assert_message_to_parent(
            X,
            Lambda,
            postprocess=lambda m: [
                m[0] + linalg.transpose(m[0], ndim=ndim),
                m[1],
            ]
        )
        self.assert_message_to_parent(X, beta)

    check(
        Gaussian(np.random.randn(M, D), random.covariance(D), plates=(M,)),
        Wishart(D + np.random.rand(M), random.covariance(D), plates=(M,)),
        np.random.rand(M),
        Gamma(np.random.rand(M), np.random.rand(M), plates=(M,)),
        ndim=1
    )

    check(
        GaussianARD(np.random.randn(M, D), np.random.rand(M, D), ndim=0),
        Gamma(np.random.rand(M, D), np.random.rand(M, D)),
        np.random.rand(M, D),
        Gamma(np.random.rand(M, D), np.random.rand(M, D)),
        ndim=0
    )

    pass
# (5) Parameter expansion (for when convergence is slow)
# from bayespy.inference.vmp import transformations
# rotX = transformations.RotateGaussianARD(X)
# rotC = transformations.RotateGaussianARD(C, alpha)
# R = transformations.RotationOptimizer(rotC, rotX, D)
# R.rotate()
# alpha.initialize_from_prior()
# C.initialize_from_prior()
# X.initialize_from_parameters(np.random.randn(1, 100, D), 10)
# tau.initialize_from_prior()
# Q = VB(Y, C, X, alpha, tau)
# Q.callback = R.rotate
# Q.update(repeat=1000, tol=1e-6)

# ----- Examining the results -----

# Plotting the results
bpplt.pyplot.figure()
bpplt.pdf(Q['tau'], np.linspace(60, 140, num=100))

V = Gaussian([3, 5], [[4, 2], [2, 5]])
bpplt.pyplot.figure()
bpplt.contour(V, np.linspace(1, 5, num=100), np.linspace(3, 7, num=100))

bpplt.pyplot.figure()
bpplt.hinton(C)

bpplt.pyplot.figure()
bpplt.plot(X, axis=-2)

bpplt.pyplot.show()
def test_init(self):
    """
    Test the creation of GaussianGammaISO node
    """

    # Simple construction
    X_alpha = GaussianGammaISO([1, 2, 3], np.identity(3), 2, 10)
    self.assertEqual(X_alpha.plates, ())
    self.assertEqual(X_alpha.dims, ((3,), (3, 3), (), ()))

    # Plates
    X_alpha = GaussianGammaISO([1, 2, 3], np.identity(3), 2, 10, plates=(4,))
    self.assertEqual(X_alpha.plates, (4,))
    self.assertEqual(X_alpha.dims, ((3,), (3, 3), (), ()))

    # Plates in mu
    X_alpha = GaussianGammaISO(np.ones((4, 3)), np.identity(3), 2, 10)
    self.assertEqual(X_alpha.plates, (4,))
    self.assertEqual(X_alpha.dims, ((3,), (3, 3), (), ()))

    # Plates in Lambda
    X_alpha = GaussianGammaISO(np.ones(3),
                               np.ones((4, 3, 3))*np.identity(3),
                               2,
                               10)
    self.assertEqual(X_alpha.plates, (4,))
    self.assertEqual(X_alpha.dims, ((3,), (3, 3), (), ()))

    # Plates in a
    X_alpha = GaussianGammaISO(np.ones(3), np.identity(3), np.ones(4), 10)
    self.assertEqual(X_alpha.plates, (4,))
    self.assertEqual(X_alpha.dims, ((3,), (3, 3), (), ()))

    # Plates in b
    X_alpha = GaussianGammaISO(np.ones(3), np.identity(3), 2, np.ones(4))
    self.assertEqual(X_alpha.plates, (4,))
    self.assertEqual(X_alpha.dims, ((3,), (3, 3), (), ()))

    # Inconsistent plates
    self.assertRaises(ValueError,
                      GaussianGammaISO,
                      np.ones((4, 3)),
                      np.identity(3),
                      2,
                      10,
                      plates=())

    # Inconsistent plates
    self.assertRaises(ValueError,
                      GaussianGammaISO,
                      np.ones((4, 3)),
                      np.identity(3),
                      2,
                      10,
                      plates=(5,))

    # Unknown parameters
    mu = Gaussian(np.zeros(3), np.identity(3))
    Lambda = Wishart(10, np.identity(3))
    b = Gamma(1, 1)
    X_alpha = GaussianGammaISO(mu, Lambda, 2, b)
    self.assertEqual(X_alpha.plates, ())
    self.assertEqual(X_alpha.dims, ((3,), (3, 3), (), ()))

    # mu is Gaussian-gamma
    mu_tau = GaussianGammaISO(np.ones(3), np.identity(3), 5, 5)
    X_alpha = GaussianGammaISO(mu_tau, np.identity(3), 5, 5)
    self.assertEqual(X_alpha.plates, ())
    self.assertEqual(X_alpha.dims, ((3,), (3, 3), (), ()))

    pass
def test_riemannian_gradient(self):
    """Test Riemannian gradient of a Gaussian node."""
    D = 3

    #
    # Without observations
    #

    # Construct model
    mu = np.random.randn(D)
    Lambda = random.covariance(D)
    X = Gaussian(mu, Lambda)
    # Random initialization
    mu0 = np.random.randn(D)
    Lambda0 = random.covariance(D)
    X.initialize_from_parameters(mu0, Lambda0)
    # Initial parameters
    phi0 = X.phi
    # Gradient
    g = X.get_riemannian_gradient()
    # Parameters after VB-EM update
    X.update()
    phi1 = X.phi
    # Check
    self.assertAllClose(g[0], phi1[0] - phi0[0])
    self.assertAllClose(g[1], phi1[1] - phi0[1])

    # TODO/FIXME: Actually, gradient should be zero because the cost
    # function is zero without observations! Use the mask!

    #
    # With observations
    #

    # Construct model
    mu = np.random.randn(D)
    Lambda = random.covariance(D)
    X = Gaussian(mu, Lambda)
    V = random.covariance(D)
    Y = Gaussian(X, V)
    Y.observe(np.random.randn(D))
    # Random initialization
    mu0 = np.random.randn(D)
    Lambda0 = random.covariance(D)
    X.initialize_from_parameters(mu0, Lambda0)
    # Initial parameters
    phi0 = X.phi
    # Gradient
    g = X.get_riemannian_gradient()
    # Parameters after VB-EM update
    X.update()
    phi1 = X.phi
    # Check
    self.assertAllClose(g[0], phi1[0] - phi0[0])
    self.assertAllClose(g[1], phi1[1] - phi0[1])

    pass
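# Why the checks above work: for a conjugate-exponential node, the
# Riemannian (natural) gradient of the VB lower bound with respect to the
# natural parameters phi is
#
#     g = phi_vb_update - phi_current
#
# so a single VB-EM update is exactly one unit-length natural-gradient step,
# and the gradient can be validated against the parameter difference.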
import numpy as np

from bayespy.nodes import Dirichlet, Categorical
from bayespy.nodes import Gaussian, Wishart
from bayespy.nodes import Mixture
from bayespy.inference import VB
import bayespy.plot as bpplt

y0 = np.random.multivariate_normal([0, 0], [[2, 0], [0, 0.1]], size=50)
y1 = np.random.multivariate_normal([0, 0], [[0.1, 0], [0, 2]], size=50)
y2 = np.random.multivariate_normal([2, 2], [[2, -1.5], [-1.5, 2]], size=50)
y3 = np.random.multivariate_normal([-2, -2], [[0.5, 0], [0, 0.5]], size=50)
y = np.vstack([y0, y1, y2, y3])

N = 200
D = 2
K = 10

alpha = Dirichlet(1e-5*np.ones(K), name='alpha')
Z = Categorical(alpha, plates=(N,), name='z')
mu = Gaussian(np.zeros(D), 1e-5*np.identity(D), plates=(K,), name='mu')
Lambda = Wishart(D, 1e-5*np.identity(D), plates=(K,), name='Lambda')
Y = Mixture(Z, Gaussian, mu, Lambda, name='Y')
Z.initialize_from_random()

Q = VB(Y, mu, Lambda, Z, alpha)
Y.observe(y)
Q.update(repeat=1000)

bpplt.gaussian_mixture_2d(Y, alpha=alpha, scale=2)
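# A hedged follow-up for inspecting the fitted mixture: for a Categorical
# node the first moment is the table of posterior class probabilities (one
# row per data point), so summing over the data axis gives effective cluster
# sizes. With the sparsity-inducing Dirichlet prior above, most of the K=10
# clusters should end up essentially empty.
weights = Z.get_moments()[0].sum(axis=0)
print(np.round(weights, 1))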
# This fragment assumes `env`, `observations` and `actions` are defined
# elsewhere (e.g. an OpenAI Gym environment and logged trajectories).
import numpy as np
import gym
from bayespy.nodes import Gaussian, Wishart, Dirichlet, Categorical

obs_nodes = {}
action_nodes = {}

# Will only work with low-dimensional observations; a filter is needed
# otherwise.
O_D = int(np.prod(env.observation_space.shape))

if isinstance(env.action_space, gym.spaces.discrete.Discrete):
    A_D = env.action_space.n

#mu = np.zeros(D)
#lambda_ = 1e-5*np.identity(D)

for obs in observations:
    trial = obs[0]
    o_n = obs[1:O_D + 1]
    n = obs[-1]
    if n in obs_nodes:
        X = obs_nodes[n]
    else:
        mu = Gaussian(np.zeros(O_D), 1e-5 * np.identity(O_D))
        lambda_ = Wishart(O_D, np.identity(O_D))
        O_n = Gaussian(mu, lambda_, name=f"O_{n}")
        obs_nodes[n] = O_n
        X = O_n
    X.observe(o_n)

for action in actions:
    trial, agent, a_n, n = action
    if a_n < 0:
        # Action reset
        continue
    if n in action_nodes:
        A = action_nodes[n]
    else:
        category_prob = Dirichlet(1e-3 * np.ones(A_D), name='category_prob')
        # FIXME: Unconfirmed!
        A = Categorical(category_prob)
        # Store the node, mirroring the observation loop above
        action_nodes[n] = A