def test_mask_to_parent(self):
    """
    Test the mask handling in Mixture node.

    An observation mask is given to the GaussianARD child of a Mixture and
    the test checks the ``mask`` attribute of the cluster assignment node and
    of both mixture parameter nodes.
    """
    n_clusters = 3
    assignments = Categorical(np.ones(n_clusters) / n_clusters, plates=(4, 5))
    means = GaussianARD(0, 1, shape=(2,), plates=(4, n_clusters, 5))
    precisions = Gamma(1, 1, plates=(4, n_clusters, 5, 2))
    mixture = Mixture(assignments, GaussianARD, means, precisions,
                      cluster_plate=-2)
    observed = GaussianARD(mixture, 1)

    # True for every other element of the flattened 4x5 plate grid.
    mask = (np.arange(4 * 5) % 2 == 0).reshape((4, 5))
    observed.observe(np.ones((4, 5, 2)), mask=mask)

    # The assignments share the plates of the mask; the parameters get a
    # singleton axis at the cluster plate (and at the extra trailing plate of
    # the precision node).
    self.assertArrayEqual(assignments.mask, mask)
    self.assertArrayEqual(means.mask, mask[:, None, :])
    self.assertArrayEqual(precisions.mask, mask[:, None, :, None])
def check_lower_bound(shape_mu, shape_alpha, plates_mu=(), **kwargs):
    """
    Compare the lower bound term of a GaussianARD node with a closed form.

    Builds the chain M -> X -> Y where M has mean 1 and precision 1, X has
    constant precision 2 and Y has constant precision 3, observes Y = 4
    everywhere, updates X and checks ``X.lower_bound_contribution()``.

    shape_mu, plates_mu: shape and plates of the mean node M.
    shape_alpha: shape of the constant precision array of X.
    kwargs: forwarded to the constructors of X and Y; ``ndim`` defaults to
        ``len(shape_mu)`` when neither ``ndim`` nor ``shape`` is given.

    NOTE(review): ``self`` is not a parameter here; this helper presumably
    is nested in a test method and closes over its ``self`` -- confirm.
    """
    mean_dims = plates_mu + shape_mu
    M = GaussianARD(np.ones(mean_dims),
                    np.ones(mean_dims),
                    shape=shape_mu,
                    plates=plates_mu)
    if 'ndim' not in kwargs and 'shape' not in kwargs:
        kwargs['ndim'] = len(shape_mu)
    X = GaussianARD(M, 2*np.ones(shape_alpha), **kwargs)
    Y = GaussianARD(X, 3*np.ones(X.get_shape(0)), **kwargs)
    Y.observe(4*np.ones(Y.get_shape(0)))
    X.update()

    # Scalar posterior of a single element of X: precision 2 + 3.
    post_var = 1/(2+3)
    post_mean = post_var * (2*1 + 3*4)
    post_second_moment = post_mean**2 + post_var
    # Entropy of the scalar posterior.
    entropy = 0.5*(1+np.log(2*np.pi)) + 0.5*np.log(post_var)
    # Expected log prior; the second moment of M is 1**2 + 1.
    expected_logp = (- 0.5*np.log(2*np.pi)
                     + 0.5*np.log(2)
                     - 0.5*2*(post_second_moment - 2*post_mean*1 + 1**2+1))
    # The element-wise value is scaled by the product of X.get_shape(0).
    scale = np.prod(X.get_shape(0))
    self.assertAllClose(scale * (expected_logp + entropy),
                        X.lower_bound_contribution())
def _setup_linear_regression():
    """
    Build and fit the linear regression model for the pdf and contour tests.

    This code is from http://www.bayespy.org/examples/regression.html

    Returns a dict that maps the name of every variable created here
    (``k``, ``c``, ``s``, ``x``, ``y``, ``X``, ``B``, ``F``, ``tau``, ``Y``,
    ``Q``, ``xh``, ``Xh``, ``Fh``) to its value.
    """
    np.random.seed(1)

    # Ground truth used to generate the data.
    k = 2  # slope
    c = 5  # bias
    s = 2  # noise standard deviation

    x = np.arange(10)
    y = k*x + c + s*np.random.randn(10)

    # Inputs augmented with a constant column for the bias term.
    X = np.vstack([x, np.ones(len(x))]).T

    B = GaussianARD(0, 1e-6, shape=(2,))
    F = SumMultiply('i,i', B, X)
    tau = Gamma(1e-3, 1e-3)
    Y = GaussianARD(F, tau)
    Y.observe(y)

    Q = VB(Y, B, tau)
    Q.update(repeat=1000)

    # Predictive function on a grid that extends beyond the data.
    xh = np.linspace(-5, 15, 100)
    Xh = np.vstack([xh, np.ones(len(xh))]).T
    Fh = SumMultiply('i,i', B, Xh)

    # Same keys as the function's local namespace.
    return dict(k=k, c=c, s=s, x=x, y=y, X=X, B=B, F=F, tau=tau, Y=Y, Q=Q,
                xh=xh, Xh=Xh, Fh=Fh)
def run():
    """
    Fit a Bayesian linear regression to noisy samples of a line and plot it.

    Ten points are drawn from ``2*x + 5`` with Gaussian noise, the weights
    and the noise precision are learned with VB, and the predictive function,
    the data and the noiseless line are shown in one figure.
    """
    from bayespy.inference import VB

    slope = 2
    intercept = 5
    noise_std = 2

    inputs = np.arange(10)
    targets = slope * inputs + intercept + noise_std * np.random.randn(10)

    # Inputs augmented with a constant column for the intercept.
    design = np.vstack([inputs, np.ones(len(inputs))]).T

    weights = GaussianARD(0, 1e-6, shape=(2,))
    predictor = SumMultiply('i,i', weights, design)
    noise_precision = Gamma(1e-3, 1e-3)
    observed = GaussianARD(predictor, noise_precision)
    observed.observe(targets)

    engine = VB(observed, weights, noise_precision)
    engine.update(repeat=1000)

    # Predictive function on a grid that extends beyond the data.
    grid = np.linspace(-5, 15, 100)
    grid_design = np.vstack([grid, np.ones(len(grid))]).T
    grid_predictor = SumMultiply('i,i', weights, grid_design)

    bpplt.pyplot.figure()
    bpplt.plot(grid_predictor, x=grid, scale=2)
    bpplt.plot(targets, x=inputs, color='r', marker='x', linestyle='None')
    bpplt.plot(slope * grid + intercept, x=grid, color='r')
    bpplt.pyplot.show()
def test_riemannian_gradient(self):
    """
    Test Riemannian gradient of a Gamma node.

    For a prior-only node and for a node with an observed child, the
    gradient taken at randomly initialized parameters must equal the change
    in ``phi`` caused by one VB-EM update.
    """

    def assert_gradient_equals_update_step(node):
        # Random initialization
        node.initialize_from_parameters(np.random.rand(), np.random.rand())
        # Parameters and gradient before the update
        phi_before = node.phi
        gradient = node.get_riemannian_gradient()
        # Parameters after the VB-EM update
        node.update()
        phi_after = node.phi
        for i in (0, 1):
            self.assertAllClose(gradient[i], phi_after[i] - phi_before[i])

    #
    # Without observations
    #
    a = np.random.rand()
    b = np.random.rand()
    tau = Gamma(a, b)
    assert_gradient_equals_update_step(tau)

    #
    # With observations
    #
    a = np.random.rand()
    b = np.random.rand()
    tau = Gamma(a, b)
    mu = np.random.randn()
    Y = GaussianARD(mu, tau)
    Y.observe(np.random.randn())
    assert_gradient_equals_update_step(tau)
def test_lower_bound_contribution(self):
    """
    Check lower bound terms involving Gamma nodes against closed forms.

    First an observed Gamma node is compared with the Gamma log-density at
    the observation.  Then a Gamma precision with one observed GaussianARD
    child is updated and the summed lower bound terms are compared with a
    Student-t log-density.
    """
    # Observed Gamma node
    shape = 15
    rate = 21
    value = 4
    node = Gamma(shape, rate)
    node.observe(value)
    gamma_logpdf = (
        shape * np.log(rate) +
        (shape - 1) * np.log(value) -
        rate * value -
        special.gammaln(shape)
    )
    testing.assert_allclose(node.lower_bound_contribution(), gamma_logpdf)

    # Just one latent node so we'll get exact marginal likelihood
    #
    # p(Y) = p(Y,X)/p(X|Y) = p(Y|X) * p(X) / p(X|Y)
    a = 2.3
    b = 4.1
    x = 1.9
    y = 4.8
    tau = Gamma(a, b)
    Y = GaussianARD(x, tau)
    Y.observe(y)

    # Parameters of the Student-t marginal of y.
    mu = x
    nu = 2 * a
    s2 = b / a

    # Expected posterior parameters of tau.
    a_post = a + 0.5
    b_post = b + 0.5*(y - x)**2

    tau.update()
    testing.assert_allclose([-b_post, a_post], tau.phi)

    student_t_logpdf = (
        special.gammaln((nu+1)/2)
        - special.gammaln(nu/2)
        - 0.5 * np.log(nu)
        - 0.5 * np.log(np.pi)
        - 0.5 * np.log(s2)
        - 0.5 * (nu + 1) * np.log(
            1 + (y - mu)**2 / (nu * s2)
        )
    )
    testing.assert_allclose(
        Y.lower_bound_contribution() + tau.lower_bound_contribution(),
        student_t_logpdf
    )
def test_message_to_parents(self):
    """
    Check the messages an observed GaussianARD node passes to its parents.

    The node has a Gaussian mean parent and a Gamma precision parent; each
    one is verified with ``self.assert_message_to_parent``.
    """
    D = 3
    mean_parent = Gaussian(np.random.randn(D), random.covariance(D))
    precision_parent = Gamma(np.random.rand(D), np.random.rand(D))
    child = GaussianARD(mean_parent, precision_parent)
    child.observe(np.random.randn(D))
    for parent in (mean_parent, precision_parent):
        self.assert_message_to_parent(child, parent)
def check(shape, plates, einsum_x, einsum_xx, axis=-1):
    """
    Rotate a random GaussianARD node and verify its moments.

    shape, plates: shape and plates of the node to construct.
    einsum_x: einsum subscripts applied as ``einsum(einsum_x, R, x)`` to get
        the expected first moment.
    einsum_xx: einsum subscripts applied as ``einsum(einsum_xx, R, xx, R)``
        to get the expected second moment.
    axis: axis of the variable that is rotated.

    NOTE(review): ``self`` is not a parameter here; this helper presumably
    is nested in a test method and closes over its ``self`` -- confirm.
    """
    # TODO/FIXME: Improve by having non-diagonal precision/covariance
    # parameter for the Gaussian X
    D = shape[axis]
    full_shape = plates + shape
    X = GaussianARD(np.random.randn(*full_shape),
                    np.random.rand(*full_shape),
                    shape=shape,
                    plates=plates)
    (x, xx) = X.get_moments()

    R = np.random.randn(D, D)
    X.rotate(R, axis=axis)
    (rotated_x, rotated_xx) = X.get_moments()

    expected_x = np.einsum(einsum_x, R, x)
    self.assertAllClose(rotated_x, expected_x)
    expected_xx = np.einsum(einsum_xx, R, xx, R)
    self.assertAllClose(rotated_xx, expected_xx)
def pca():
    """
    Compare VB-EM with Riemannian conjugate gradient on a PCA model.

    Builds a PCA model with ``D = 5`` components for ``10 x 3000`` data
    generated from ``D - 1`` components plus noise, runs 200 VB-EM
    iterations, restores the saved state, runs ``Q.optimize`` and plots the
    lower bound against cumulative CPU time for both runs.
    """
    np.random.seed(41)

    M = 10
    N = 3000
    D = 5

    # Construct the PCA model
    alpha = Gamma(1e-3, 1e-3, plates=(D,), name='alpha')
    W = GaussianARD(0, alpha, plates=(M, 1), shape=(D,), name='W')
    X = GaussianARD(0, 1, plates=(1, N), shape=(D,), name='X')
    tau = Gamma(1e-3, 1e-3, name='tau')
    W.initialize_from_random()
    F = SumMultiply('d,d->', W, X)
    Y = GaussianARD(F, tau, name='Y')

    # Observe data: a product of random factors with D-1 components plus
    # noise with standard deviation 0.1
    true_w = np.random.randn(M, 1, D-1)
    true_x = np.random.randn(1, N, D-1)
    signal = np.sum(true_w * true_x, axis=-1)
    data = signal + 1e-1 * np.random.randn(M, N)
    Y.observe(data)

    # Initialize VB engine
    Q = VB(Y, X, W, alpha, tau)

    # Take one update step (so phi is ok) and remember that state
    Q.update(repeat=1)
    Q.save()

    # Run VB-EM
    Q.update(repeat=200)
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore the state
    Q.load()

    # Run Riemannian conjugate gradient
    # Alternative split of optimized/collapsed nodes:
    #Q.optimize(X, alpha, maxiter=100, collapsed=[W, tau])
    Q.optimize(W, tau, maxiter=100, collapsed=[X, alpha])
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

    bpplt.pyplot.show()
def test_message_to_parent(self):
    """
    Test the message to parents of Mixture node.

    Each scenario builds ``Mixture(z, GaussianARD, Mu, Alpha)`` below an
    observed GaussianARD child and compares ``_message_to_parent(0)`` and
    ``_message_to_parent(1)`` with values computed from the moments of the
    nodes involved.
    """
    K = 3

    # Broadcasting the moments on the cluster axis
    Mu = GaussianARD(2, 1, ndim=0, plates=(K,))
    mu, mumu = Mu._message_to_child()
    Alpha = Gamma(3, 1, plates=(K,))
    alpha, logalpha = Alpha._message_to_child()
    z = Categorical(np.ones(K)/K)
    X = Mixture(z, GaussianARD, Mu, Alpha)
    noise_precision = 4
    Y = GaussianARD(X, noise_precision)
    observation = 5
    Y.observe(observation)
    x, xx = X._message_to_child()

    to_assignments = X._message_to_parent(0)
    expected_logpdf = random.gaussian_logpdf(xx*alpha,
                                             x*alpha*mu,
                                             mumu*alpha,
                                             logalpha,
                                             0)
    self.assertAllClose(to_assignments[0], expected_logpdf)

    to_mean = X._message_to_parent(1)
    self.assertAllClose(to_mean[0], 1/K * (alpha*x) * np.ones(3))
    self.assertAllClose(to_mean[1], -0.5 * 1/K * alpha * np.ones(3))

    # Some parameters do not have cluster plate axis
    Mu = GaussianARD(2, 1, ndim=0, plates=(K,))
    mu, mumu = Mu._message_to_child()
    Alpha = Gamma(3, 1)  # Note: no cluster plate axis!
    alpha, logalpha = Alpha._message_to_child()
    z = Categorical(np.ones(K)/K)
    X = Mixture(z, GaussianARD, Mu, Alpha)
    noise_precision = 4
    Y = GaussianARD(X, noise_precision)
    observation = 5
    Y.observe(observation)
    x, xx = X._message_to_child()

    to_assignments = X._message_to_parent(0)
    expected_logpdf = random.gaussian_logpdf(xx*alpha,
                                             x*alpha*mu,
                                             mumu*alpha,
                                             logalpha,
                                             0)
    self.assertAllClose(to_assignments[0], expected_logpdf)

    to_mean = X._message_to_parent(1)
    self.assertAllClose(to_mean[0], 1/K * (alpha*x) * np.ones(3))
    self.assertAllClose(to_mean[1], -0.5 * 1/K * alpha * np.ones(3))

    # Cluster assignments do not have as many plate axes as parameters.
    M = 2
    Mu = GaussianARD(2, 1, ndim=0, plates=(K, M))
    mu, mumu = Mu._message_to_child()
    Alpha = Gamma(3, 1, plates=(K, M))
    alpha, logalpha = Alpha._message_to_child()
    z = Categorical(np.ones(K)/K)
    X = Mixture(z, GaussianARD, Mu, Alpha, cluster_plate=-2)
    noise_precision = 4
    Y = GaussianARD(X, noise_precision)
    observation = 5 * np.ones(M)
    Y.observe(observation)
    x, xx = X._message_to_child()

    to_assignments = X._message_to_parent(0)
    # The per-element log-densities are summed over the extra plate axis.
    expected_logpdf = np.sum(random.gaussian_logpdf(xx*alpha,
                                                    x*alpha*mu,
                                                    mumu*alpha,
                                                    logalpha,
                                                    0) * np.ones((K, M)),
                             axis=-1)
    self.assertAllClose(to_assignments[0]*np.ones(K), expected_logpdf)

    to_mean = X._message_to_parent(1)
    self.assertAllClose(to_mean[0] * np.ones((K, M)),
                        1/K * (alpha*x) * np.ones((K, M)))
    self.assertAllClose(to_mean[1] * np.ones((K, M)),
                        -0.5 * 1/K * alpha * np.ones((K, M)))
def test_message_to_parent(self):
    """
    Test the message to parents of Mixture node.

    The messages are read from the parents with ``_message_from_children``.
    Covered scenarios: broadcasting on the cluster axis, a parameter without
    a cluster plate axis, assignments with fewer plate axes than the
    parameters, a mixture of Categorical distributions with constant
    parameters, nested mixtures, and equality of a nested Mixture with the
    corresponding nested Gate construction.
    """
    K = 3

    # Broadcasting the moments on the cluster axis
    Mu = GaussianARD(2, 1, ndim=0, plates=(K,))
    mu, mumu = Mu._message_to_child()
    Alpha = Gamma(3, 1, plates=(K,))
    alpha, logalpha = Alpha._message_to_child()
    z = Categorical(np.ones(K)/K)
    X = Mixture(z, GaussianARD, Mu, Alpha)
    Y = GaussianARD(X, 4)
    Y.observe(5)
    x, xx = X._message_to_child()

    z_msg = z._message_from_children()
    expected_logpdf = random.gaussian_logpdf(xx*alpha,
                                             x*alpha*mu,
                                             mumu*alpha,
                                             logalpha,
                                             0) * np.ones(K)
    self.assertAllClose(z_msg[0] * np.ones(K), expected_logpdf)

    mu_msg = Mu._message_from_children()
    self.assertAllClose(mu_msg[0], 1/K * (alpha*x) * np.ones(3))
    self.assertAllClose(mu_msg[1], -0.5 * 1/K * alpha * np.ones(3))

    # Some parameters do not have cluster plate axis
    Mu = GaussianARD(2, 1, ndim=0, plates=(K,))
    mu, mumu = Mu._message_to_child()
    Alpha = Gamma(3, 1)  # Note: no cluster plate axis!
    alpha, logalpha = Alpha._message_to_child()
    z = Categorical(np.ones(K)/K)
    X = Mixture(z, GaussianARD, Mu, Alpha)
    Y = GaussianARD(X, 4)
    Y.observe(5)
    x, xx = X._message_to_child()

    z_msg = z._message_from_children()
    expected_logpdf = random.gaussian_logpdf(xx*alpha,
                                             x*alpha*mu,
                                             mumu*alpha,
                                             logalpha,
                                             0) * np.ones(K)
    self.assertAllClose(z_msg[0] * np.ones(K), expected_logpdf)

    mu_msg = Mu._message_from_children()
    self.assertAllClose(mu_msg[0], 1/K * (alpha*x) * np.ones(3))
    self.assertAllClose(mu_msg[1], -0.5 * 1/K * alpha * np.ones(3))

    # Cluster assignments do not have as many plate axes as parameters.
    M = 2
    Mu = GaussianARD(2, 1, ndim=0, plates=(K, M))
    mu, mumu = Mu._message_to_child()
    Alpha = Gamma(3, 1, plates=(K, M))
    alpha, logalpha = Alpha._message_to_child()
    z = Categorical(np.ones(K)/K)
    X = Mixture(z, GaussianARD, Mu, Alpha, cluster_plate=-2)
    Y = GaussianARD(X, 4)
    Y.observe(5 * np.ones(M))
    x, xx = X._message_to_child()

    z_msg = z._message_from_children()
    # The per-element log-densities are summed over the extra plate axis.
    expected_logpdf = np.sum(random.gaussian_logpdf(xx*alpha,
                                                    x*alpha*mu,
                                                    mumu*alpha,
                                                    logalpha,
                                                    0) * np.ones((K, M)),
                             axis=-1)
    self.assertAllClose(z_msg[0]*np.ones(K), expected_logpdf)

    mu_msg = Mu._message_from_children()
    self.assertAllClose(mu_msg[0] * np.ones((K, M)),
                        1/K * (alpha*x) * np.ones((K, M)))
    self.assertAllClose(mu_msg[1] * np.ones((K, M)),
                        -0.5 * 1/K * alpha * np.ones((K, M)))

    # Mixed distribution broadcasts g
    # This tests for a found bug. The bug caused an error.
    # Only computing the message is required; its value is not checked.
    Z = Categorical([0.3, 0.5, 0.2])
    X = Mixture(Z, Categorical, [[0.2, 0.8], [0.1, 0.9], [0.3, 0.7]])
    Z._message_from_children()

    #
    # Test nested mixtures
    #
    outer_index = [1, 1, 0, 3, 3]
    inner_index = [2]
    p = Dirichlet([1, 1], plates=(4, 3))
    X = Mixture(outer_index, Mixture, inner_index, Categorical, p)
    X.observe([1, 1, 0, 0, 0])
    p.update()
    expected_phi = [
        [[1, 1], [1, 1], [2, 1]],
        [[1, 1], [1, 1], [1, 3]],
        [[1, 1], [1, 1], [1, 1]],
        [[1, 1], [1, 1], [3, 1]],
    ]
    self.assertAllClose(p.phi[0], expected_phi)

    # Test sample plates in nested mixtures
    top_index = Categorical([0.3, 0.7], plates=(5,))
    middle_index = [[1], [1], [0], [3], [3]]
    bottom_index = 2
    p = Dirichlet([1, 1], plates=(2, 4, 3))
    X = Mixture(top_index, Mixture, middle_index, Mixture, bottom_index,
                Categorical, p)
    X.observe([1, 1, 0, 0, 0])
    p.update()
    expected_phi = [
        [
            [[1, 1], [1, 1], [1.3, 1]],
            [[1, 1], [1, 1], [1, 1.6]],
            [[1, 1], [1, 1], [1, 1]],
            [[1, 1], [1, 1], [1.6, 1]],
        ],
        [
            [[1, 1], [1, 1], [1.7, 1]],
            [[1, 1], [1, 1], [1, 2.4]],
            [[1, 1], [1, 1], [1, 1]],
            [[1, 1], [1, 1], [2.4, 1]],
        ]
    ]
    self.assertAllClose(p.phi[0], expected_phi)

    # Check that Gate and nested Mixture are equal
    t1 = Categorical([0.3, 0.7], plates=(5,))
    t2 = Categorical([0.1, 0.3, 0.6], plates=(5, 1))
    p = Dirichlet([1, 2, 3, 4], plates=(2, 3))
    X = Mixture(t1, Mixture, t2, Categorical, p)
    X.observe([3, 3, 1, 2, 2])
    mixture_msgs = [node._message_from_children() for node in (t1, t2, p)]

    t1 = Categorical([0.3, 0.7], plates=(5,))
    t2 = Categorical([0.1, 0.3, 0.6], plates=(5, 1))
    p = Dirichlet([1, 2, 3, 4], plates=(2, 3))
    X = Categorical(Gate(t1, Gate(t2, p)))
    X.observe([3, 3, 1, 2, 2])
    gate_msgs = [node._message_from_children() for node in (t1, t2, p)]

    for (mixture_msg, gate_msg) in zip(mixture_msgs, gate_msgs):
        self.assertAllClose(mixture_msg[0], gate_msg[0])
# Example script: PCA with automatic relevance determination and rotation
# speed-up.  Data generated from 2 latent dimensions are modelled with
# D = 10 components and the loading matrix is shown as a Hinton diagram.
import numpy
import numpy as np

from bayespy.nodes import GaussianARD, Gamma, SumMultiply
from bayespy.inference import VB
from bayespy.inference.vmp.transformations import RotateGaussianARD
from bayespy.inference.vmp.transformations import RotationOptimizer
import bayespy.plot as bpplt

numpy.random.seed(1)

# Toy data: M x N matrix of rank 2 plus noise with standard deviation 0.1
M = 20
N = 100
x = np.random.randn(N, 2)
w = np.random.randn(M, 2)
f = np.einsum('ik,jk->ij', w, x)
y = f + 0.1*np.random.randn(M, N)

# Model with more components than were used to generate the data
D = 10
X = GaussianARD(0, 1,
                plates=(1, N),
                shape=(D,))
alpha = Gamma(1e-5, 1e-5,
              plates=(D,))
C = GaussianARD(0, alpha,
                plates=(M, 1),
                shape=(D,))
F = SumMultiply('d,d->', X, C)
tau = Gamma(1e-5, 1e-5)
Y = GaussianARD(F, tau)
Y.observe(y)

# Inference engine; C is initialized randomly
Q = VB(Y, X, C, alpha, tau)
C.initialize_from_random()

# The rotation optimizer is run as a callback of the VB engine
rot_X = RotateGaussianARD(X)
rot_C = RotateGaussianARD(C, alpha)
R = RotationOptimizer(rot_X, rot_C, D)
Q.set_callback(R.rotate)

Q.update(repeat=1000)

bpplt.hinton(C)
def test_message_to_parent(self):
    """
    Test the message to parents of Gate node.

    In every scenario ``F = Gate(Z, X)`` has a GaussianARD child with unit
    precision.  ``F._message_to_parent(0)`` (towards ``Z``) and
    ``F._message_to_parent(1)`` (towards ``X``) are compared with
    hand-computed values.
    """

    # Unobserved and broadcasting
    Z = 2
    X = GaussianARD(0, 1, shape=(), plates=(3,))
    F = Gate(Z, X)
    Y = GaussianARD(F, 1)
    to_z = F._message_to_parent(0)
    self.assertEqual(len(to_z), 1)
    self.assertAllClose(to_z[0], 0*np.ones(3))
    to_x = F._message_to_parent(1)
    self.assertEqual(len(to_x), 2)
    self.assertAllClose(to_x[0]*np.ones(3), [0, 0, 0])
    self.assertAllClose(to_x[1]*np.ones(3), [0, 0, 0])

    # Gating scalar node
    Z = 2
    X = GaussianARD([1, 2, 3], 1, shape=(), plates=(3,))
    F = Gate(Z, X)
    Y = GaussianARD(F, 1)
    Y.observe(10)
    to_z = F._message_to_parent(0)
    self.assertAllClose(to_z[0],
                        [10*1-0.5*2, 10*2-0.5*5, 10*3-0.5*10])
    to_x = F._message_to_parent(1)
    self.assertAllClose(to_x[0], [0, 0, 10])
    self.assertAllClose(to_x[1], [0, 0, -0.5])

    # Fixed X
    Z = 2
    X = [1, 2, 3]
    F = Gate(Z, X, moments=GaussianMoments(0))
    Y = GaussianARD(F, 1)
    Y.observe(10)
    to_z = F._message_to_parent(0)
    self.assertAllClose(to_z[0],
                        [10*1-0.5*1, 10*2-0.5*4, 10*3-0.5*9])
    to_x = F._message_to_parent(1)
    self.assertAllClose(to_x[0], [0, 0, 10])
    self.assertAllClose(to_x[1], [0, 0, -0.5])

    # Uncertain gating
    Z = Categorical([0.2, 0.3, 0.5])
    X = GaussianARD([1, 2, 3], 1, shape=(), plates=(3,))
    F = Gate(Z, X)
    Y = GaussianARD(F, 1)
    Y.observe(10)
    to_z = F._message_to_parent(0)
    self.assertAllClose(to_z[0],
                        [10*1-0.5*2, 10*2-0.5*5, 10*3-0.5*10])
    to_x = F._message_to_parent(1)
    self.assertAllClose(to_x[0], [0.2*10, 0.3*10, 0.5*10])
    self.assertAllClose(to_x[1], [-0.5*0.2, -0.5*0.3, -0.5*0.5])

    # Plates in Z
    Z = [2, 0]
    X = GaussianARD([1, 2, 3], 1, shape=(), plates=(3,))
    F = Gate(Z, X)
    Y = GaussianARD(F, 1)
    Y.observe([10, 20])
    to_z = F._message_to_parent(0)
    self.assertAllClose(to_z[0],
                        [[10*1-0.5*2, 10*2-0.5*5, 10*3-0.5*10],
                         [20*1-0.5*2, 20*2-0.5*5, 20*3-0.5*10]])
    to_x = F._message_to_parent(1)
    self.assertAllClose(to_x[0], [20, 0, 10])
    self.assertAllClose(to_x[1], [-0.5, 0, -0.5])

    # Plates in X
    Z = 2
    X = GaussianARD([[1, 2, 3], [4, 5, 6]], 1, shape=(), plates=(2, 3,))
    F = Gate(Z, X)
    Y = GaussianARD(F, 1)
    Y.observe([10, 20])
    to_z = F._message_to_parent(0)
    self.assertAllClose(to_z[0],
                        [10*1-0.5*2 + 20*4-0.5*17,
                         10*2-0.5*5 + 20*5-0.5*26,
                         10*3-0.5*10 + 20*6-0.5*37])
    to_x = F._message_to_parent(1)
    self.assertAllClose(to_x[0], [[0, 0, 10], [0, 0, 20]])
    self.assertAllClose(to_x[1]*np.ones((2, 3)),
                        [[0, 0, -0.5], [0, 0, -0.5]])

    # Gating non-default plate
    Z = 2
    X = GaussianARD([[1], [2], [3]], 1, shape=(), plates=(3, 1))
    F = Gate(Z, X, gated_plate=-2)
    Y = GaussianARD(F, 1)
    Y.observe([10])
    to_z = F._message_to_parent(0)
    self.assertAllClose(to_z[0],
                        [10*1-0.5*2, 10*2-0.5*5, 10*3-0.5*10])
    to_x = F._message_to_parent(1)
    self.assertAllClose(to_x[0], [[0], [0], [10]])
    self.assertAllClose(to_x[1], [[0], [0], [-0.5]])

    # Gating non-scalar node
    Z = 2
    X = GaussianARD([[1, 4], [2, 5], [3, 6]], 1, shape=(2,), plates=(3,))
    F = Gate(Z, X)
    Y = GaussianARD(F, 1)
    Y.observe([10, 20])
    to_z = F._message_to_parent(0)
    self.assertAllClose(to_z[0],
                        [10*1-0.5*2 + 20*4-0.5*17,
                         10*2-0.5*5 + 20*5-0.5*26,
                         10*3-0.5*10 + 20*6-0.5*37])
    to_x = F._message_to_parent(1)
    eye = np.identity(2)
    self.assertAllClose(to_x[0], [[0, 0], [0, 0], [10, 20]])
    self.assertAllClose(to_x[1], [0*eye, 0*eye, -0.5*eye])

    # Broadcasting the moments on the cluster axis
    Z = 2
    X = GaussianARD(2, 1, shape=(), plates=(3,))
    F = Gate(Z, X)
    Y = GaussianARD(F, 1)
    Y.observe(10)
    to_z = F._message_to_parent(0)
    self.assertAllClose(to_z[0],
                        [10*2-0.5*5, 10*2-0.5*5, 10*2-0.5*5])
    to_x = F._message_to_parent(1)
    self.assertAllClose(to_x[0], [0, 0, 10])
    self.assertAllClose(to_x[1], [0, 0, -0.5])
def test_message_to_child(self):
    """
    Test the message to child of Gate node.

    ``Gate(Z, X)._message_to_child()`` is compared with hand-computed first
    and second moments for fixed and uncertain gating, plates in either
    parent, a non-default gated plate, a vector-valued X and an X whose
    moments are broadcast over the gated axis.
    """

    # Gating scalar node
    Z = 2
    X = GaussianARD([1, 2, 3], 1, shape=(), plates=(3,))
    gate = Gate(Z, X)
    moments = gate._message_to_child()
    self.assertEqual(len(moments), 2)
    self.assertAllClose(moments[0], 3)
    self.assertAllClose(moments[1], 3**2 + 1)

    # Fixed X
    Z = 2
    X = [1, 2, 3]
    gate = Gate(Z, X, moments=GaussianMoments(()))
    moments = gate._message_to_child()
    self.assertEqual(len(moments), 2)
    self.assertAllClose(moments[0], 3)
    self.assertAllClose(moments[1], 3**2)

    # Uncertain gating
    Z = Categorical([0.2, 0.3, 0.5])
    X = GaussianARD([1, 2, 3], 1, shape=(), plates=(3,))
    gate = Gate(Z, X)
    moments = gate._message_to_child()
    self.assertAllClose(moments[0], 0.2 * 1 + 0.3 * 2 + 0.5 * 3)
    self.assertAllClose(moments[1], 0.2 * 2 + 0.3 * 5 + 0.5 * 10)

    # Plates in Z
    Z = [2, 0]
    X = GaussianARD([1, 2, 3], 1, shape=(), plates=(3,))
    gate = Gate(Z, X)
    moments = gate._message_to_child()
    self.assertAllClose(moments[0], [3, 1])
    self.assertAllClose(moments[1], [10, 2])

    # Plates in X
    Z = 2
    X = GaussianARD([1, 2, 3], 1, shape=(), plates=(4, 3,))
    gate = Gate(Z, X)
    moments = gate._message_to_child()
    self.assertAllClose(np.ones(4) * moments[0], np.ones(4) * 3)
    self.assertAllClose(np.ones(4) * moments[1], np.ones(4) * 10)

    # Gating non-default plate
    Z = 2
    X = GaussianARD([[1], [2], [3]], 1, shape=(), plates=(3, 4))
    gate = Gate(Z, X, gated_plate=-2)
    moments = gate._message_to_child()
    self.assertAllClose(np.ones(4) * moments[0], np.ones(4) * 3)
    self.assertAllClose(np.ones(4) * moments[1], np.ones(4) * 10)

    # Gating non-scalar node
    Z = 2
    X = GaussianARD([1 * np.ones(4), 2 * np.ones(4), 3 * np.ones(4)],
                    1,
                    shape=(4,),
                    plates=(3,))
    gate = Gate(Z, X)
    moments = gate._message_to_child()
    self.assertAllClose(moments[0], 3 * np.ones(4))
    self.assertAllClose(moments[1],
                        9 * np.ones((4, 4)) + 1 * np.identity(4))

    # Broadcasting the moments on the cluster axis
    Z = 2
    X = GaussianARD(1, 1, shape=(), plates=(3,))
    gate = Gate(Z, X)
    moments = gate._message_to_child()
    self.assertEqual(len(moments), 2)
    self.assertAllClose(moments[0], 1)
    self.assertAllClose(moments[1], 1**2 + 1)
def test_message_to_parent(self):
    """
    Test the message to parents of Concatenate node.

    In each scenario two parents are concatenated, an observed GaussianARD
    child is attached, and the message received by the Concatenate node is
    compared slice by slice with the message each parent receives: the first
    two entries along the concatenation axis belong to the first parent and
    the remaining three to the second.
    """

    # Two parents without shapes
    X1 = GaussianARD(0, 1, plates=(2,), shape=())
    X2 = GaussianARD(0, 1, plates=(3,), shape=())
    Z = Concatenate(X1, X2)
    Y = GaussianARD(Z, 1)
    Y.observe(np.random.randn(*Y.get_shape(0)))
    m1 = X1._message_from_children()
    m2 = X2._message_from_children()
    m = Z._message_from_children()
    # Messages are multiplied by ones to broadcast them to full size
    # before slicing.
    self.assertAllClose((m[0]*np.ones((5,)))[:2],
                        m1[0]*np.ones((2,)))
    self.assertAllClose((m[1]*np.ones((5,)))[:2],
                        m1[1]*np.ones((2,)))
    self.assertAllClose((m[0]*np.ones((5,)))[2:],
                        m2[0]*np.ones((3,)))
    self.assertAllClose((m[1]*np.ones((5,)))[2:],
                        m2[1]*np.ones((3,)))

    # Two parents with shapes
    # NOTE(review): the original indentation was lost, so the extent of this
    # ``with`` block is assumed to run to the end of the test -- confirm.
    with warnings.catch_warnings():
        # FutureWarning is silenced while these models are built and checked.
        warnings.simplefilter("ignore", FutureWarning)
        X1 = GaussianARD(0, 1, plates=(2,), shape=(4,6))
        X2 = GaussianARD(0, 1, plates=(3,), shape=(4,6))
        Z = Concatenate(X1, X2)
        Y = GaussianARD(Z, 1)
        Y.observe(np.random.randn(*Y.get_shape(0)))
        m1 = X1._message_from_children()
        m2 = X2._message_from_children()
        m = Z._message_from_children()
        self.assertAllClose((m[0]*np.ones((5,4,6)))[:2],
                            m1[0]*np.ones((2,4,6)))
        self.assertAllClose((m[1]*np.ones((5,4,6,4,6)))[:2],
                            m1[1]*np.ones((2,4,6,4,6)))
        self.assertAllClose((m[0]*np.ones((5,4,6)))[2:],
                            m2[0]*np.ones((3,4,6)))
        self.assertAllClose((m[1]*np.ones((5,4,6,4,6)))[2:],
                            m2[1]*np.ones((3,4,6,4,6)))

        # Two parents with non-default concatenation axis
        X1 = GaussianARD(0, 1, plates=(2,4), shape=())
        X2 = GaussianARD(0, 1, plates=(3,4), shape=())
        Z = Concatenate(X1, X2, axis=-2)
        Y = GaussianARD(Z, 1)
        Y.observe(np.random.randn(*Y.get_shape(0)))
        m1 = X1._message_from_children()
        m2 = X2._message_from_children()
        m = Z._message_from_children()
        self.assertAllClose((m[0]*np.ones((5,4)))[:2],
                            m1[0]*np.ones((2,4)))
        self.assertAllClose((m[1]*np.ones((5,4)))[:2],
                            m1[1]*np.ones((2,4)))
        self.assertAllClose((m[0]*np.ones((5,4)))[2:],
                            m2[0]*np.ones((3,4)))
        self.assertAllClose((m[1]*np.ones((5,4)))[2:],
                            m2[1]*np.ones((3,4)))

        # Constant parent
        X1 = np.random.randn(2,4,6)
        X2 = GaussianARD(0, 1, plates=(3,), shape=(4,6))
        Z = Concatenate(X1, X2)
        Y = GaussianARD(Z, 1)
        Y.observe(np.random.randn(*Y.get_shape(0)))
        # X1 is a plain array here, so its message is requested from Z.
        m1 = Z._message_to_parent(0)
        m2 = X2._message_from_children()
        m = Z._message_from_children()
        self.assertAllClose((m[0]*np.ones((5,4,6)))[:2],
                            m1[0]*np.ones((2,4,6)))
        self.assertAllClose((m[1]*np.ones((5,4,6,4,6)))[:2],
                            m1[1]*np.ones((2,4,6,4,6)))
        self.assertAllClose((m[0]*np.ones((5,4,6)))[2:],
                            m2[0]*np.ones((3,4,6)))
        self.assertAllClose((m[1]*np.ones((5,4,6,4,6)))[2:],
                            m2[1]*np.ones((3,4,6,4,6)))
    pass
def _init_weights(self):
    """
    Create the weight node and store it in ``self.weights``.

    The node is a GaussianARD built from ``self.prior_mean`` and
    ``self.prior_precision`` with one element per feature
    (``self.n_feature``).
    """
    self.weights = GaussianARD(
        self.prior_mean,
        self.prior_precision,
        shape=(self.n_feature,),
    )
def test_message_to_parent_alpha(self):
    """
    Test the message from GaussianARD to its 2nd parent (alpha).

    The precision parent is a Gamma node and the two parts of the message it
    receives are compared with hand-computed values for several combinations
    of shapes, plates and masks, and for a non-ARD Gaussian child.
    """

    # Check formula with uncertain parent mu
    mu = GaussianARD(1, 1)
    tau = Gamma(0.5*1e10, 1e10)
    X = GaussianARD(mu, tau)
    X.observe(3)
    (msg0, msg1) = tau._message_from_children()
    self.assertAllClose(msg0, -0.5*(3**2 - 2*3*1 + 1**2+1))
    self.assertAllClose(msg1, 0.5)

    # Check formula with uncertain node
    tau = Gamma(1e10, 1e10)
    X = GaussianARD(2, tau)
    Y = GaussianARD(X, 1)
    Y.observe(5)
    X.update()
    (msg0, msg1) = tau._message_from_children()
    self.assertAllClose(msg0, -0.5*(1/(1+1)+3.5**2 - 2*3.5*2 + 2**2))
    self.assertAllClose(msg1, 0.5)

    # Check alpha larger than mu
    alpha = Gamma(np.ones((3, 2, 3))*1e10, 1e10)
    X = GaussianARD(np.ones((2, 3)), alpha, ndim=3)
    X.observe(2*np.ones((3, 2, 3)))
    (msg0, msg1) = alpha._message_from_children()
    self.assertAllClose(msg0 * np.ones((3, 2, 3)),
                        -0.5*(2**2 - 2*2*1 + 1**2) * np.ones((3, 2, 3)))
    self.assertAllClose(msg1*np.ones((3, 2, 3)),
                        0.5*np.ones((3, 2, 3)))

    # Check mu larger than alpha
    tau = Gamma(np.ones((2, 3))*1e10, 1e10)
    X = GaussianARD(np.ones((3, 2, 3)), tau, ndim=3)
    X.observe(2*np.ones((3, 2, 3)))
    (msg0, msg1) = tau._message_from_children()
    self.assertAllClose(msg0,
                        -0.5*(2**2 - 2*2*1 + 1**2) * 3 * np.ones((2, 3)))
    self.assertAllClose(msg1 * np.ones((2, 3)),
                        0.5 * 3 * np.ones((2, 3)))

    # Check node larger than mu and alpha
    tau = Gamma(np.ones((3,))*1e10, 1e10)
    X = GaussianARD(np.ones((2, 3)), tau, shape=(3, 2, 3))
    X.observe(2*np.ones((3, 2, 3)))
    (msg0, msg1) = tau._message_from_children()
    self.assertAllClose(msg0 * np.ones(3),
                        -0.5*(2**2 - 2*2*1 + 1**2) * 6 * np.ones((3,)))
    self.assertAllClose(msg1 * np.ones(3),
                        0.5 * 6 * np.ones(3))

    # Check plates for smaller mu than node
    tau = Gamma(np.ones((4, 1, 2, 3))*1e10, 1e10)
    X = GaussianARD(GaussianARD(1, 1,
                                shape=(3,),
                                plates=(4, 1, 1)),
                    tau,
                    shape=(2, 3),
                    plates=(4, 5))
    X.observe(2*np.ones((4, 5, 2, 3)))
    (msg0, msg1) = tau._message_from_children()
    self.assertAllClose(msg0 * np.ones((4, 1, 2, 3)),
                        (-0.5 * (2**2 - 2*2*1 + 1**2+1)
                         * 5*np.ones((4, 1, 2, 3))))
    self.assertAllClose(msg1 * np.ones((4, 1, 2, 3)),
                        5*0.5 * np.ones((4, 1, 2, 3)))

    # Check mask
    tau = Gamma(np.ones((4, 3))*1e10, 1e10)
    X = GaussianARD(np.ones(3), tau,
                    shape=(3,),
                    plates=(2, 4,))
    X.observe(2*np.ones((2, 4, 3)),
              mask=[[True, False, True, False],
                    [False, True, True, False]])
    (msg0, msg1) = tau._message_from_children()
    # Number of unmasked rows of the first plate axis per column of the mask.
    self.assertAllClose(msg0 * np.ones((4, 3)),
                        (-0.5 * (2**2 - 2*2*1 + 1**2)
                         * np.ones((4, 3))
                         * np.array([[1], [1], [2], [0]])))
    self.assertAllClose(msg1 * np.ones((4, 3)),
                        0.5 * np.array([[1], [1], [2], [0]]) * np.ones((4, 3)))

    # Check non-ARD Gaussian child
    mu = np.array([1, 2])
    alpha = np.array([3, 4])
    Alpha = Gamma(alpha*1e10, 1e10)
    Lambda = np.array([[1, 0.5],
                       [0.5, 1]])
    X = GaussianARD(mu, Alpha, ndim=1)
    Y = Gaussian(X, Lambda)
    y = np.array([5, 6])
    Y.observe(y)
    X.update()
    (msg0, msg1) = Alpha._message_from_children()
    # Posterior of X computed directly.
    post_cov = np.linalg.inv(np.diag(alpha)+Lambda)
    post_mean = np.dot(post_cov,
                       np.dot(np.diag(alpha), mu) + np.dot(Lambda, y))
    expected_msg0 = -0.5 * np.diag(
        np.outer(post_mean, post_mean) + post_cov
        - np.outer(post_mean, mu)
        - np.outer(mu, post_mean)
        + np.outer(mu, mu))
    self.assertAllClose(msg0 * np.ones(2), expected_msg0)
    self.assertAllClose(msg1 * np.ones(2), 0.5 * np.ones(2))
def test_initialization(self):
    """
    Test initialization methods of GaussianARD.

    After initializing from the prior, from parameters and from a value, the
    moments returned by ``_message_to_child`` are compared with the expected
    mean and second moment.  Random initialization is only executed.
    """

    def second_moment(mean, precision):
        # Outer product of the mean plus the diagonal covariance.
        return (linalg.outer(mean, mean, ndim=1)
                + misc.diag(1 / precision, ndim=1))

    X = GaussianARD(1, 2, shape=(2,), plates=(3,))

    # Prior initialization
    mu = 1 * np.ones((3, 2))
    alpha = 2 * np.ones((3, 2))
    X.initialize_from_prior()
    moments = X._message_to_child()
    self.assertAllClose(moments[0] * np.ones((3, 2)), mu)
    self.assertAllClose(moments[1] * np.ones((3, 2, 2)),
                        second_moment(mu, alpha))

    # Parameter initialization
    mu = np.random.randn(3, 2)
    alpha = np.random.rand(3, 2)
    X.initialize_from_parameters(mu, alpha)
    moments = X._message_to_child()
    self.assertAllClose(moments[0], mu)
    self.assertAllClose(moments[1], second_moment(mu, alpha))

    # Value initialization
    x = np.random.randn(3, 2)
    X.initialize_from_value(x)
    moments = X._message_to_child()
    self.assertAllClose(moments[0], x)
    self.assertAllClose(moments[1], linalg.outer(x, x, ndim=1))

    # Random initialization
    X.initialize_from_random()
def test_init(self):
    """
    Test the constructor of GaussianARD.

    Valid parent combinations must produce the expected plates and variable
    shape; inconsistent ones must raise ValueError.
    """

    def check_init(true_plates, true_shape, mu, alpha, **kwargs):
        node = GaussianARD(mu, alpha, **kwargs)
        self.assertEqual(node.dims, (true_shape, true_shape + true_shape),
                         msg="Constructed incorrect dimensionality")
        self.assertEqual(node.plates, true_plates,
                         msg="Constructed incorrect plates")

    def check_error(mu, alpha, **kwargs):
        with self.assertRaises(ValueError):
            GaussianARD(mu, alpha, **kwargs)

    #
    # Create from constant parents
    #

    # Use ndim=0 for constant mu
    check_init((), (), 0, 1)
    check_init((3, 2), (), np.zeros((3, 2,)), np.ones((2,)))
    check_init((4, 2, 2, 3), (),
               np.zeros((2, 1, 3,)),
               np.ones((4, 1, 2, 3)))
    # Use ndim
    check_init((4, 2), (2, 3),
               np.zeros((2, 1, 3,)),
               np.ones((4, 1, 2, 3)),
               ndim=2)
    # Use shape
    check_init((4, 2), (2, 3),
               np.zeros((2, 1, 3,)),
               np.ones((4, 1, 2, 3)),
               shape=(2, 3))
    # Use ndim and shape
    check_init((4, 2), (2, 3),
               np.zeros((2, 1, 3,)),
               np.ones((4, 1, 2, 3)),
               ndim=2,
               shape=(2, 3))

    #
    # Create from node parents
    #

    # ndim=0 by default
    check_init((3,), (),
               GaussianARD(0, 1, plates=(3,)),
               Gamma(1, 1, plates=(3,)))
    check_init((4, 2, 2, 3), (),
               GaussianARD(np.zeros((2, 1, 3)),
                           np.ones((2, 1, 3)),
                           ndim=3),
               Gamma(np.ones((4, 1, 2, 3)),
                     np.ones((4, 1, 2, 3))))
    # Use ndim
    check_init((4,), (2, 2, 3),
               GaussianARD(np.zeros((4, 1, 2, 3)),
                           np.ones((4, 1, 2, 3)),
                           ndim=2),
               Gamma(np.ones((4, 2, 1, 3)),
                     np.ones((4, 2, 1, 3))),
               ndim=3)
    # Use shape
    check_init((4,), (2, 2, 3),
               GaussianARD(np.zeros((4, 1, 2, 3)),
                           np.ones((4, 1, 2, 3)),
                           ndim=2),
               Gamma(np.ones((4, 2, 1, 3)),
                     np.ones((4, 2, 1, 3))),
               shape=(2, 2, 3))
    # Use ndim and shape
    check_init((4, 2), (2, 3),
               GaussianARD(np.zeros((2, 1, 3)),
                           np.ones((2, 1, 3)),
                           ndim=2),
               Gamma(np.ones((4, 1, 2, 3)),
                     np.ones((4, 1, 2, 3))),
               ndim=2,
               shape=(2, 3))

    # Test for a found bug
    check_init((), (3,), np.ones(3), 1, ndim=1)

    # Parent mu has more axes
    check_init((2,), (3,),
               GaussianARD(np.zeros((2, 3)),
                           np.ones((2, 3)),
                           ndim=2),
               np.ones((2, 3)),
               ndim=1)
    # Axes are NOT added when the parent has fewer than requested:
    # a parent with ndim=2 cannot be used with ndim=3
    check_error(GaussianARD(np.zeros((2, 3)),
                            np.ones((2, 3)),
                            ndim=2),
                1,
                ndim=3)

    #
    # Errors
    #

    # Inconsistent shapes
    check_error(GaussianARD(np.zeros((2, 3)),
                            np.ones((2, 3)),
                            ndim=1),
                np.ones((4, 3)),
                ndim=2)

    # Inconsistent dims of mu and alpha
    check_error(np.zeros((2, 3)), np.ones((2,)))

    # Inconsistent plates of mu and alpha
    check_error(GaussianARD(np.zeros((3, 2, 3)),
                            np.ones((3, 2, 3)),
                            ndim=2),
                np.ones((3, 4, 2, 3)),
                ndim=3)

    # Inconsistent ndim and shape
    check_error(np.zeros((2, 3)),
                np.ones((2,)),
                shape=(2, 3),
                ndim=1)

    # Incorrect shape
    check_error(GaussianARD(np.zeros((2, 3)),
                            np.ones((2, 3)),
                            ndim=2),
                np.ones((2, 3)),
                shape=(2, 2))
def test_rotate_plates(self):
    """
    Test rotating the plate axis of a GaussianARD node.

    A random 3x3 matrix is applied with ``rotate_plates`` and the resulting
    moments are compared with values computed from the moments before the
    rotation, first for a prior-only node and then for a node updated with
    an observed Gaussian child.
    """

    def assert_rotation_matches(X):
        (mean, second) = X.get_moments()
        cov = second - linalg.outer(mean, mean, ndim=1)
        Q = np.random.randn(3, 3)
        expected_mean = np.einsum('ik,kj->ij', Q, mean)
        expected_cov = np.einsum('k,kij->kij', np.sum(Q, axis=0)**2, cov)
        expected_second = expected_cov + linalg.outer(expected_mean,
                                                      expected_mean,
                                                      ndim=1)
        X.rotate_plates(Q, plate_axis=-1)
        (mean, second) = X.get_moments()
        self.assertAllClose(mean, expected_mean)
        self.assertAllClose(second, expected_second)

    # Basic test for Gaussian vectors
    X = GaussianARD(np.random.randn(3, 2),
                    np.random.rand(3, 2),
                    shape=(2,),
                    plates=(3,))
    assert_rotation_matches(X)

    # Test full covariance, that is, with observations
    X = GaussianARD(np.random.randn(3, 2),
                    np.random.rand(3, 2),
                    shape=(2,),
                    plates=(3,))
    Y = Gaussian(X, [[2.0, 1.5], [1.5, 3.0]], plates=(3,))
    Y.observe(np.random.randn(3, 2))
    X.update()
    assert_rotation_matches(X)
def check_lower_bound(shape_mu, shape_alpha, plates_mu=(), **kwargs):
    """
    Verify ``lower_bound_contribution`` of a GaussianARD node.

    The model is M -> X -> Y: M has mean 1 and precision 1, X has constant
    precision 2, Y has constant precision 3 and is observed to be 4
    everywhere.  After updating X its lower bound term must equal the scalar
    closed form times the product of ``X.get_shape(0)``.

    shape_mu, plates_mu: shape and plates of M.
    shape_alpha: shape of the precision array given to X.
    kwargs: passed to the constructors of X and Y.  When neither ``ndim``
        nor ``shape`` is present, ``ndim`` is set to ``len(shape_mu)``.

    NOTE(review): ``self`` is a free variable; presumably this function is
    defined inside a test method -- confirm.
    """
    ones_for_m = plates_mu + shape_mu
    M = GaussianARD(np.ones(ones_for_m),
                    np.ones(ones_for_m),
                    shape=shape_mu,
                    plates=plates_mu)
    has_dims = 'ndim' in kwargs or 'shape' in kwargs
    if not has_dims:
        kwargs['ndim'] = len(shape_mu)
    X = GaussianARD(M, 2 * np.ones(shape_alpha), **kwargs)
    Y = GaussianARD(X, 3 * np.ones(X.get_shape(0)), **kwargs)
    Y.observe(4 * np.ones(Y.get_shape(0)))
    X.update()

    # Closed form for one scalar element of X.
    variance = 1 / (2 + 3)
    mean = variance * (2 * 1 + 3 * 4)
    mean_square = mean**2 + variance
    entropy_term = (0.5 * (1 + np.log(2 * np.pi))
                    + 0.5 * np.log(variance))
    # The second moment of M is 1**2 + 1.
    logp_term = (-0.5 * np.log(2 * np.pi)
                 + 0.5 * np.log(2)
                 - 0.5 * 2 * (mean_square - 2 * mean * 1 + 1**2 + 1))
    multiplier = np.prod(X.get_shape(0))
    self.assertAllClose(multiplier * (logp_term + entropy_term),
                        X.lower_bound_contribution())
def test_lowerbound(self):
    """
    Test the variational Bayesian lower bound term for GaussianARD.

    The first part uses a vector node with a full-precision Gaussian child.
    The second part runs a scalar closed-form check over several
    shape/plate configurations.
    """

    # Test vector formula with full noise covariance
    prior_mean = np.random.randn(2)
    alpha = np.random.rand(2)
    y = np.random.randn(2)
    X = GaussianARD(prior_mean, alpha, ndim=1)
    V = np.array([[3, 1], [1, 3]])
    Y = Gaussian(X, V)
    Y.observe(y)
    X.update()
    Cov = np.linalg.inv(np.diag(alpha) + V)
    mu = np.dot(Cov, np.dot(V, y) + alpha * prior_mean)
    x2 = np.outer(mu, mu) + Cov
    entropy = (2 * 0.5 * (1 + np.log(2 * np.pi))
               + 0.5 * np.log(np.linalg.det(Cov)))
    expected_logp = (
        -2 * 0.5 * np.log(2 * np.pi)
        + 0.5 * np.log(np.linalg.det(np.diag(alpha)))
        - 0.5 * np.sum(
            np.diag(alpha)
            * (x2
               - np.outer(mu, prior_mean)
               - np.outer(prior_mean, mu)
               + np.outer(prior_mean, prior_mean))))
    self.assertAllClose(expected_logp + entropy,
                        X.lower_bound_contribution())

    def check_lower_bound(shape_mu, shape_alpha, plates_mu=(), **kwargs):
        # Chain M -> X -> Y with M ~ (mean 1, precision 1), precision 2 for
        # X, precision 3 for Y and Y observed to be 4 everywhere.
        m_dims = plates_mu + shape_mu
        M = GaussianARD(np.ones(m_dims),
                        np.ones(m_dims),
                        shape=shape_mu,
                        plates=plates_mu)
        if 'ndim' not in kwargs and 'shape' not in kwargs:
            kwargs['ndim'] = len(shape_mu)
        X = GaussianARD(M, 2 * np.ones(shape_alpha), **kwargs)
        Y = GaussianARD(X, 3 * np.ones(X.get_shape(0)), **kwargs)
        Y.observe(4 * np.ones(Y.get_shape(0)))
        X.update()
        # Closed form for one scalar element of X.
        Cov = 1 / (2 + 3)
        mu = Cov * (2 * 1 + 3 * 4)
        x2 = mu**2 + Cov
        logH_X = 0.5 * (1 + np.log(2 * np.pi)) + 0.5 * np.log(Cov)
        logp_X = (-0.5 * np.log(2 * np.pi)
                  + 0.5 * np.log(2)
                  - 0.5 * 2 * (x2 - 2 * mu * 1 + 1**2 + 1))
        r = np.prod(X.get_shape(0))
        self.assertAllClose(r * (logp_X + logH_X),
                            X.lower_bound_contribution())

    # (shape_mu, shape_alpha, extra keyword arguments)
    cases = [
        # Test scalar formula
        ((), (), {}),
        # Test array formula
        ((2, 3), (2, 3), {}),
        # Test dim-broadcasting of mu
        ((3, 1), (2, 3, 4), {}),
        # Test dim-broadcasting of alpha
        ((2, 3, 4), (3, 1), {}),
        # Test dim-broadcasting of mu and alpha
        ((3, 1), (3, 1), {'shape': (2, 3, 4)}),
        # Test dim-broadcasting of mu with plates
        ((), (), {'plates_mu': (), 'shape': (), 'plates': (5,)}),
        # BUG: Scalar parents for array variable caused einsum error
        ((), (), {'shape': (3,)}),
        # BUG: Log-det was summed over plates
        ((), (), {'shape': (3,), 'plates': (4,)}),
    ]
    for (shape_mu, shape_alpha, extra) in cases:
        check_lower_bound(shape_mu, shape_alpha, **extra)
def test_message_to_parent(self):
    """
    Verify the messages a Mixture node sends to its parents.
    """
    K = 3

    def check_scalar_mixture(make_alpha):
        # Scalar mixture of K Gaussians observed through a GaussianARD
        # child with precision 4 and observed value 5.
        Mu = GaussianARD(2, 1, ndim=0, plates=(K,))
        (mu, mumu) = Mu._message_to_child()
        Alpha = make_alpha()
        (alpha, logalpha) = Alpha._message_to_child()
        z = Categorical(np.ones(K)/K)
        X = Mixture(z, GaussianARD, Mu, Alpha)
        Y = GaussianARD(X, 4)
        Y.observe(5)
        (x, xx) = X._message_to_child()

        to_z = z._message_from_children()
        expected = random.gaussian_logpdf(xx*alpha,
                                          x*alpha*mu,
                                          mumu*alpha,
                                          logalpha,
                                          0)
        self.assertAllClose(to_z[0] * np.ones(K),
                            expected * np.ones(K))

        to_mu = Mu._message_from_children()
        self.assertAllClose(to_mu[0],
                            1/K * (alpha*x) * np.ones(3))
        self.assertAllClose(to_mu[1],
                            -0.5 * 1/K * alpha * np.ones(3))

    # Broadcasting the moments on the cluster axis
    check_scalar_mixture(lambda: Gamma(3, 1, plates=(K,)))

    # Some parameters do not have cluster plate axis
    # (note: this Alpha has no cluster plate axis!)
    check_scalar_mixture(lambda: Gamma(3, 1))

    # Cluster assignments do not have as many plate axes as parameters.
    M = 2
    Mu = GaussianARD(2, 1, ndim=0, plates=(K,M))
    (mu, mumu) = Mu._message_to_child()
    Alpha = Gamma(3, 1, plates=(K,M))
    (alpha, logalpha) = Alpha._message_to_child()
    z = Categorical(np.ones(K)/K)
    X = Mixture(z, GaussianARD, Mu, Alpha, cluster_plate=-2)
    Y = GaussianARD(X, 4)
    Y.observe(5 * np.ones(M))
    (x, xx) = X._message_to_child()

    to_z = z._message_from_children()
    logpdf = random.gaussian_logpdf(xx*alpha,
                                    x*alpha*mu,
                                    mumu*alpha,
                                    logalpha,
                                    0)
    self.assertAllClose(to_z[0]*np.ones(K),
                        np.sum(logpdf * np.ones((K,M)), axis=-1))

    to_mu = Mu._message_from_children()
    self.assertAllClose(to_mu[0] * np.ones((K,M)),
                        1/K * (alpha*x) * np.ones((K,M)))
    self.assertAllClose(to_mu[1] * np.ones((K,M)),
                        -0.5 * 1/K * alpha * np.ones((K,M)))

    # Mixed distribution broadcasts g
    # This tests for a found bug. The bug caused an error, so it is enough
    # that the message can be computed at all.
    Z = Categorical([0.3, 0.5, 0.2])
    X = Mixture(Z, Categorical, [[0.2,0.8], [0.1,0.9], [0.3,0.7]])
    Z._message_from_children()
class BayesianRegression(object):
    """Bayesian linear regression with a GaussianARD prior on the weights.

    The weights get a ``GaussianARD(prior_mean, prior_precision)`` prior with
    one precision per weight, and the observation precision gets a
    ``Gamma(prior_a, prior_b)`` prior.

    Parameters
    ----------
    n_feature : int
        Number of regression weights (columns of the design matrix).
    prior_mean : scalar or array
        Prior mean of the weights; broadcast to length ``n_feature``.
    prior_precision : scalar or array
        Prior precision of the weights; broadcast to length ``n_feature``.
    prior_a, prior_b : scalar
        The two parameters passed to the ``Gamma`` prior of the observation
        precision.
    """

    def __init__(self,
                 n_feature,
                 prior_mean=0,
                 prior_precision=1e-6,
                 prior_a=10,
                 prior_b=1):
        super().__init__()
        self.n_feature = n_feature
        # The weight node has shape (n_feature,), so both prior parameters
        # are expanded to one value per weight unless they already are.
        if np.shape(prior_mean) != (n_feature, ):
            prior_mean = prior_mean * np.ones(n_feature)
        if np.shape(prior_precision) != (n_feature, ):
            prior_precision = prior_precision * np.ones(n_feature)
        self.prior_mean = prior_mean
        self.prior_precision = prior_precision
        self.prior_a = prior_a
        self.prior_b = prior_b
        self._init_weights()

    def _init_weights(self):
        """(Re)create the weight node from the stored prior parameters."""
        self.weights = GaussianARD(self.prior_mean,
                                   self.prior_precision,
                                   shape=(self.n_feature, ))

    def fit(self, X, y):
        """Reset the weights to the prior and fit them to ``(X, y)``.

        ``X`` is combined with the weights through ``SumMultiply('i,i', ...)``
        and ``y`` is observed on the resulting GaussianARD node.
        """
        self._init_weights()
        self.tau = Gamma(self.prior_a, self.prior_b)
        F = SumMultiply('i,i', self.weights, X)
        y_obs = GaussianARD(F, self.tau)
        y_obs.observe(y)
        # NOTE(review): self.tau is not handed to VB, so only the weights are
        # updated here -- confirm that keeping tau un-updated is intended.
        Q = VB(y_obs, self.weights)
        Q.update(repeat=10, tol=1e-4, verbose=False)

    def predict(self, x, return_var=False):
        """Return the expected regression output for ``x``.

        With ``return_var=True`` a pair ``(mean, variance)`` is returned.
        The variance is that of ``sum_i w_i * x_i`` under the current weight
        distribution; observation noise is not added.
        """
        y = SumMultiply('i,i', self.weights, x)
        y_hat, second_moment, *_ = y.get_moments()
        if return_var:
            # The node's second moment is E[y^2]; subtract the squared mean
            # to obtain the variance (same convention as in ``print``).
            return y_hat, second_moment - y_hat**2
        return y_hat

    def sample(self, x):
        """Draw one weight vector and return the corresponding ``x @ w``."""
        w = self.weights.random()
        return x @ w

    def print(self, diagonal=True):
        """Print the weight mean and (co)variance, rounded to 3 decimals.

        With ``diagonal=True`` only the per-weight variances are shown,
        otherwise the full covariance matrix.
        """
        mean, m2 = self.weights.get_moments()[:2]
        # m2 is the (n_feature, n_feature) second-moment matrix, so the mean
        # term has to be an outer product for the off-diagonals to be right.
        var = m2 - np.outer(mean, mean)
        if diagonal:
            var = np.diagonal(var)
        bar = '_' * 40
        print(f'{bar}\n{mean.round(3)}\n{var.round(3)}\n{bar}')
def test_message_to_child(self):
    """
    Check the moments that GaussianARD passes to its children.
    """

    # Moments must have the full shape even when parameters are broadcast
    node = GaussianARD(np.zeros((2,)),
                       np.ones((3,2)),
                       shape=(4,3,2))
    first, second = node._message_to_child()
    self.assertEqual(np.shape(first), (4,3,2))
    self.assertEqual(np.shape(second), (4,3,2,4,3,2))

    # Scalar formula: mean 2, precision 3
    node = GaussianARD(2, 3)
    first, second = node._message_to_child()
    self.assertAllClose(first, 2)
    self.assertAllClose(second, 2**2 + 1/3)

    # Multidimensional arrays with various dim-broadcasting patterns.
    # Each entry is (shape of mu, shape of alpha, keyword arguments).
    dims = (2,3,4)
    broadcast_cases = [
        # plain multidimensional arrays
        ((2,1,4), (2,3,1), dict(ndim=3)),
        # dim-broadcasted mu
        ((3,1), (2,3,4), dict(ndim=3)),
        # dim-broadcasted alpha
        ((2,3,4), (3,1), dict(ndim=3)),
        # dim-broadcasted mu and alpha
        ((3,1), (3,1), dict(shape=(2,3,4))),
    ]
    for mu_shape, alpha_shape, keywords in broadcast_cases:
        node = GaussianARD(2*np.ones(mu_shape),
                           3*np.ones(alpha_shape),
                           **keywords)
        first, second = node._message_to_child()
        self.assertAllClose(first, 2*np.ones(dims))
        self.assertAllClose(second,
                            2**2 * np.ones(dims + dims)
                            + 1/3 * misc.identity(*dims))

    # Dim-broadcasted mu with plates
    parent = GaussianARD(2*np.ones((5,1,3,4)),
                         np.ones((5,1,3,4)),
                         shape=(3,4),
                         plates=(5,1))
    node = GaussianARD(parent,
                       3*np.ones((5,2,3,4)),
                       shape=(2,3,4),
                       plates=(5,))
    first, second = node._message_to_child()
    self.assertAllClose(first, 2*np.ones((5,2,3,4)))
    self.assertAllClose(second,
                        2**2 * np.ones((5,2,3,4,2,3,4))
                        + 1/3 * misc.identity(2,3,4))

    # Posterior after observing 10 through a unit-precision child
    node = GaussianARD(2, 3)
    child = GaussianARD(node, 1)
    child.observe(10)
    node.update()
    first, second = node._message_to_child()
    post_var = 1/(3+1)
    self.assertAllClose(first, post_var * (3*2 + 1*10))
    self.assertAllClose(second,
                        (post_var * (3*2 + 1*10))**2 + post_var)
def test_message_to_parent(self):
    """
    Check that Concatenate splits the child message between its parents.
    """

    def assert_split(joint, first, second, rest, dims):
        # The concatenated axis has length 5: the leading 2 entries belong
        # to the first parent, the trailing 3 to the second.  `rest` holds
        # the remaining plate axes and `dims[i]` the dims of moment i.
        def ones(length, i):
            return np.ones((length, ) + rest + dims[i])

        for i in range(2):
            self.assertAllClose((joint[i] * ones(5, i))[:2],
                                first[i] * ones(2, i))
        for i in range(2):
            self.assertAllClose((joint[i] * ones(5, i))[2:],
                                second[i] * ones(3, i))

    def attach_observed_child(node):
        # Unit-precision GaussianARD child observed at random values
        child = GaussianARD(node, 1)
        child.observe(np.random.randn(*child.get_shape(0)))
        return child

    scalar_dims = ((), ())
    matrix_dims = ((4, 6), (4, 6, 4, 6))

    # Two parents without shapes
    X1 = GaussianARD(0, 1, plates=(2, ), shape=())
    X2 = GaussianARD(0, 1, plates=(3, ), shape=())
    Z = Concatenate(X1, X2)
    Y = attach_observed_child(Z)
    assert_split(Z._message_from_children(),
                 X1._message_from_children(),
                 X2._message_from_children(),
                 (), scalar_dims) if False else None
    m1 = X1._message_from_children()
    m2 = X2._message_from_children()
    m = Z._message_from_children()
    assert_split(m, m1, m2, (), scalar_dims)

    # Two parents with shapes
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        X1 = GaussianARD(0, 1, plates=(2, ), shape=(4, 6))
        X2 = GaussianARD(0, 1, plates=(3, ), shape=(4, 6))
        Z = Concatenate(X1, X2)
        Y = attach_observed_child(Z)
        m1 = X1._message_from_children()
        m2 = X2._message_from_children()
        m = Z._message_from_children()
        assert_split(m, m1, m2, (), matrix_dims)

    # Two parents with non-default concatenation axis
    X1 = GaussianARD(0, 1, plates=(2, 4), shape=())
    X2 = GaussianARD(0, 1, plates=(3, 4), shape=())
    Z = Concatenate(X1, X2, axis=-2)
    Y = attach_observed_child(Z)
    m1 = X1._message_from_children()
    m2 = X2._message_from_children()
    m = Z._message_from_children()
    assert_split(m, m1, m2, (4, ), scalar_dims)

    # Constant parent: its message is read from the Concatenate node itself
    X1 = np.random.randn(2, 4, 6)
    X2 = GaussianARD(0, 1, plates=(3, ), shape=(4, 6))
    Z = Concatenate(X1, X2)
    Y = attach_observed_child(Z)
    m1 = Z._message_to_parent(0)
    m2 = X2._message_from_children()
    m = Z._message_from_children()
    assert_split(m, m1, m2, (), matrix_dims)
def test_lowerbound(self):
    """
    Test GaussianARD's term of the variational Bayesian lower bound.

    The expected value is always assembled as <log p(X)> plus the entropy
    of the posterior, both evaluated analytically.
    """

    #
    # Vector node, Gaussian child with full noise precision
    #
    m0 = np.random.randn(2)
    a0 = np.random.rand(2)
    data = np.random.randn(2)
    X = GaussianARD(m0, a0, ndim=1)
    Lambda = np.array([[3,1],[1,3]])
    Y = Gaussian(X, Lambda)
    Y.observe(data)
    X.update()

    A0 = np.diag(a0)
    Sigma = np.linalg.inv(A0 + Lambda)
    xbar = Sigma.dot(Lambda.dot(data) + a0*m0)
    xx = np.outer(xbar, xbar) + Sigma

    H = 2*0.5*(1+np.log(2*np.pi))
    H = H + 0.5*np.log(np.linalg.det(Sigma))

    quad = xx - np.outer(xbar, m0) - np.outer(m0, xbar) + np.outer(m0, m0)
    logp = - 2*0.5*np.log(2*np.pi)
    logp = logp + 0.5*np.log(np.linalg.det(A0))
    logp = logp - 0.5*np.sum(A0 * quad)

    self.assertAllClose(logp + H,
                        X.lower_bound_contribution())

    #
    # Constant-valued nodes with assorted shapes and plates
    #
    def check_lower_bound(shape_mu, shape_alpha, plates_mu=(), **kwargs):
        mean_node = GaussianARD(np.ones(plates_mu + shape_mu),
                                np.ones(plates_mu + shape_mu),
                                shape=shape_mu,
                                plates=plates_mu)
        has_dims = ('ndim' in kwargs) or ('shape' in kwargs)
        if not has_dims:
            kwargs['ndim'] = len(shape_mu)
        X = GaussianARD(mean_node,
                        2*np.ones(shape_alpha),
                        **kwargs)
        Y = GaussianARD(X,
                        3*np.ones(X.get_shape(0)),
                        **kwargs)
        Y.observe(4*np.ones(Y.get_shape(0)))
        X.update()

        # Scalar posterior shared by every element of X
        v = 1/(2+3)
        xbar = v * (2*1 + 3*4)
        xx = xbar**2 + v
        H = 0.5*(1+np.log(2*np.pi)) + 0.5*np.log(v)
        logp = (- 0.5*np.log(2*np.pi)
                + 0.5*np.log(2)
                - 0.5*2*(xx - 2*xbar*1 + 1**2+1))
        size = np.prod(X.get_shape(0))
        self.assertAllClose(size * (logp + H),
                            X.lower_bound_contribution())

    # Test scalar formula
    check_lower_bound((), ())

    # Test array formula
    check_lower_bound((2,3), (2,3))

    # Test dim-broadcasting of mu
    check_lower_bound((3,1), (2,3,4))

    # Test dim-broadcasting of alpha
    check_lower_bound((2,3,4), (3,1))

    # Test dim-broadcasting of mu and alpha
    check_lower_bound((3,1), (3,1), shape=(2,3,4))

    # Test dim-broadcasting of mu with plates
    check_lower_bound((), (), plates_mu=(), shape=(), plates=(5,))

    # BUG: Scalar parents for array variable caused einsum error
    check_lower_bound((), (), shape=(3,))

    # BUG: Log-det was summed over plates
    check_lower_bound((), (), shape=(3,), plates=(4,))
def test_init(self):
    """
    Check plates and dims of Concatenate nodes and its argument validation.
    """

    def ard(plates, shape):
        # Standard-normal GaussianARD with the given plates and shape
        return GaussianARD(0, 1, plates=plates, shape=shape)

    def expect(node, plates, dims):
        self.assertEqual(node.plates, plates)
        self.assertEqual(node.dims, dims)

    scalar_dims = ((), ())

    # One parent only
    expect(Concatenate(ard((3, ), ())), (3, ), scalar_dims)
    expect(Concatenate(ard((3, ), (2, 4))),
           (3, ), ((2, 4), (2, 4, 2, 4)))

    # Two parents
    expect(Concatenate(ard((2, ), ()), ard((3, ), ())),
           (5, ), scalar_dims)

    # Two parents with shapes
    expect(Concatenate(ard((2, ), (4, 6)), ard((3, ), (4, 6))),
           (5, ), ((4, 6), (4, 6, 4, 6)))

    # Two parents with non-default axis
    expect(Concatenate(ard((2, 4), ()), ard((3, 4), ()), axis=-2),
           (5, 4), scalar_dims)

    # Three parents
    expect(Concatenate(ard((2, ), ()), ard((3, ), ()), ard((4, ), ())),
           (9, ), scalar_dims)

    # Constant parent
    expect(Concatenate([7.2, 3.5], ard((3, ), ())),
           (5, ), scalar_dims)

    # Different moments
    gaussian = GaussianARD(0, 1, plates=(3, ))
    gamma = Gamma(1, 1, plates=(4, ))
    self.assertRaises(ValueError, Concatenate, gaussian, gamma)

    # Incompatible shapes
    vector = ard((3, ), (2, ))
    scalar = ard((2, ), ())
    self.assertRaises(ValueError, Concatenate, vector, scalar)

    # Incompatible plates
    left = ard((4, 3), ())
    right = ard((5, 2), ())
    self.assertRaises(ValueError, Concatenate, left, right)
def test_initialization(self):
    """
    Exercise the initialize_from_* methods of GaussianARD.
    """

    def expected_second_moment(mean, precision=None):
        # Outer product of the mean plus, when a precision is given, the
        # diagonal covariance 1/precision.
        second = linalg.outer(mean, mean, ndim=1)
        if precision is not None:
            second = second + misc.diag(1/precision, ndim=1)
        return second

    node = GaussianARD(1, 2,
                       shape=(2,),
                       plates=(3,))

    # Prior initialization
    prior_mean = 1 * np.ones((3, 2))
    prior_prec = 2 * np.ones((3, 2))
    node.initialize_from_prior()
    moments = node._message_to_child()
    self.assertAllClose(moments[0]*np.ones((3,2)),
                        prior_mean)
    self.assertAllClose(moments[1]*np.ones((3,2,2)),
                        expected_second_moment(prior_mean, prior_prec))

    # Parameter initialization
    mean = np.random.randn(3, 2)
    prec = np.random.rand(3, 2)
    node.initialize_from_parameters(mean, prec)
    moments = node._message_to_child()
    self.assertAllClose(moments[0], mean)
    self.assertAllClose(moments[1],
                        expected_second_moment(mean, prec))

    # Value initialization
    value = np.random.randn(3, 2)
    node.initialize_from_value(value)
    moments = node._message_to_child()
    self.assertAllClose(moments[0], value)
    self.assertAllClose(moments[1], expected_second_moment(value))

    # Random initialization (only checked for running without error)
    node.initialize_from_random()
def test_message_to_child(self):
    """
    Check that Concatenate's moments are its parents' moments stacked.
    """

    def assert_stacked(joint, first, second, rest, dims):
        # The concatenated axis has length 5: entries [:2] come from the
        # first parent and entries [2:] from the second.  `rest` holds the
        # remaining plate axes and `dims[i]` the dims of moment i.
        def ones(length, i):
            return np.ones((length, ) + rest + dims[i])

        for i in range(2):
            self.assertAllClose((joint[i] * ones(5, i))[:2],
                                first[i] * ones(2, i))
        for i in range(2):
            self.assertAllClose((joint[i] * ones(5, i))[2:],
                                second[i] * ones(3, i))

    scalar_dims = ((), ())
    vector_dims = ((4, ), (4, 4))

    # Two parents without shapes
    X1 = GaussianARD(0, 1, plates=(2, ), shape=())
    X2 = GaussianARD(0, 1, plates=(3, ), shape=())
    Y = Concatenate(X1, X2)
    u1 = X1.get_moments()
    u2 = X2.get_moments()
    u = Y.get_moments()
    assert_stacked(u, u1, u2, (), scalar_dims)

    # Two parents with shapes
    X1 = GaussianARD(0, 1, plates=(2, ), shape=(4, ))
    X2 = GaussianARD(0, 1, plates=(3, ), shape=(4, ))
    Y = Concatenate(X1, X2)
    u1 = X1.get_moments()
    u2 = X2.get_moments()
    u = Y.get_moments()
    assert_stacked(u, u1, u2, (), vector_dims)

    # Test with non-constant axis
    X1 = GaussianARD(0, 1, plates=(2, 4), shape=())
    X2 = GaussianARD(0, 1, plates=(3, 4), shape=())
    Y = Concatenate(X1, X2, axis=-2)
    u1 = X1.get_moments()
    u2 = X2.get_moments()
    u = Y.get_moments()
    assert_stacked(u, u1, u2, (4, ), scalar_dims)

    # Test with constant parent: its moments are read from the node that
    # Concatenate stores as its first parent.
    X1 = np.random.randn(2, 4)
    X2 = GaussianARD(0, 1, plates=(3, ), shape=(4, ))
    Y = Concatenate(X1, X2)
    u1 = Y.parents[0].get_moments()
    u2 = X2.get_moments()
    u = Y.get_moments()
    assert_stacked(u, u1, u2, (), vector_dims)
def test_gradient(self):
    """
    Test standard gradient of a Gamma node.

    The analytic gradient obtained from the Riemannian gradient is compared
    with a forward finite-difference gradient of the VB lower bound, first
    for a Gamma node alone and then for a Gamma node that is the precision
    of an observed GaussianARD node.
    """
    D = 3

    np.random.seed(42)

    def numerical_gradient(node, Q, eps=1e-8):
        # Forward-difference gradient of the lower bound with respect to
        # each element of the node's two parameter arrays.  The node is
        # left at its last perturbed parameter values.
        p0 = node.get_parameters()
        l0 = Q.compute_lowerbound(ignore_masked=False)
        grad = [np.zeros(D), np.zeros(D)]
        for k in range(2):
            for i in range(D):
                step = np.zeros(D)
                step[i] = eps
                params = [p0[0], p0[1]]
                params[k] = params[k] + step
                node.set_parameters(params)
                l1 = Q.compute_lowerbound(ignore_masked=False)
                grad[k][i] = (l1 - l0) / eps
        return grad

    def check_gradient(node, Q):
        # The analytic gradient must be evaluated before the numerical one
        # because the latter perturbs the node's parameters.
        rg = node.get_riemannian_gradient()
        g = node.get_gradient(rg)
        g_num = numerical_gradient(node, Q)
        self.assertAllClose(g[0], g_num[0])
        self.assertAllClose(g[1], g_num[1])

    #
    # Without observations
    #

    # Construct model
    a = np.random.rand(D)
    b = np.random.rand(D)
    tau = Gamma(a, b)
    Q = VB(tau)
    # Random initialization
    tau.initialize_from_parameters(np.random.rand(D),
                                   np.random.rand(D))
    check_gradient(tau, Q)

    #
    # With observations
    #

    # Construct model
    a = np.random.rand(D)
    b = np.random.rand(D)
    tau = Gamma(a, b)
    mu = np.random.randn(D)
    Y = GaussianARD(mu, tau)
    Y.observe(np.random.randn(D))
    Q = VB(Y, tau)
    # Random initialization
    tau.initialize_from_parameters(np.random.rand(D),
                                   np.random.rand(D))
    check_gradient(tau, Q)
def __init__(self, n_feature, n_iter=1000, tolerance=1e-8):
    """
    Store the iteration settings and create the weight node.

    The weights are a GaussianARD node of shape ``(n_feature,)`` with mean
    0 and precision 1e-6.
    """
    super().__init__()
    prior_mean, prior_precision = 0, 1e-6
    self.n_iter = n_iter
    self.tolerance = tolerance
    self.weights = GaussianARD(prior_mean,
                               prior_precision,
                               shape=(n_feature, ))
def test_message_to_parent_alpha(self):
    """
    Check the message GaussianARD sends to its second parent (alpha).

    Every case reads the two-element message from the Gamma parent and
    compares it with hand-computed values.
    """
    # Squared error between an observed value 2 and a fixed mean 1, used by
    # all the constant-mean cases below.
    sq_err = 2**2 - 2 * 2 * 1 + 1**2

    # Uncertain parent mu
    mu = GaussianARD(1, 1)
    tau = Gamma(0.5 * 1e10, 1e10)
    X = GaussianARD(mu, tau)
    X.observe(3)
    msg = tau._message_from_children()
    self.assertAllClose(msg[0], -0.5 * (3**2 - 2 * 3 * 1 + 1**2 + 1))
    self.assertAllClose(msg[1], 0.5)

    # Uncertain node
    tau = Gamma(1e10, 1e10)
    X = GaussianARD(2, tau)
    Y = GaussianARD(X, 1)
    Y.observe(5)
    X.update()
    msg = tau._message_from_children()
    self.assertAllClose(
        msg[0],
        -0.5 * (1 / (1 + 1) + 3.5**2 - 2 * 3.5 * 2 + 2**2))
    self.assertAllClose(msg[1], 0.5)

    # alpha larger than mu
    full = (3, 2, 3)
    alpha = Gamma(np.ones(full) * 1e10, 1e10)
    X = GaussianARD(np.ones((2, 3)), alpha, ndim=3)
    X.observe(2 * np.ones(full))
    msg = alpha._message_from_children()
    self.assertAllClose(msg[0] * np.ones(full),
                        -0.5 * sq_err * np.ones(full))
    self.assertAllClose(msg[1] * np.ones(full),
                        0.5 * np.ones(full))

    # mu larger than alpha: the leading axis of length 3 is summed out
    tau = Gamma(np.ones((2, 3)) * 1e10, 1e10)
    X = GaussianARD(np.ones(full), tau, ndim=3)
    X.observe(2 * np.ones(full))
    msg = tau._message_from_children()
    self.assertAllClose(msg[0],
                        -0.5 * sq_err * 3 * np.ones((2, 3)))
    self.assertAllClose(msg[1] * np.ones((2, 3)),
                        0.5 * 3 * np.ones((2, 3)))

    # Node larger than mu and alpha: 3 * 2 = 6 elements per alpha entry
    tau = Gamma(np.ones((3, )) * 1e10, 1e10)
    X = GaussianARD(np.ones((2, 3)), tau, shape=full)
    X.observe(2 * np.ones(full))
    msg = tau._message_from_children()
    self.assertAllClose(msg[0] * np.ones(3),
                        -0.5 * sq_err * 6 * np.ones((3, )))
    self.assertAllClose(msg[1] * np.ones(3),
                        0.5 * 6 * np.ones(3))

    # Plates for smaller mu than node: the plate axis of length 5 is
    # summed out
    tau_shape = (4, 1, 2, 3)
    tau = Gamma(np.ones(tau_shape) * 1e10, 1e10)
    mu = GaussianARD(1, 1, shape=(3, ), plates=(4, 1, 1))
    X = GaussianARD(mu, tau, shape=(2, 3), plates=(4, 5))
    X.observe(2 * np.ones((4, 5, 2, 3)))
    msg = tau._message_from_children()
    self.assertAllClose(
        msg[0] * np.ones(tau_shape),
        (-0.5 * (2**2 - 2 * 2 * 1 + 1**2 + 1) * 5 * np.ones(tau_shape)))
    self.assertAllClose(msg[1] * np.ones(tau_shape),
                        5 * 0.5 * np.ones(tau_shape))

    # Mask: the number of observed plates per column is 1, 1, 2 and 0
    observed = [[True, False, True, False],
                [False, True, True, False]]
    counts = np.array([[1], [1], [2], [0]])
    tau = Gamma(np.ones((4, 3)) * 1e10, 1e10)
    X = GaussianARD(np.ones(3), tau, shape=(3, ), plates=(2, 4))
    X.observe(2 * np.ones((2, 4, 3)), mask=observed)
    msg = tau._message_from_children()
    self.assertAllClose(msg[0] * np.ones((4, 3)),
                        (-0.5 * sq_err * np.ones((4, 3)) * counts))
    self.assertAllClose(msg[1] * np.ones((4, 3)),
                        0.5 * counts * np.ones((4, 3)))

    # Non-ARD Gaussian child
    mu = np.array([1, 2])
    alpha = np.array([3, 4])
    Alpha = Gamma(alpha * 1e10, 1e10)
    Lambda = np.array([[1, 0.5], [0.5, 1]])
    X = GaussianARD(mu, Alpha, ndim=1)
    Y = Gaussian(X, Lambda)
    y = np.array([5, 6])
    Y.observe(y)
    X.update()
    msg = Alpha._message_from_children()
    Cov = np.linalg.inv(np.diag(alpha) + Lambda)
    mean = np.dot(Cov, np.dot(np.diag(alpha), mu) + np.dot(Lambda, y))
    centered = (np.outer(mean, mean) + Cov
                - np.outer(mean, mu)
                - np.outer(mu, mean)
                + np.outer(mu, mu))
    self.assertAllClose(msg[0] * np.ones(2), -0.5 * np.diag(centered))
    self.assertAllClose(msg[1] * np.ones(2), 0.5 * np.ones(2))
import numpy as np
from bayespy.nodes import GaussianARD, GaussianMarkovChain, Gamma, Dot
from bayespy.inference import VB

np.random.seed(1)

# Problem sizes used for the node plates/shapes below
M = 30
N = 400
D = 10

# Dynamics matrix A with an ARD prior
alpha = Gamma(1e-5, 1e-5,
              plates=(D, ),
              name='alpha')
A = GaussianARD(0, alpha,
                shape=(D, ),
                plates=(D, ),
                name='A')

# Latent Markov chain driven by A
X = GaussianMarkovChain(np.zeros(D),
                        1e-3 * np.identity(D),
                        A,
                        np.ones(D),
                        n=N,
                        name='X')

# Loading matrix C with an ARD prior, and the product F of C and X
gamma = Gamma(1e-5, 1e-5,
              plates=(D, ),
              name='gamma')
C = GaussianARD(0, gamma,
                shape=(D, ),
                plates=(M, 1),
                name='C')
F = Dot(C, X, name='F')
C.initialize_from_random()

# Noise precision and the node Y built on F
tau = Gamma(1e-5, 1e-5, name='tau')
Y = GaussianARD(F, tau, name='Y')

Q = VB(X, C, gamma, A, alpha, tau, Y)

# 4x4 matrix: a rotation by angle w in the first two coordinates, identity
# on the third, zero on the fourth.
w = 0.3
a = np.zeros((4, 4))
a[:2, :2] = [[np.cos(w), -np.sin(w)],
             [np.sin(w), np.cos(w)]]
a[2, 2] = 1
c = np.random.randn(M, 4)
x = np.empty((N, 4))
# p(\mathbf{y}|\mu,\tau) &= \prod^{9}_{n=0} \mathcal{N}(y_n|\mu,\tau) \\ # p(\mu) &= \mathcal{N}(\mu|0,10^{-6}) \\ # p(\tau) &= \mathcal{G}(\tau|10^{-6},10^{-6}) # \end{split} # $$ # # where $\mathcal{N}$ is the Gaussian distribution parameterized by its mean and precision (i.e., inverse variance), and $\mathcal{G}$ is the gamma distribution parameterized by its shape and rate parameters. Note that we have given quite uninformative priors for the variables $\mu$ and $\tau$. This simple model can also be shown as a directed factor graph: # This model can be constructed in BayesPy as follows: # In[2]: from bayespy.nodes import GaussianARD, Gamma mu = GaussianARD(0, 1e-6) tau = Gamma(1e-6, 1e-6) y = GaussianARD(mu, tau, plates=(10,)) # In[3]: y.observe(data) # Next we want to estimate the posterior distribution. In principle, we could use different inference engines (e.g., MCMC or EP) but currently only variational Bayesian (VB) engine is implemented. The engine is initialized by giving all the nodes of the model: # In[4]: from bayespy.inference import VB Q = VB(mu, tau, y)
import numpy as np
from bayespy.nodes import GaussianARD, Gamma
from bayespy.inference import VB
import bayespy.plot as bpplt

# Ten samples from a normal distribution with mean 5 and standard
# deviation 10
np.random.seed(1)
data = np.random.normal(5, 10, size=(10, ))

# Unknown mean and precision of the data
mu = GaussianARD(0, 1e-6)
tau = Gamma(1e-6, 1e-6)
print(mu)
print(tau)

# Observation node with one plate per sample
y = GaussianARD(mu, tau, plates=(10, ))
y.observe(data)

# Variational Bayesian inference
Q = VB(mu, tau, y)
Q.update(repeat=20)

# Plot the pdf of each node in its own row of a 2x1 grid
panels = [
    (mu, np.linspace(-10, 20, num=100), r'\mu'),
    (tau, np.linspace(1e-6, 0.08, num=100), r'\tau'),
]
for row, (node, grid, label) in enumerate(panels, start=1):
    bpplt.pyplot.subplot(2, 1, row)
    bpplt.pdf(node, grid, color='k', name=label)
bpplt.pyplot.tight_layout()
bpplt.pyplot.show()
def model(M, N, D, K):
    """
    Construct the linear state-space model with time-varying dynamics

    The model consists of three blocks: a K-dimensional Gaussian Markov
    chain ``S`` of mixing weights, a D-dimensional latent Markov chain ``X``
    whose dynamics are built from ``A`` and the weights ``S``, and
    observations ``Y`` obtained from ``X`` through the loading matrix ``C``.

    Parameters
    ----------
    M : int
        Size of the first plate axis of ``C`` (and thus of ``Y``).
    N : int
        Number of time instances of the Markov chains.
    D : int
        Dimensionality of the latent states ``X``.
    K : int
        Dimensionality of the mixing-weight process ``S``.

    Returns
    -------
    VB
        Inference engine containing all nodes of the model.

    Notes
    -----
    Initial values are drawn with ``np.random``; seed it beforehand for
    reproducible initialization.

    TODO: add the reference to the publication describing this model.
    """

    #
    # The model block for the latent mixing weight process
    #

    # Dynamics matrix with ARD
    # beta : (K) x ()
    beta = Gamma(1e-5,
                 1e-5,
                 plates=(K,),
                 name='beta')
    # B : (K) x (K)
    B = GaussianARD(np.identity(K),
                    beta,
                    shape=(K,),
                    plates=(K,),
                    name='B',
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=1,
                                                        scale=0),
                    initialize=False)
    B.initialize_from_value(np.identity(K))

    # Mixing weight process, that is, the weights in the linear combination of
    # state dynamics matrices
    # S : () x (N,K)
    S = GaussianMarkovChain(np.ones(K),
                            1e-6*np.identity(K),
                            B,
                            np.ones(K),
                            n=N,
                            name='S',
                            plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                            initialize=False)
    # Random initial weights, except that the first weight is held at 10
    s = 10*np.random.randn(N,K)
    s[:,0] = 10
    S.initialize_from_value(s)

    #
    # The model block for the latent states
    #

    # Projection matrix of the dynamics matrix
    # alpha : (D,K) x ()
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(D,K),
                  name='alpha')
    alpha.initialize_from_value(1*np.ones((D,K)))
    # A : (D) x (D,K)
    A = GaussianARD(0,
                    alpha,
                    shape=(D,K),
                    plates=(D,),
                    name='A',
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=1,
                                                        scale=0),
                    initialize=False)

    # Initialize S and A such that A*S is almost an identity matrix: the
    # slice for the first weight is the identity divided by that weight's
    # initial value, the remaining slices are small random perturbations.
    a = np.zeros((D,D,K))
    a[:,:,0] = np.identity(D) / s[0,0]
    a[:,:,1:] = 0.1/s[0,0]*np.random.randn(D,D,K-1)
    A.initialize_from_value(a)

    # Latent states with dynamics
    # X : () x (N,D)
    X = VaryingGaussianMarkovChain(np.zeros(D),         # mean of x0
                                   1e-3*np.identity(D), # prec of x0
                                   A,                   # dynamics matrices
                                   S._convert(GaussianMoments)[1:], # temporal weights
                                   np.ones(D),          # innovation
                                   n=N,                 # time instances
                                   name='X',
                                   plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                                   initialize=False)
    X.initialize_from_value(np.random.randn(N,D))

    #
    # The model block for observations
    #

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='gamma')
    gamma.initialize_from_value(1e-2*np.ones(D))
    # C : (M,1) x (D)
    C = GaussianARD(0,
                    gamma,
                    shape=(D,),
                    plates=(M,1),
                    name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=2,
                                                        scale=0))
    C.initialize_from_value(np.random.randn(M,1,D))

    # Noiseless process
    # F : (M,N) x ()
    F = SumMultiply('d,d',
                    C,
                    X,
                    name='F')

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5,
                1e-5,
                name='tau')
    tau.initialize_from_value(1e2)

    # Observations
    # Y: (M,N) x ()
    Y = GaussianARD(F,
                    tau,
                    name='Y')

    # Construct inference machine
    Q = VB(Y, F, C, gamma, X, A, alpha, tau, S, B, beta)

    return Q
def run(N=500, seed=42, maxiter=100, plot=True):
    """
    Run deterministic annealing demo for 1-D Gaussian mixture.

    Standard VB-EM and VB-EM with deterministic annealing are started from
    the same saved state and their component means and lower bounds are
    printed.  The `plot` argument is accepted but not used.
    """

    if seed is not None:
        np.random.seed(seed)

    component_probs = [0.3, 0.7]
    component_means = [4, -4]

    mu = GaussianARD(0, 1,
                     plates=(2,),
                     name='means')
    Z = Categorical(component_probs,
                    plates=(N,),
                    name='classes')
    Y = Mixture(Z, GaussianARD, mu, 1,
                name='observations')

    # Generate data: each sample is exactly the mean of its class
    z = Z.random()
    data = np.array([component_means[z[n]] for n in range(N)], dtype=float)
    Y.observe(data)

    # Initialize means closer to the inferior local optimum in which the
    # cluster means are swapped
    mu.initialize_from_value([0, 6])

    Q = VB(Y, Z, mu)
    Q.save()

    #
    # Standard VB-EM algorithm
    #
    Q.update(repeat=maxiter)
    mu_vbem = mu.u[0].copy()
    L_vbem = Q.compute_lowerbound()

    #
    # VB-EM with deterministic annealing, restarted from the saved state
    #
    Q.load()
    beta = 0.01
    while beta < 1.0:
        # Grow the annealing parameter by 20% per round, capped at 1
        beta = min(beta*1.2, 1.0)
        print("Set annealing to %.2f" % beta)
        Q.set_annealing(beta)
        Q.update(repeat=maxiter, tol=1e-4)
    mu_anneal = mu.u[0].copy()
    L_anneal = Q.compute_lowerbound()

    print("==============================")
    print("RESULTS FOR VB-EM vs ANNEALING")
    report = [
        ("Fixed component probabilities:", np.array(component_probs)),
        ("True component means:", np.array(component_means)),
        ("VB-EM component means:", mu_vbem),
        ("VB-EM lower bound:", L_vbem),
        ("Annealed VB-EM component means:", mu_anneal),
        ("Annealed VB-EM lower bound:", L_anneal),
    ]
    for label, value in report:
        print(label, value)

    return
def model(M=20, N=100, D=10, K=3):
    """
    Construct the linear state-space model with switching dynamics.

    A categorical Markov chain with K states selects, for each time step,
    which of K dynamics matrices drives the D-dimensional latent chain; the
    latent states are mapped to observations through the loading matrix C.
    Returns the VB inference engine holding all nodes.
    """

    #
    # Switching dynamics (HMM)
    #

    # Prior for initial state probabilities
    rho = Dirichlet(1e-3*np.ones(K),
                    name='rho')

    # Prior for state transition probabilities, initialized to a row-wise
    # normalized matrix with a dominant diagonal
    V = Dirichlet(1e-3*np.ones(K),
                  plates=(K,),
                  name='V')
    v = 10*np.identity(K) + 1*np.ones((K,K))
    v = v / np.sum(v, axis=-1, keepdims=True)
    V.initialize_from_value(v)

    # Hidden states (with unknown initial state probabilities and state
    # transition probabilities)
    Z = CategoricalMarkovChain(rho, V,
                               states=N-1,
                               name='Z',
                               plotter=bpplt.CategoricalMarkovChainPlotter(),
                               initialize=False)
    # Random initial moments for the chain
    Z.u[0] = np.random.dirichlet(np.ones(K))
    pair_probs = np.random.dirichlet(0.5*np.ones(K*K), size=(N-2))
    Z.u[1] = np.reshape(pair_probs, (N-2, K, K))

    #
    # Linear state-space models
    #

    # Dynamics matrices with ARD: one D x D matrix per hidden state
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(K,1,D),
                  name='alpha')
    A = GaussianARD(0,
                    alpha,
                    shape=(D,),
                    plates=(K,D),
                    name='A',
                    plotter=bpplt.GaussianHintonPlotter())
    # Start every dynamics matrix near the identity
    a_init = np.identity(D)*np.ones((K,D,D)) + 0.1*np.random.randn(K,D,D)
    A.initialize_from_value(a_init)

    # Latent states with dynamics
    x0_mean = np.zeros(D)
    x0_prec = 1e-3*np.identity(D)
    innovation = np.ones(D)
    X = SwitchingGaussianMarkovChain(x0_mean,
                                     x0_prec,
                                     A,          # dynamics
                                     Z,          # dynamics selection
                                     innovation,
                                     n=N,        # time instances
                                     name='X',
                                     plotter=bpplt.GaussianMarkovChainPlotter())
    X.initialize_from_value(10*np.random.randn(N,D))

    # Mixing matrix from latent space to observation space using ARD
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='gamma')
    C = GaussianARD(0,
                    gamma,
                    shape=(D,),
                    plates=(M,1),
                    name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=-3,cols=-1))
    C.initialize_from_value(np.random.randn(M,1,D))

    # Underlying noiseless function
    F = SumMultiply('i,i',
                    C,
                    X,
                    name='F')

    #
    # Mixing the models
    #

    # Observation noise
    tau = Gamma(1e-5,
                1e-5,
                name='tau')
    tau.initialize_from_value(1e2)

    # Emission/observation distribution
    Y = GaussianARD(F, tau,
                    name='Y')

    return VB(Y, F,
              Z, rho, V,
              C, gamma, X, A, alpha,
              tau)
def test_annealing(self):
    """
    Test the gradients of a GaussianARD node when annealing is enabled.

    Three properties are checked with the annealing parameter set to 0.1:
    the analytic gradient matches a finite-difference gradient of the lower
    bound, the finite-difference gradient vanishes after a VB update, and
    the Riemannian gradient equals the parameter change made by an update.
    """

    X = GaussianARD(3, 4)
    X.initialize_from_parameters(-1, 6)

    Q = VB(X)
    Q.set_annealing(0.1)

    def numerical_gradient(eps):
        # Forward-difference gradient of the lower bound with respect to
        # each of the node's two parameters.  The node is left at its last
        # perturbed parameter values.
        p0 = X.get_parameters()
        l0 = Q.compute_lowerbound(ignore_masked=False)
        grad = []
        for k in range(2):
            params = [p0[0], p0[1]]
            params[k] = params[k] + eps
            X.set_parameters(params)
            l1 = Q.compute_lowerbound(ignore_masked=False)
            grad.append((l1 - l0) / eps)
        return grad

    #
    # Check that the gradient is correct
    #

    # Analytic gradient (evaluated before the parameters are perturbed)
    rg = X.get_riemannian_gradient()
    g = X.get_gradient(rg)
    g_num = numerical_gradient(1e-6)
    self.assertAllClose(g[0], g_num[0])
    self.assertAllClose(g[1], g_num[1])

    #
    # Gradient should be zero after updating
    #

    X.update()
    g_num = numerical_gradient(1e-8)
    self.assertAllClose(0, g_num[0], atol=1e-5)
    self.assertAllClose(0, g_num[1], atol=1e-5)

    #
    # Not at the optimum: the Riemannian gradient is the VB-EM step
    #

    X.initialize_from_parameters(-1, 6)
    # Initial parameters
    phi0 = X.phi
    # Gradient
    g = X.get_riemannian_gradient()
    # Parameters after VB-EM update
    X.update()
    phi1 = X.phi
    # Check
    self.assertAllClose(g[0], phi1[0] - phi0[0])
    self.assertAllClose(g[1], phi1[1] - phi0[1])
def test_message_to_parent(self):
    """
    Test the messages a Mixture node sends to its parents.

    Covers parameters with and without the cluster plate axis, cluster
    assignments with fewer plate axes than the parameters, and a
    regression case with a Categorical mixture.
    """
    K = 3

    def check_scalar_observation(make_alpha):
        """Run the scalar-observation scenario with the given Alpha factory."""
        Mu = GaussianARD(2, 1, ndim=0, plates=(K,))
        mu, mumu = Mu._message_to_child()
        Alpha = make_alpha()
        alpha, logalpha = Alpha._message_to_child()
        z = Categorical(np.ones(K) / K)
        X = Mixture(z, GaussianARD, Mu, Alpha)
        Y = GaussianARD(X, 4)
        Y.observe(5)
        x, xx = X._message_to_child()

        to_z = z._message_from_children()
        expected_logpdf = random.gaussian_logpdf(xx * alpha,
                                                 x * alpha * mu,
                                                 mumu * alpha,
                                                 logalpha,
                                                 0)
        self.assertAllClose(to_z[0] * np.ones(K),
                            expected_logpdf * np.ones(K))

        to_mu = Mu._message_from_children()
        self.assertAllClose(to_mu[0],
                            1 / K * (alpha * x) * np.ones(3))
        self.assertAllClose(to_mu[1],
                            -0.5 * 1 / K * alpha * np.ones(3))

    # Broadcasting the moments on the cluster axis
    check_scalar_observation(lambda: Gamma(3, 1, plates=(K,)))

    # Some parameters do not have cluster plate axis (Alpha has no plates)
    check_scalar_observation(lambda: Gamma(3, 1))

    # Cluster assignments do not have as many plate axes as parameters.
    M = 2
    Mu = GaussianARD(2, 1, ndim=0, plates=(K, M))
    mu, mumu = Mu._message_to_child()
    Alpha = Gamma(3, 1, plates=(K, M))
    alpha, logalpha = Alpha._message_to_child()
    z = Categorical(np.ones(K) / K)
    X = Mixture(z, GaussianARD, Mu, Alpha, cluster_plate=-2)
    Y = GaussianARD(X, 4)
    Y.observe(5 * np.ones(M))
    x, xx = X._message_to_child()

    to_z = z._message_from_children()
    per_plate_logpdf = random.gaussian_logpdf(xx * alpha,
                                              x * alpha * mu,
                                              mumu * alpha,
                                              logalpha,
                                              0) * np.ones((K, M))
    self.assertAllClose(to_z[0] * np.ones(K),
                        np.sum(per_plate_logpdf, axis=-1))

    to_mu = Mu._message_from_children()
    self.assertAllClose(to_mu[0] * np.ones((K, M)),
                        1 / K * (alpha * x) * np.ones((K, M)))
    self.assertAllClose(to_mu[1] * np.ones((K, M)),
                        -0.5 * 1 / K * alpha * np.ones((K, M)))

    # Mixed distribution broadcasts g.
    # Regression test: this used to raise an error, so it only needs to run.
    Z = Categorical([0.3, 0.5, 0.2])
    X = Mixture(Z, Categorical, [[0.2, 0.8], [0.1, 0.9], [0.3, 0.7]])
    Z._message_from_children()
# Script fragment: construct a linear state-space model and start
# generating synthetic data for it.

import numpy as np
# Fix the seed so that the random draws below are reproducible
np.random.seed(1)
from bayespy.nodes import GaussianARD, GaussianMarkovChain, Gamma, Dot

# Model sizes used for the plates/shapes below
M = 30
N = 400
D = 10

# ARD prior and D x D dynamics matrix (D plates, each a D-vector)
alpha = Gamma(1e-5, 1e-5, plates=(D, ), name='alpha')
A = GaussianARD(0, alpha, shape=(D, ), plates=(D, ), name='A')

# Latent Markov chain with n=N time instances
X = GaussianMarkovChain(np.zeros(D), 1e-3 * np.identity(D), A, np.ones(D), n=N, name='X')

# ARD prior and mixing matrix with plates (M, 1)
gamma = Gamma(1e-5, 1e-5, plates=(D, ), name='gamma')
C = GaussianARD(0, gamma, shape=(D, ), plates=(M, 1), name='C')

# Product of the mixing matrix and the latent states
F = Dot(C, X, name='F')
C.initialize_from_random()

# Noise precision and the observed node
tau = Gamma(1e-5, 1e-5, name='tau')
Y = GaussianARD(F, tau, name='Y')

from bayespy.inference import VB
Q = VB(X, C, gamma, A, alpha, tau, Y)

# Synthetic data: 4x4 matrix with a 2x2 rotation by angle w in the
# upper-left corner, a one at [2, 2] and an all-zero last row/column.
w = 0.3
a = np.array([[np.cos(w), -np.sin(w), 0, 0], [np.sin(w), np.cos(w), 0, 0], [0, 0, 1, 0], [0, 0, 0, 0]])
c = np.random.randn(M, 4)

# Uninitialised buffers; only x[0] is filled within this fragment
x = np.empty((N, 4))
f = np.empty((M, N))
y = np.empty((M, N))
x[0] = 10 * np.random.randn(4)
def test_message_to_parent_mu(self):
    """
    Test that GaussianARD computes the message to the 1st parent correctly.

    Each section builds a small model, observes (or updates) the child and
    compares the two-element message collected by the mean parent with a
    hand-computed expectation.  The sections vary the relative shapes of
    the mean, the precision and the node, the plates, and the mask.
    """

    # Check formula with uncertain parent alpha
    mu = GaussianARD(0, 1)
    alpha = Gamma(2,1)
    X = GaussianARD(mu, alpha)
    X.observe(3)
    (m0, m1) = mu._message_from_children()
    #(m0, m1) = X._message_to_parent(0)
    self.assertAllClose(m0,
                        2*3)
    self.assertAllClose(m1,
                        -0.5*2)

    # Check formula with uncertain node
    # (X is not observed; it is updated from its observed child Y)
    mu = GaussianARD(1, 1e10)
    X = GaussianARD(mu, 2)
    Y = GaussianARD(X, 1)
    Y.observe(5)
    X.update()
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0,
                        2 * 1/(2+1)*(2*1+1*5))
    self.assertAllClose(m1,
                        -0.5*2)

    # Check alpha larger than mu
    # (alpha has a leading axis of length 3 that mu lacks; the expected
    # message carries an extra factor 3)
    mu = GaussianARD(np.zeros((2,3)), 1e10, shape=(2,3))
    X = GaussianARD(mu, 2*np.ones((3,2,3)))
    X.observe(3*np.ones((3,2,3)))
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0,
                        2*3 * 3 * np.ones((2,3)))
    self.assertAllClose(m1,
                        -0.5 * 3 * 2*misc.identity(2,3))

    # Check mu larger than alpha
    mu = GaussianARD(np.zeros((3,2,3)), 1e10, shape=(3,2,3))
    X = GaussianARD(mu, 2*np.ones((2,3)))
    X.observe(3*np.ones((3,2,3)))
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0,
                        2 * 3 * np.ones((3,2,3)))
    self.assertAllClose(m1,
                        -0.5 * 2*misc.identity(3,2,3))

    # Check node larger than mu and alpha
    mu = GaussianARD(np.zeros((2,3)), 1e10, shape=(2,3))
    X = GaussianARD(mu, 2*np.ones((3,)), shape=(3,2,3))
    X.observe(3*np.ones((3,2,3)))
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0,
                        2*3 * 3*np.ones((2,3)))
    self.assertAllClose(m1,
                        -0.5 * 2 * 3*misc.identity(2,3))

    # Check broadcasting of dimensions
    # (mu has a length-1 last axis while the node has length 3)
    mu = GaussianARD(np.zeros((2,1)), 1e10, shape=(2,1))
    X = GaussianARD(mu, 2*np.ones((2,3)), shape=(2,3))
    X.observe(3*np.ones((2,3)))
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0,
                        2*3 * 3*np.ones((2,1)))
    self.assertAllClose(m1,
                        -0.5 * 2 * 3*misc.identity(2,1))

    # Check plates for smaller mu than node
    # (the expected factor 5*2 matches the node's plate of size 5 times
    # its leading shape axis of size 2)
    mu = GaussianARD(0,1,
                     shape=(3,),
                     plates=(4,1,1))
    X = GaussianARD(mu, 2*np.ones((3,)),
                    shape=(2,3),
                    plates=(4,5))
    X.observe(3*np.ones((4,5,2,3)))
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0 * np.ones((4,1,1,3)),
                        2*3 * 5*2*np.ones((4,1,1,3)))
    self.assertAllClose(m1 * np.ones((4,1,1,3,3)),
                        -0.5*2 * 5*2*misc.identity(3) * np.ones((4,1,1,3,3)))

    # Check mask
    # (the first mask row has 3 True entries and the second has 2, which
    # are the per-row factors in the expectations)
    mu = GaussianARD(np.zeros((2,1,3)), 1e10, shape=(3,))
    X = GaussianARD(mu, 2*np.ones((2,4,3)),
                    shape=(3,),
                    plates=(2,4,))
    X.observe(3*np.ones((2,4,3)), mask=[[True, True, True, False],
                                        [False, True, False, True]])
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0,
                        (2*3 * np.ones((2,1,3))
                         * np.array([[[3]], [[2]]])))
    self.assertAllClose(m1,
                        (-0.5*2 * misc.identity(3)
                         * np.ones((2,1,1,1))
                         * np.array([[[[3]]], [[[2]]]])))

    # Check mask with different shapes
    # (mu is declared with shape=() here, while X keeps shape=(3,))
    mu = GaussianARD(np.zeros((2,1,3)), 1e10, shape=())
    X = GaussianARD(mu, 2*np.ones((2,4,3)),
                    shape=(3,),
                    plates=(2,4,))
    mask = np.array([[True, True, True, False],
                     [False, True, False, True]])
    X.observe(3*np.ones((2,4,3)), mask=mask)
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0,
                        2*3 * np.sum(np.ones((2,4,3))*mask[...,None],
                                     axis=-2,
                                     keepdims=True))
    self.assertAllClose(m1,
                        (-0.5*2 * np.sum(np.ones((2,4,3))*mask[...,None],
                                         axis=-2,
                                         keepdims=True)))

    # Check non-ARD Gaussian child
    # (the expected mean is computed below from alpha, mu, Lambda and y)
    mu = np.array([1,2])
    Mu = GaussianARD(mu, 1e10, shape=(2,))
    alpha = np.array([3,4])
    Lambda = np.array([[1, 0.5],
                       [0.5, 1]])
    X = GaussianARD(Mu, alpha, ndim=1)
    Y = Gaussian(X, Lambda)
    y = np.array([5,6])
    Y.observe(y)
    X.update()
    (m0, m1) = Mu._message_from_children()
    mean = np.dot(np.linalg.inv(np.diag(alpha)+Lambda),
                  np.dot(np.diag(alpha), mu)
                  + np.dot(Lambda, y))
    self.assertAllClose(m0,
                        np.dot(np.diag(alpha), mean))
    self.assertAllClose(m1,
                        -0.5*np.diag(alpha))

    # Check broadcasted variable axes
    # (mu has shape (1,), the node shape (3,); expectations sum over the
    # broadcast axis with keepdims=True)
    mu = GaussianARD(np.zeros(1), 1e10, shape=(1,))
    X = GaussianARD(mu, 2,
                    shape=(3,))
    X.observe(3*np.ones(3))
    (m0, m1) = mu._message_from_children()
    self.assertAllClose(m0,
                        2*3 * np.sum(np.ones(3), axis=-1, keepdims=True))
    self.assertAllClose(m1,
                        -0.5*2 * np.sum(np.identity(3),
                                        axis=(-1,-2),
                                        keepdims=True))

    pass
def test_message_to_child(self):
    """
    Test moments of GaussianARD.

    Checks the shapes and values of the two moments returned by
    ``_message_to_child`` for scalar nodes, for several ways of
    broadcasting the mean and precision over a (2, 3, 4) variable, for a
    node with plates, and for a node updated from an observed child.
    """
    # Moments must have the full shape even when inputs are broadcast
    node = GaussianARD(np.zeros((2,)), np.ones((3, 2)), shape=(4, 3, 2))
    first, second = node._message_to_child()
    self.assertEqual(np.shape(first), (4, 3, 2))
    self.assertEqual(np.shape(second), (4, 3, 2, 4, 3, 2))

    # Scalar formula: mean 2, precision 3
    node = GaussianARD(2, 3)
    first, second = node._message_to_child()
    self.assertAllClose(first, 2)
    self.assertAllClose(second, 2**2 + 1 / 3)

    # The same (2, 3, 4) moments must result however mu/alpha are broadcast
    dims = (2, 3, 4)
    broadcast_cases = (
        # multidimensional arrays
        (np.ones((2, 1, 4)), np.ones((2, 3, 1)), {'ndim': 3}),
        # dim-broadcasted mu
        (np.ones((3, 1)), np.ones((2, 3, 4)), {'ndim': 3}),
        # dim-broadcasted alpha
        (np.ones((2, 3, 4)), np.ones((3, 1)), {'ndim': 3}),
        # dim-broadcasted mu and alpha
        (np.ones((3, 1)), np.ones((3, 1)), {'shape': (2, 3, 4)}),
    )
    for (mu_pattern, alpha_pattern, node_kwargs) in broadcast_cases:
        node = GaussianARD(2 * mu_pattern, 3 * alpha_pattern, **node_kwargs)
        first, second = node._message_to_child()
        self.assertAllClose(first, 2 * np.ones(dims))
        self.assertAllClose(
            second,
            2**2 * np.ones(dims + dims) + 1 / 3 * misc.identity(*dims))

    # Dim-broadcasted mu that is itself a node with plates
    mu = GaussianARD(2 * np.ones((5, 1, 3, 4)),
                     np.ones((5, 1, 3, 4)),
                     shape=(3, 4),
                     plates=(5, 1))
    node = GaussianARD(mu,
                       3 * np.ones((5, 2, 3, 4)),
                       shape=(2, 3, 4),
                       plates=(5,))
    first, second = node._message_to_child()
    self.assertAllClose(first, 2 * np.ones((5,) + dims))
    self.assertAllClose(
        second,
        2**2 * np.ones((5,) + dims + dims) + 1 / 3 * misc.identity(*dims))

    # Posterior after observing a child with unit precision
    node = GaussianARD(2, 3)
    child = GaussianARD(node, 1)
    child.observe(10)
    node.update()
    first, second = node._message_to_child()
    self.assertAllClose(first, 1 / (3 + 1) * (3 * 2 + 1 * 10))
    self.assertAllClose(
        second,
        (1 / (3 + 1) * (3 * 2 + 1 * 10))**2 + 1 / (3 + 1))
def test_init(self):
    """
    Test the creation of Gate node.

    Verifies the resulting ``plates`` and ``dims`` for several combinations
    of gate/parent plates, and that invalid cluster-plate configurations
    raise ``ValueError``.
    """

    def uniform_gate(**kwargs):
        """Categorical over three equally likely classes."""
        return Categorical(np.ones(3) / 3, **kwargs)

    def expect(node, plates, dims):
        """Assert the plates and dims of a constructed Gate node."""
        self.assertEqual(node.plates, plates)
        self.assertEqual(node.dims, dims)

    scalar_dims = ((), ())
    vector_dims = ((2,), (2, 2))

    # Gating scalar node
    Z = uniform_gate()
    X = GaussianARD(0, 1, shape=(), plates=(3,))
    expect(Gate(Z, X), (), scalar_dims)

    # Gating non-scalar node
    Z = uniform_gate()
    X = GaussianARD(0, 1, shape=(2,), plates=(3,))
    expect(Gate(Z, X), (), vector_dims)

    # Plates from Z
    Z = uniform_gate(plates=(4,))
    X = GaussianARD(0, 1, shape=(2,), plates=(3,))
    expect(Gate(Z, X), (4,), vector_dims)

    # Plates from X
    Z = uniform_gate()
    X = GaussianARD(0, 1, shape=(2,), plates=(4, 3))
    expect(Gate(Z, X), (4,), vector_dims)

    # Plates from Z and X
    Z = uniform_gate(plates=(5,))
    X = GaussianARD(0, 1, shape=(2,), plates=(4, 1, 3))
    expect(Gate(Z, X), (4, 5), vector_dims)

    # Gating non-default plate
    Z = uniform_gate()
    X = GaussianARD(0, 1, shape=(), plates=(3, 4))
    expect(Gate(Z, X, gated_plate=-2), (4,), scalar_dims)

    # Fixed gating
    Z = 2
    X = GaussianARD(0, 1, shape=(2,), plates=(3,))
    expect(Gate(Z, X), (), vector_dims)

    # Fixed X
    Z = uniform_gate()
    X = [1, 2, 3]
    expect(Gate(Z, X, moments=GaussianMoments(())), (), scalar_dims)

    # Do not accept non-negative cluster plates
    Z = uniform_gate()
    X = GaussianARD(0, 1, plates=(3,))
    with self.assertRaises(ValueError):
        Gate(Z, X, gated_plate=0)

    # None of the parents have the cluster plate axis
    Z = uniform_gate()
    X = GaussianARD(0, 1)
    with self.assertRaises(ValueError):
        Gate(Z, X)

    # Inconsistent cluster plate
    Z = uniform_gate()
    X = GaussianARD(0, 1, plates=(2,))
    with self.assertRaises(ValueError):
        Gate(Z, X)
def test_message_to_parent(self):
    """
    Test the message to parents of Gate node.

    For each configuration the message to parent 0 (the gating variable)
    and to parent 1 (the gated node) is compared with hand-computed values.
    """

    # Unobserved and broadcasting
    # (Y is never observed here; both messages are expected to be zero)
    Z = 2
    X = GaussianARD(0, 1, shape=(), plates=(3, ))
    F = Gate(Z, X)
    Y = GaussianARD(F, 1)
    m = F._message_to_parent(0)
    self.assertEqual(len(m), 1)
    self.assertAllClose(m[0], 0 * np.ones(3))
    m = F._message_to_parent(1)
    self.assertEqual(len(m), 2)
    self.assertAllClose(m[0] * np.ones(3), [0, 0, 0])
    self.assertAllClose(m[1] * np.ones(3), [0, 0, 0])

    # Gating scalar node
    # (with Z = 2 only the last of the three clusters gets a non-zero
    # message; the 2, 5, 10 below are presumably E[x^2] = x^2 + 1 for
    # x = 1, 2, 3 -- TODO confirm)
    Z = 2
    X = GaussianARD([1, 2, 3], 1, shape=(), plates=(3, ))
    F = Gate(Z, X)
    Y = GaussianARD(F, 1)
    Y.observe(10)
    m = F._message_to_parent(0)
    self.assertAllClose(
        m[0], [10 * 1 - 0.5 * 2, 10 * 2 - 0.5 * 5, 10 * 3 - 0.5 * 10])
    m = F._message_to_parent(1)
    self.assertAllClose(m[0], [0, 0, 10])
    self.assertAllClose(m[1], [0, 0, -0.5])

    # Fixed X
    # (X is a plain list, so the moments class is passed explicitly; the
    # second-moment terms are now 1, 4, 9)
    Z = 2
    X = [1, 2, 3]
    F = Gate(Z, X, moments=GaussianMoments(()))
    Y = GaussianARD(F, 1)
    Y.observe(10)
    m = F._message_to_parent(0)
    self.assertAllClose(
        m[0], [10 * 1 - 0.5 * 1, 10 * 2 - 0.5 * 4, 10 * 3 - 0.5 * 9])
    m = F._message_to_parent(1)
    self.assertAllClose(m[0], [0, 0, 10])
    self.assertAllClose(m[1], [0, 0, -0.5])

    # Uncertain gating
    # (the message to X is weighted by the probabilities 0.2, 0.3, 0.5)
    Z = Categorical([0.2, 0.3, 0.5])
    X = GaussianARD([1, 2, 3], 1, shape=(), plates=(3, ))
    F = Gate(Z, X)
    Y = GaussianARD(F, 1)
    Y.observe(10)
    m = F._message_to_parent(0)
    self.assertAllClose(
        m[0], [10 * 1 - 0.5 * 2, 10 * 2 - 0.5 * 5, 10 * 3 - 0.5 * 10])
    m = F._message_to_parent(1)
    self.assertAllClose(m[0], [0.2 * 10, 0.3 * 10, 0.5 * 10])
    self.assertAllClose(m[1], [-0.5 * 0.2, -0.5 * 0.3, -0.5 * 0.5])

    # Plates in Z
    # (two gate values, one per observation 10 and 20)
    Z = [2, 0]
    X = GaussianARD([1, 2, 3], 1, shape=(), plates=(3, ))
    F = Gate(Z, X)
    Y = GaussianARD(F, 1)
    Y.observe([10, 20])
    m = F._message_to_parent(0)
    self.assertAllClose(
        m[0],
        [[10 * 1 - 0.5 * 2, 10 * 2 - 0.5 * 5, 10 * 3 - 0.5 * 10],
         [20 * 1 - 0.5 * 2, 20 * 2 - 0.5 * 5, 20 * 3 - 0.5 * 10]])
    m = F._message_to_parent(1)
    self.assertAllClose(m[0], [20, 0, 10])
    self.assertAllClose(m[1], [-0.5, 0, -0.5])

    # Plates in X
    # (X has an extra leading plate of size 2; the message to Z sums the
    # contributions of both rows)
    Z = 2
    X = GaussianARD([[1, 2, 3], [4, 5, 6]], 1, shape=(), plates=(
        2,
        3,
    ))
    F = Gate(Z, X)
    Y = GaussianARD(F, 1)
    Y.observe([10, 20])
    m = F._message_to_parent(0)
    self.assertAllClose(m[0], [
        10 * 1 - 0.5 * 2 + 20 * 4 - 0.5 * 17,
        10 * 2 - 0.5 * 5 + 20 * 5 - 0.5 * 26,
        10 * 3 - 0.5 * 10 + 20 * 6 - 0.5 * 37
    ])
    m = F._message_to_parent(1)
    self.assertAllClose(m[0], [[0, 0, 10], [0, 0, 20]])
    self.assertAllClose(m[1] * np.ones((2, 3)),
                        [[0, 0, -0.5], [0, 0, -0.5]])

    # Gating non-default plate
    # (the gated axis is the second-to-last plate of X)
    Z = 2
    X = GaussianARD([[1], [2], [3]], 1, shape=(), plates=(3, 1))
    F = Gate(Z, X, gated_plate=-2)
    Y = GaussianARD(F, 1)
    Y.observe([10])
    m = F._message_to_parent(0)
    self.assertAllClose(
        m[0], [10 * 1 - 0.5 * 2, 10 * 2 - 0.5 * 5, 10 * 3 - 0.5 * 10])
    m = F._message_to_parent(1)
    self.assertAllClose(m[0], [[0], [0], [10]])
    self.assertAllClose(m[1], [[0], [0], [-0.5]])

    # Gating non-scalar node
    # (each cluster is a 2-vector; the second message is a 2x2 matrix per
    # cluster)
    Z = 2
    X = GaussianARD([[1, 4], [2, 5], [3, 6]], 1, shape=(2, ), plates=(3, ))
    F = Gate(Z, X)
    Y = GaussianARD(F, 1)
    Y.observe([10, 20])
    m = F._message_to_parent(0)
    self.assertAllClose(m[0], [
        10 * 1 - 0.5 * 2 + 20 * 4 - 0.5 * 17,
        10 * 2 - 0.5 * 5 + 20 * 5 - 0.5 * 26,
        10 * 3 - 0.5 * 10 + 20 * 6 - 0.5 * 37
    ])
    m = F._message_to_parent(1)
    I = np.identity(2)
    self.assertAllClose(m[0], [[0, 0], [0, 0], [10, 20]])
    self.assertAllClose(m[1], [0 * I, 0 * I, -0.5 * I])

    # Broadcasting the moments on the cluster axis
    # (X is built from the scalar mean 2 for all three clusters)
    Z = 2
    X = GaussianARD(2, 1, shape=(), plates=(3, ))
    F = Gate(Z, X)
    Y = GaussianARD(F, 1)
    Y.observe(10)
    m = F._message_to_parent(0)
    self.assertAllClose(
        m[0], [10 * 2 - 0.5 * 5, 10 * 2 - 0.5 * 5, 10 * 2 - 0.5 * 5])
    m = F._message_to_parent(1)
    self.assertAllClose(m[0], [0, 0, 10])
    self.assertAllClose(m[1], [0, 0, -0.5])

    pass
def test_message_to_parent(self):
    """
    Test the message to parents of Mixture node.

    Covers Gaussian mixtures whose parameters do or do not carry the
    cluster plate axis, a regression case for a Categorical mixture,
    nested mixtures, and the agreement between a nested Mixture and the
    equivalent construction from Gate nodes.
    """

    K = 3

    # Broadcasting the moments on the cluster axis
    Mu = GaussianARD(2, 1, ndim=0, plates=(K, ))
    (mu, mumu) = Mu._message_to_child()
    Alpha = Gamma(3, 1, plates=(K, ))
    (alpha, logalpha) = Alpha._message_to_child()
    z = Categorical(np.ones(K) / K)
    X = Mixture(z, GaussianARD, Mu, Alpha)
    tau = 4
    Y = GaussianARD(X, tau)
    y = 5
    Y.observe(y)
    (x, xx) = X._message_to_child()
    # Message to the cluster assignment is compared with the Gaussian
    # log-pdf helper evaluated at the moments
    m = z._message_from_children()
    self.assertAllClose(
        m[0] * np.ones(K),
        random.gaussian_logpdf(xx * alpha, x * alpha * mu, mumu * alpha,
                               logalpha, 0) * np.ones(K))
    # Message to the means carries the factor 1/K (the uniform
    # assignment probabilities)
    m = Mu._message_from_children()
    self.assertAllClose(m[0], 1 / K * (alpha * x) * np.ones(3))
    self.assertAllClose(m[1], -0.5 * 1 / K * alpha * np.ones(3))

    # Some parameters do not have cluster plate axis
    Mu = GaussianARD(2, 1, ndim=0, plates=(K, ))
    (mu, mumu) = Mu._message_to_child()
    Alpha = Gamma(3, 1)  # Note: no cluster plate axis!
    (alpha, logalpha) = Alpha._message_to_child()
    z = Categorical(np.ones(K) / K)
    X = Mixture(z, GaussianARD, Mu, Alpha)
    tau = 4
    Y = GaussianARD(X, tau)
    y = 5
    Y.observe(y)
    (x, xx) = X._message_to_child()
    m = z._message_from_children()
    self.assertAllClose(
        m[0] * np.ones(K),
        random.gaussian_logpdf(xx * alpha, x * alpha * mu, mumu * alpha,
                               logalpha, 0) * np.ones(K))
    m = Mu._message_from_children()
    self.assertAllClose(m[0], 1 / K * (alpha * x) * np.ones(3))
    self.assertAllClose(m[1], -0.5 * 1 / K * alpha * np.ones(3))

    # Cluster assignments do not have as many plate axes as parameters.
    # (parameters have plates (K, M); the expected message to z sums the
    # log-pdf over the last axis)
    M = 2
    Mu = GaussianARD(2, 1, ndim=0, plates=(K, M))
    (mu, mumu) = Mu._message_to_child()
    Alpha = Gamma(3, 1, plates=(K, M))
    (alpha, logalpha) = Alpha._message_to_child()
    z = Categorical(np.ones(K) / K)
    X = Mixture(z, GaussianARD, Mu, Alpha, cluster_plate=-2)
    tau = 4
    Y = GaussianARD(X, tau)
    y = 5 * np.ones(M)
    Y.observe(y)
    (x, xx) = X._message_to_child()
    m = z._message_from_children()
    self.assertAllClose(
        m[0] * np.ones(K),
        np.sum(random.gaussian_logpdf(xx * alpha, x * alpha * mu,
                                      mumu * alpha, logalpha, 0) * np.ones(
                                          (K, M)),
               axis=-1))
    m = Mu._message_from_children()
    self.assertAllClose(m[0] * np.ones((K, M)),
                        1 / K * (alpha * x) * np.ones((K, M)))
    self.assertAllClose(m[1] * np.ones((K, M)),
                        -0.5 * 1 / K * alpha * np.ones((K, M)))

    # Mixed distribution broadcasts g
    # This tests for a found bug. The bug caused an error.
    # (no assertion: the call only has to complete without raising)
    Z = Categorical([0.3, 0.5, 0.2])
    X = Mixture(Z, Categorical, [[0.2, 0.8], [0.1, 0.9], [0.3, 0.7]])
    m = Z._message_from_children()

    #
    # Test nested mixtures
    #
    # (fixed assignments t1 and t2 select entries of the (4, 3) plates of
    # p; the expectation is the Dirichlet parameter [1, 1] plus counts)
    t1 = [1, 1, 0, 3, 3]
    t2 = [2]
    p = Dirichlet([1, 1], plates=(4, 3))
    X = Mixture(t1, Mixture, t2, Categorical, p)
    X.observe([1, 1, 0, 0, 0])
    p.update()
    self.assertAllClose(p.phi[0], [
        [[1, 1], [1, 1], [2, 1]],
        [[1, 1], [1, 1], [1, 3]],
        [[1, 1], [1, 1], [1, 1]],
        [[1, 1], [1, 1], [3, 1]],
    ])

    # Test sample plates in nested mixtures
    # (the outermost assignment is uncertain with probabilities 0.3/0.7,
    # so the expected counts are weighted by those probabilities)
    t1 = Categorical([0.3, 0.7], plates=(5, ))
    t2 = [[1], [1], [0], [3], [3]]
    t3 = 2
    p = Dirichlet([1, 1], plates=(2, 4, 3))
    X = Mixture(t1, Mixture, t2, Mixture, t3, Categorical, p)
    X.observe([1, 1, 0, 0, 0])
    p.update()
    self.assertAllClose(p.phi[0], [[
        [[1, 1], [1, 1], [1.3, 1]],
        [[1, 1], [1, 1], [1, 1.6]],
        [[1, 1], [1, 1], [1, 1]],
        [[1, 1], [1, 1], [1.6, 1]],
    ], [
        [[1, 1], [1, 1], [1.7, 1]],
        [[1, 1], [1, 1], [1, 2.4]],
        [[1, 1], [1, 1], [1, 1]],
        [[1, 1], [1, 1], [2.4, 1]],
    ]])

    # Check that Gate and nested Mixture are equal
    # (build the model once with nested Mixture and once with Gate nodes,
    # then compare the first message element received by each parent)
    t1 = Categorical([0.3, 0.7], plates=(5, ))
    t2 = Categorical([0.1, 0.3, 0.6], plates=(5, 1))
    p = Dirichlet([1, 2, 3, 4], plates=(2, 3))
    X = Mixture(t1, Mixture, t2, Categorical, p)
    X.observe([3, 3, 1, 2, 2])
    t1_msg = t1._message_from_children()
    t2_msg = t2._message_from_children()
    p_msg = p._message_from_children()
    t1 = Categorical([0.3, 0.7], plates=(5, ))
    t2 = Categorical([0.1, 0.3, 0.6], plates=(5, 1))
    p = Dirichlet([1, 2, 3, 4], plates=(2, 3))
    X = Categorical(Gate(t1, Gate(t2, p)))
    X.observe([3, 3, 1, 2, 2])
    t1_msg2 = t1._message_from_children()
    t2_msg2 = t2._message_from_children()
    p_msg2 = p._message_from_children()
    self.assertAllClose(t1_msg[0], t1_msg2[0])
    self.assertAllClose(t2_msg[0], t2_msg2[0])
    self.assertAllClose(p_msg[0], p_msg2[0])

    pass
def test_rotate_plates(self):
    """
    Test ``GaussianARD.rotate_plates`` on a node with three plates.

    The moments after rotating with a random 3x3 matrix are compared with
    moments computed directly from the pre-rotation moments, first for a
    node at its initial state and then for a node updated from an
    observed ``Gaussian`` child.
    """

    def assert_rotation(node):
        """Rotate ``node`` randomly and compare with the expected moments."""
        mean, second = node.get_moments()
        covariance = second - linalg.outer(mean, mean, ndim=1)
        rotation = np.random.randn(3, 3)
        expected_mean = np.einsum('ik,kj->ij', rotation, mean)
        expected_cov = np.einsum('k,kij->kij',
                                 np.sum(rotation, axis=0)**2,
                                 covariance)
        expected_second = expected_cov + linalg.outer(expected_mean,
                                                      expected_mean,
                                                      ndim=1)
        node.rotate_plates(rotation, plate_axis=-1)
        rotated_mean, rotated_second = node.get_moments()
        self.assertAllClose(rotated_mean, expected_mean)
        self.assertAllClose(rotated_second, expected_second)

    # Basic test for Gaussian vectors
    X = GaussianARD(np.random.randn(3, 2),
                    np.random.rand(3, 2),
                    shape=(2,),
                    plates=(3,))
    assert_rotation(X)

    # Test full covariance, that is, with observations
    X = GaussianARD(np.random.randn(3, 2),
                    np.random.rand(3, 2),
                    shape=(2,),
                    plates=(3,))
    Y = Gaussian(X,
                 [[2.0, 1.5], [1.5, 3.0]],
                 plates=(3,))
    Y.observe(np.random.randn(3, 2))
    X.update()
    assert_rotation(X)
def model(M=10, N=100, D=3):
    """
    Construct linear state-space model.

    See, for instance, the following publication:
    "Fast variational Bayesian linear state-space model"
    Luttinen (ECML 2013)

    Parameters
    ----------
    M : int
        First plate size of the mixing matrix ``C`` (plates ``(M, 1)``).
    N : int
        Number of time instances of the latent chain ``X``.
    D : int
        Dimensionality of the latent state vectors.

    Returns
    -------
    VB
        Inference engine over the nodes
        ``Y, F, C, gamma, X, A, alpha, tau``.
    """

    # Dynamics matrix with ARD
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='alpha')
    A = GaussianARD(0,
                    alpha,
                    shape=(D,),
                    plates=(D,),
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=1,
                                                        scale=0),
                    name='A')
    A.initialize_from_value(np.identity(D))

    # Latent states with dynamics
    X = GaussianMarkovChain(np.zeros(D),          # mean of x0
                            1e-3*np.identity(D),  # prec of x0
                            A,                    # dynamics
                            np.ones(D),           # innovation
                            n=N,                  # time instances
                            plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                            name='X')
    X.initialize_from_value(np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='gamma')
    gamma.initialize_from_value(1e-2*np.ones(D))
    C = GaussianARD(0,
                    gamma,
                    shape=(D,),
                    plates=(M, 1),
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=2,
                                                        scale=0),
                    name='C')
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Observation noise
    tau = Gamma(1e-5,
                1e-5,
                name='tau')
    tau.initialize_from_value(1e2)

    # Underlying noiseless function
    F = SumMultiply('i,i',
                    C,
                    X,
                    name='F')

    # Noisy observations
    Y = GaussianARD(F,
                    tau,
                    name='Y')

    # Each node is listed exactly once; the original call passed C a
    # second time at the end of the argument list, which was redundant.
    Q = VB(Y, F, C, gamma, X, A, alpha, tau)

    return Q
def test_message_to_child(self):
    """
    Test the message to child of Concatenate node.

    In every scenario the first parent contributes 2 entries and the second
    3 entries along the concatenated plate axis; the moments of the result
    are compared slice by slice with the parents' moments.
    """

    def assert_concatenated(joined, first, second, tail0, tail1):
        """
        Compare joined moments with the parents' moments.

        ``tail0``/``tail1`` are the trailing axes (after the concatenated
        axis) to which the first/second moment is broadcast.
        """
        joined0 = joined[0] * np.ones((5,) + tail0)
        joined1 = joined[1] * np.ones((5,) + tail1)
        self.assertAllClose(joined0[:2], first[0] * np.ones((2,) + tail0))
        self.assertAllClose(joined1[:2], first[1] * np.ones((2,) + tail1))
        self.assertAllClose(joined0[2:], second[0] * np.ones((3,) + tail0))
        self.assertAllClose(joined1[2:], second[1] * np.ones((3,) + tail1))

    # Two parents without shapes
    X1 = GaussianARD(0, 1, plates=(2,), shape=())
    X2 = GaussianARD(0, 1, plates=(3,), shape=())
    Y = Concatenate(X1, X2)
    u1 = X1.get_moments()
    u2 = X2.get_moments()
    u = Y.get_moments()
    assert_concatenated(u, u1, u2, (), ())

    # Two parents with shapes
    X1 = GaussianARD(0, 1, plates=(2,), shape=(4,))
    X2 = GaussianARD(0, 1, plates=(3,), shape=(4,))
    Y = Concatenate(X1, X2)
    u1 = X1.get_moments()
    u2 = X2.get_moments()
    u = Y.get_moments()
    assert_concatenated(u, u1, u2, (4,), (4, 4))

    # Test with non-constant axis
    X1 = GaussianARD(0, 1, plates=(2, 4), shape=())
    X2 = GaussianARD(0, 1, plates=(3, 4), shape=())
    Y = Concatenate(X1, X2, axis=-2)
    u1 = X1.get_moments()
    u2 = X2.get_moments()
    u = Y.get_moments()
    assert_concatenated(u, u1, u2, (4,), (4,))

    # Test with constant parent: the moments of the constant are read
    # from the parent node that Concatenate created for it
    X1 = np.random.randn(2, 4)
    X2 = GaussianARD(0, 1, plates=(3,), shape=(4,))
    Y = Concatenate(X1, X2)
    u1 = Y.parents[0].get_moments()
    u2 = X2.get_moments()
    u = Y.get_moments()
    assert_concatenated(u, u1, u2, (4,), (4, 4))
def check(indices, plates, shape, axis=-1, use_mask=False):
    """
    Compare a ``Take``-based model with an equivalent brute-force model.

    A random GaussianARD ``X`` is observed through ``Take(X, indices)``.
    The resulting posterior moments of ``X`` must equal those obtained by
    attaching one observed child per taken index directly to ``X``.

    Parameters
    ----------
    indices : array_like
        Indices passed to ``Take``; flattened for the brute-force model.
    plates, shape : tuple
        Plates and variable shape of ``X``.
    axis : int
        Negative plate axis along which indices are taken.
    use_mask : bool
        If true, observe with an alternating False/True mask.
    """
    full_shape = plates + shape
    mu = np.random.rand(*full_shape)
    alpha = np.random.rand(*full_shape)

    # Model under test: observe X through the Take node
    X = GaussianARD(mu, alpha, shape=shape, plates=plates)
    Y = Take(X, indices, plate_axis=axis)
    Z = GaussianARD(Y, 1, shape=shape)
    z = np.random.randn(*(Z.get_shape(0)))
    mask = True
    if use_mask:
        plate_counter = np.reshape(np.arange(np.prod(Z.plates)), Z.plates)
        mask = np.mod(plate_counter, 2) != 0
    Z.observe(z, mask=mask)
    X.update()
    (x0, x1) = X.get_moments()

    # For comparison, build the same model brute force
    X = GaussianARD(mu, alpha, shape=shape, plates=plates)

    # Number of plate axes preceding the take axis
    n_leading = len(X.plates) + axis

    # Collapse the axes produced by the indices into a single axis
    leading = X.plates[:axis]
    trailing = X.plates[(axis + 1):] if axis < -1 else ()
    collapsed_plates = leading + (-1,) + trailing
    z = np.reshape(z, collapsed_plates + shape)
    if use_mask:
        mask = np.reshape(mask, collapsed_plates)

    # One observed child per taken index
    keep_leading = n_leading * (slice(None),)
    for (j, ind) in enumerate(np.array(indices).flatten()):
        Zi = GaussianARD(X[keep_leading + (ind,)], 1, ndim=len(shape))
        maski = mask[keep_leading + (j,)] if use_mask else True
        Zi.observe(z[keep_leading + (j,)], mask=maski)

    X.update()

    self.assertAllClose(x0, X.get_moments()[0])
    self.assertAllClose(x1, X.get_moments()[1])
def model(M, N, D, K):
    """
    Construct the linear state-space model with time-varying dynamics.

    The latent chain ``X`` uses dynamics formed from the ``D x D x K``
    array ``A`` weighted by the ``K``-dimensional mixing-weight chain ``S``.

    For reference, see the following publication:
    (TODO)

    Parameters
    ----------
    M : int
        First plate size of the mixing matrix ``C`` (plates ``(M, 1)``).
    N : int
        Number of time instances of ``S`` and ``X``.
    D : int
        Dimensionality of the latent states ``X``.
    K : int
        Dimensionality of the mixing-weight process ``S``.

    Returns
    -------
    VB
        Inference engine over all stochastic nodes of the model.
    """
    hinton_rows_cols = dict(rows=0, cols=1, scale=0)

    # ------------------------------------------------------------------
    # Latent mixing-weight process
    # ------------------------------------------------------------------

    # beta : (K) x ()
    beta = Gamma(1e-5, 1e-5, plates=(K,), name='beta')

    # B : (K) x (K) -- dynamics of the weights, centred on the identity
    B = GaussianARD(np.identity(K),
                    beta,
                    shape=(K,),
                    plates=(K,),
                    name='B',
                    plotter=bpplt.GaussianHintonPlotter(**hinton_rows_cols),
                    initialize=False)
    B.initialize_from_value(np.identity(K))

    # S : () x (N,K) -- weights in the linear combination of the state
    # dynamics matrices
    S = GaussianMarkovChain(np.ones(K),
                            1e-6 * np.identity(K),
                            B,
                            np.ones(K),
                            n=N,
                            name='S',
                            plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                            initialize=False)
    s = 10 * np.random.randn(N, K)
    s[:, 0] = 10
    S.initialize_from_value(s)

    # ------------------------------------------------------------------
    # Latent states
    # ------------------------------------------------------------------

    # alpha : (D,K) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(D, K), name='alpha')
    alpha.initialize_from_value(np.ones((D, K)))

    # A : (D) x (D,K)
    A = GaussianARD(0,
                    alpha,
                    shape=(D, K),
                    plates=(D,),
                    name='A',
                    plotter=bpplt.GaussianHintonPlotter(**hinton_rows_cols),
                    initialize=False)

    # Initialize S and A such that A*S is almost an identity matrix: the
    # first slice is the identity scaled by the first weight, the
    # remaining slices are small random matrices.
    first_weight = s[0, 0]
    a = np.zeros((D, D, K))
    a[:, :, 0] = np.identity(D) / first_weight
    a[:, :, 1:] = 0.1 / first_weight * np.random.randn(D, D, K - 1)
    A.initialize_from_value(a)

    # X : () x (N,D)
    temporal_weights = S._convert(GaussianMoments)[1:]
    X = VaryingGaussianMarkovChain(np.zeros(D),           # mean of x0
                                   1e-3 * np.identity(D),  # prec of x0
                                   A,                      # dynamics matrices
                                   temporal_weights,       # temporal weights
                                   np.ones(D),             # innovation
                                   n=N,                    # time instances
                                   name='X',
                                   plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                                   initialize=False)
    X.initialize_from_value(np.random.randn(N, D))

    # ------------------------------------------------------------------
    # Observations
    # ------------------------------------------------------------------

    # gamma : (D) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D,), name='gamma')
    gamma.initialize_from_value(1e-2 * np.ones(D))

    # C : (M,1) x (D) -- mixing matrix from latent to observation space
    C = GaussianARD(0,
                    gamma,
                    shape=(D,),
                    plates=(M, 1),
                    name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=2,
                                                        scale=0))
    C.initialize_from_value(np.random.randn(M, 1, D))

    # F : (M,N) x () -- noiseless process
    F = SumMultiply('d,d', C, X, name='F')

    # tau : () x () -- observation noise
    tau = Gamma(1e-5, 1e-5, name='tau')
    tau.initialize_from_value(1e2)

    # Y : (M,N) x () -- observations
    Y = GaussianARD(F, tau, name='Y')

    # Construct inference machine
    return VB(Y, F, C, gamma, X, A, alpha, tau, S, B, beta)
# This is the PCA model from the previous sections import numpy as np np.random.seed(1) from bayespy.nodes import GaussianARD, Gamma, Dot D = 3 X = GaussianARD(0, 1, shape=(D, ), plates=(1, 100), name='X') alpha = Gamma(1e-3, 1e-3, plates=(D, ), name='alpha') C = GaussianARD(0, alpha, shape=(D, ), plates=(10, 1), name='C') F = Dot(C, X) tau = Gamma(1e-3, 1e-3, name='tau') Y = GaussianARD(F, tau) c = np.random.randn(10, 2) x = np.random.randn(2, 100) data = np.dot(c, x) + 0.1 * np.random.randn(10, 100) Y.observe(data) Y.observe(data, mask=[[True], [False], [False], [True], [True], [False], [True], [True], [True], [False]]) from bayespy.inference import VB Q = VB(Y, C, X, alpha, tau) X.initialize_from_parameters(np.random.randn(1, 100, D), 10) from bayespy.inference.vmp import transformations rotX = transformations.RotateGaussianARD(X) rotC = transformations.RotateGaussianARD(C, alpha) R = transformations.RotationOptimizer(rotC, rotX, D) Q = VB(Y, C, X, alpha, tau) Q.callback = R.rotate Q.update(repeat=1000, tol=1e-6) Q.update(repeat=50, tol=np.nan) import bayespy.plot as bpplt
def test_message_to_parent_mu(self):
    """
    Verify the message that GaussianARD sends to its first parent (mu).

    Each section builds a small model, observes data and compares the two
    message components received by the parent with hand-computed values.
    """

    # Parent alpha is uncertain
    parent = GaussianARD(0, 1)
    precision = Gamma(2, 1)
    child = GaussianARD(parent, precision)
    child.observe(3)
    m0, m1 = parent._message_from_children()
    self.assertAllClose(m0, 2 * 3)
    self.assertAllClose(m1, -0.5 * 2)

    # The node itself is uncertain
    parent = GaussianARD(1, 1e10)
    child = GaussianARD(parent, 2)
    grandchild = GaussianARD(child, 1)
    grandchild.observe(5)
    child.update()
    m0, m1 = parent._message_from_children()
    expected_m0 = 2 * 1 / (2 + 1) * (2 * 1 + 1 * 5)
    self.assertAllClose(m0, expected_m0)
    self.assertAllClose(m1, -0.5 * 2)

    # alpha is larger than mu
    parent = GaussianARD(np.zeros((2, 3)), 1e10, shape=(2, 3))
    child = GaussianARD(parent, 2 * np.ones((3, 2, 3)))
    child.observe(3 * np.ones((3, 2, 3)))
    m0, m1 = parent._message_from_children()
    expected_m0 = 2 * 3 * 3 * np.ones((2, 3))
    expected_m1 = -0.5 * 3 * 2 * misc.identity(2, 3)
    self.assertAllClose(m0, expected_m0)
    self.assertAllClose(m1, expected_m1)

    # mu is larger than alpha
    parent = GaussianARD(np.zeros((3, 2, 3)), 1e10, shape=(3, 2, 3))
    child = GaussianARD(parent, 2 * np.ones((2, 3)))
    child.observe(3 * np.ones((3, 2, 3)))
    m0, m1 = parent._message_from_children()
    expected_m0 = 2 * 3 * np.ones((3, 2, 3))
    expected_m1 = -0.5 * 2 * misc.identity(3, 2, 3)
    self.assertAllClose(m0, expected_m0)
    self.assertAllClose(m1, expected_m1)

    # The node is larger than both mu and alpha
    parent = GaussianARD(np.zeros((2, 3)), 1e10, shape=(2, 3))
    child = GaussianARD(parent, 2 * np.ones((3,)), shape=(3, 2, 3))
    child.observe(3 * np.ones((3, 2, 3)))
    m0, m1 = parent._message_from_children()
    expected_m0 = 2 * 3 * 3 * np.ones((2, 3))
    expected_m1 = -0.5 * 2 * 3 * misc.identity(2, 3)
    self.assertAllClose(m0, expected_m0)
    self.assertAllClose(m1, expected_m1)

    # Dimensions are broadcasted
    parent = GaussianARD(np.zeros((2, 1)), 1e10, shape=(2, 1))
    child = GaussianARD(parent, 2 * np.ones((2, 3)), shape=(2, 3))
    child.observe(3 * np.ones((2, 3)))
    m0, m1 = parent._message_from_children()
    expected_m0 = 2 * 3 * 3 * np.ones((2, 1))
    expected_m1 = -0.5 * 2 * 3 * misc.identity(2, 1)
    self.assertAllClose(m0, expected_m0)
    self.assertAllClose(m1, expected_m1)

    # Plates when mu is smaller than the node
    parent = GaussianARD(0, 1, shape=(3,), plates=(4, 1, 1))
    child = GaussianARD(parent,
                        2 * np.ones((3,)),
                        shape=(2, 3),
                        plates=(4, 5))
    child.observe(3 * np.ones((4, 5, 2, 3)))
    m0, m1 = parent._message_from_children()
    # Multiply by ones so that broadcasted messages get their full shape
    ones_m0 = np.ones((4, 1, 1, 3))
    ones_m1 = np.ones((4, 1, 1, 3, 3))
    self.assertAllClose(m0 * ones_m0,
                        2 * 3 * 5 * 2 * np.ones((4, 1, 1, 3)))
    self.assertAllClose(m1 * ones_m1,
                        -0.5 * 2 * 5 * 2 * misc.identity(3)
                        * np.ones((4, 1, 1, 3, 3)))

    # Mask
    parent = GaussianARD(np.zeros((2, 1, 3)), 1e10, shape=(3,))
    child = GaussianARD(parent,
                        2 * np.ones((2, 4, 3)),
                        shape=(3,),
                        plates=(2, 4))
    child.observe(3 * np.ones((2, 4, 3)),
                  mask=[[True, True, True, False],
                        [False, True, False, True]])
    m0, m1 = parent._message_from_children()
    # Each row of the mask above has 3 and 2 True entries, respectively
    expected_m0 = (2 * 3 * np.ones((2, 1, 3))
                   * np.array([[[3]], [[2]]]))
    expected_m1 = (-0.5 * 2 * misc.identity(3) * np.ones((2, 1, 1, 1))
                   * np.array([[[[3]]], [[[2]]]]))
    self.assertAllClose(m0, expected_m0)
    self.assertAllClose(m1, expected_m1)

    # Mask with different shapes
    parent = GaussianARD(np.zeros((2, 1, 3)), 1e10, shape=())
    child = GaussianARD(parent,
                        2 * np.ones((2, 4, 3)),
                        shape=(3,),
                        plates=(2, 4))
    mask = np.array([[True, True, True, False],
                     [False, True, False, True]])
    child.observe(3 * np.ones((2, 4, 3)), mask=mask)
    m0, m1 = parent._message_from_children()
    # Sum of the masked ones over the second-to-last axis
    masked_sum = np.sum(np.ones((2, 4, 3)) * mask[..., None],
                        axis=-2,
                        keepdims=True)
    self.assertAllClose(m0, 2 * 3 * masked_sum)
    self.assertAllClose(m1, -0.5 * 2 * masked_sum)

    # Non-ARD Gaussian child
    prior_mean = np.array([1, 2])
    parent = GaussianARD(prior_mean, 1e10, shape=(2,))
    alpha = np.array([3, 4])
    Lambda = np.array([[1, 0.5],
                       [0.5, 1]])
    child = GaussianARD(parent, alpha, ndim=1)
    grandchild = Gaussian(child, Lambda)
    obs = np.array([5, 6])
    grandchild.observe(obs)
    child.update()
    m0, m1 = parent._message_from_children()
    diag_alpha = np.diag(alpha)
    mean = np.dot(np.linalg.inv(diag_alpha + Lambda),
                  np.dot(diag_alpha, prior_mean) + np.dot(Lambda, obs))
    self.assertAllClose(m0, np.dot(diag_alpha, mean))
    self.assertAllClose(m1, -0.5 * diag_alpha)

    # Broadcasted variable axes
    parent = GaussianARD(np.zeros(1), 1e10, shape=(1,))
    child = GaussianARD(parent, 2, shape=(3,))
    child.observe(3 * np.ones(3))
    m0, m1 = parent._message_from_children()
    expected_m0 = 2 * 3 * np.sum(np.ones(3), axis=-1, keepdims=True)
    expected_m1 = -0.5 * 2 * np.sum(np.identity(3),
                                    axis=(-1, -2),
                                    keepdims=True)
    self.assertAllClose(m0, expected_m0)
    self.assertAllClose(m1, expected_m1)