Example #1
def check_lower_bound(shape_mu, shape_alpha, plates_mu=(), **kwargs):
    M = GaussianARD(np.ones(plates_mu + shape_mu),
                    np.ones(plates_mu + shape_mu),
                    shape=shape_mu,
                    plates=plates_mu)
    if not ('ndim' in kwargs or 'shape' in kwargs):
        kwargs['ndim'] = len(shape_mu)
    X = GaussianARD(M,
                    2*np.ones(shape_alpha),
                    **kwargs)
    Y = GaussianARD(X,
                    3*np.ones(X.get_shape(0)),
                    **kwargs)
    Y.observe(4*np.ones(Y.get_shape(0)))
    X.update()
    Cov = 1/(2+3)
    mu = Cov * (2*1 + 3*4)
    x2 = mu**2 + Cov
    logH_X = (+ 0.5*(1+np.log(2*np.pi))
              + 0.5*np.log(Cov))
    logp_X = (- 0.5*np.log(2*np.pi)
              + 0.5*np.log(2)
              - 0.5*2*(x2 - 2*mu*1 + 1**2+1))
    r = np.prod(X.get_shape(0))
    self.assertAllClose(r * (logp_X + logH_X),
                        X.lower_bound_contribution())
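For reference, the closed-form values in `check_lower_bound` come from standard Gaussian conjugacy (a reading aid, not part of the original test): the mean parent M has posterior mean 1 and second moment $1^2 + 1$, the prior precision of X is 2, and the observation 4 carries precision 3, so

$$
\mathrm{Cov} = \frac{1}{2 + 3}, \qquad
\mu = \mathrm{Cov} \, (2 \cdot 1 + 3 \cdot 4), \qquad
\langle x^2 \rangle = \mu^2 + \mathrm{Cov},
$$

exactly the quantities the test computes before comparing against `X.lower_bound_contribution()`.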
Example #2
import numpy as np
from bayespy.nodes import GaussianARD, SumMultiply, Gamma
import bayespy.plot as bpplt

def run():
    k = 2
    c = 5
    s = 2
    x = np.arange(10)
    y = k * x + c + s * np.random.randn(10)

    X = np.vstack([x, np.ones(len(x))]).T
    B = GaussianARD(0, 1e-6, shape=(2,))
    F = SumMultiply('i,i', B, X)
    tau = Gamma(1e-3, 1e-3)

    Y = GaussianARD(F, tau)

    Y.observe(y)
    from bayespy.inference import VB
    Q = VB(Y, B, tau)
    Q.update(repeat=1000)
    xh = np.linspace(-5, 15, 100)
    Xh = np.vstack([xh, np.ones(len(xh))]).T
    Fh = SumMultiply('i,i', B, Xh)


    bpplt.pyplot.figure()
    bpplt.plot(Fh, x=xh, scale=2)
    bpplt.plot(y, x=x, color='r', marker='x', linestyle='None')
    bpplt.plot(k*xh+c, x=xh, color='r')
    bpplt.pyplot.show()
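After convergence, the fitted weights can be read off the posterior. A minimal sketch (to be placed at the end of `run()`, where `B` is in scope), using the `get_moments` API seen throughout these examples:

    # Posterior mean of B = [slope, bias]; with this synthetic data it
    # should land close to the true values k=2 and c=5.
    b_mean = B.get_moments()[0]
    print("Estimated [slope, bias]:", b_mean)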
Example #3
def _setup_linear_regression():
    """
    Setup code for the pdf and contour tests.

    This code is from http://www.bayespy.org/examples/regression.html
    """
    np.random.seed(1)
    k = 2 # slope
    c = 5 # bias
    s = 2 # noise standard deviation

    x = np.arange(10)
    y = k*x + c + s*np.random.randn(10)
    X = np.vstack([x, np.ones(len(x))]).T

    B = GaussianARD(0, 1e-6, shape=(2,))

    F = SumMultiply('i,i', B, X)

    tau = Gamma(1e-3, 1e-3)
    Y = GaussianARD(F, tau)
    Y.observe(y)

    Q = VB(Y, B, tau)
    Q.update(repeat=1000)
    xh = np.linspace(-5, 15, 100)
    Xh = np.vstack([xh, np.ones(len(xh))]).T
    Fh = SumMultiply('i,i', B, Xh)

    return locals()
Example #4
    def test_mask_to_parent(self):
        """
        Test the mask handling in Mixture node
        """

        K = 3
        Z = Categorical(np.ones(K)/K,
                        plates=(4,5))
        Mu = GaussianARD(0, 1,
                         shape=(2,),
                         plates=(4,K,5))
        Alpha = Gamma(1, 1,
                      plates=(4,K,5,2))
        X = Mixture(Z, GaussianARD, Mu, Alpha, cluster_plate=-2)
        Y = GaussianARD(X, 1)
        mask = np.reshape((np.mod(np.arange(4*5), 2) == 0),
                          (4,5))
        Y.observe(np.ones((4,5,2)), 
                  mask=mask)
        self.assertArrayEqual(Z.mask,
                              mask)
        self.assertArrayEqual(Mu.mask,
                              mask[:,None,:])
        self.assertArrayEqual(Alpha.mask,
                              mask[:,None,:,None])
                         
        pass
Example #5
    def test_riemannian_gradient(self):
        """Test Riemannian gradient of a Gamma node."""

        #
        # Without observations
        #

        # Construct model
        a = np.random.rand()
        b = np.random.rand()
        tau = Gamma(a, b)
        # Random initialization
        tau.initialize_from_parameters(np.random.rand(),
                                       np.random.rand())
        # Initial parameters
        phi0 = tau.phi
        # Gradient
        g = tau.get_riemannian_gradient()
        # Parameters after VB-EM update
        tau.update()
        phi1 = tau.phi
        # Check
        self.assertAllClose(g[0],
                            phi1[0] - phi0[0])
        self.assertAllClose(g[1],
                            phi1[1] - phi0[1])

        #
        # With observations
        #

        # Construct model
        a = np.random.rand()
        b = np.random.rand()
        tau = Gamma(a, b)
        mu = np.random.randn()
        Y = GaussianARD(mu, tau)
        Y.observe(np.random.randn())
        # Random initialization
        tau.initialize_from_parameters(np.random.rand(),
                                       np.random.rand())
        # Initial parameters
        phi0 = tau.phi
        # Gradient
        g = tau.get_riemannian_gradient()
        # Parameters after VB-EM update
        tau.update()
        phi1 = tau.phi
        # Check
        self.assertAllClose(g[0],
                            phi1[0] - phi0[0])
        self.assertAllClose(g[1],
                            phi1[1] - phi0[1])

        pass
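The identity being checked here is a known property of conjugate-exponential models: the Riemannian (natural) gradient of the VB lower bound with respect to the natural parameters equals the difference between the fixed-point VB update and the current parameters,

$$
\tilde{\nabla} \mathcal{L}(\phi) = \phi^{*} - \phi,
$$

so a single VB-EM update is exactly a unit-length natural-gradient step. This is why the test can compare `g` against `phi1 - phi0`.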
Example #6
    def test_lower_bound_contribution(self):

        a = 15
        b = 21
        y = 4
        x = Gamma(a, b)
        x.observe(y)
        testing.assert_allclose(
            x.lower_bound_contribution(),
            (
                a * np.log(b) +
                (a - 1) * np.log(y) -
                b * y -
                special.gammaln(a)
            )
        )

        # Just one latent node so we'll get exact marginal likelihood
        #
        # p(Y) = p(Y,X)/p(X|Y) = p(Y|X) * p(X) / p(X|Y)
        a = 2.3
        b = 4.1
        x = 1.9
        y = 4.8
        tau = Gamma(a, b)
        Y = GaussianARD(x, tau)
        Y.observe(y)
        mu = x
        nu = 2 * a
        s2 = b / a
        a_post = a + 0.5
        b_post = b + 0.5*(y - x)**2
        tau.update()
        testing.assert_allclose(
            [-b_post, a_post],
            tau.phi
        )
        testing.assert_allclose(
            Y.lower_bound_contribution() + tau.lower_bound_contribution(), # + tau.g,
            (
                special.gammaln((nu+1)/2)
                - special.gammaln(nu/2)
                - 0.5 * np.log(nu)
                - 0.5 * np.log(np.pi)
                - 0.5 * np.log(s2)
                - 0.5 * (nu + 1) * np.log(
                    1 + (y - mu)**2 / (nu * s2)
                )
            )
        )

        return
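The comparison target is the exact log marginal likelihood: integrating the Gamma(a, b) prior on the precision τ out of the Gaussian yields a Student-t density, which is what the closed-form expression in the assertion evaluates,

$$
p(y) = \mathrm{St}\!\left(y \,\middle|\, \mu = x,\ \nu = 2a,\ s^2 = b/a\right).
$$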
Example #7
    def test_message_to_parents(self):
        """ Check gradient passed to inputs parent node """
        D = 3

        X = Gaussian(np.random.randn(D), random.covariance(D))
        a = Gamma(np.random.rand(D), np.random.rand(D))

        Y = GaussianARD(X, a)
        Y.observe(np.random.randn(D))

        self.assert_message_to_parent(Y, X)
        self.assert_message_to_parent(Y, a)

        pass
Example #8
def check(shape, plates, einsum_x, einsum_xx, axis=-1):
    # TODO/FIXME: Improve by having non-diagonal precision/covariance
    # parameter for the Gaussian X
    D = shape[axis]
    X = GaussianARD(np.random.randn(*(plates+shape)),
                    np.random.rand(*(plates+shape)),
                    shape=shape,
                    plates=plates)
    (x, xx) = X.get_moments()
    R = np.random.randn(D,D)
    X.rotate(R, axis=axis)
    (rx, rxxr) = X.get_moments()
    self.assertAllClose(rx,
                        np.einsum(einsum_x, R, x))
    self.assertAllClose(rxxr,
                        np.einsum(einsum_xx, R, xx, R))
    pass
Example #9
def pca():

    np.random.seed(41)

    M = 10
    N = 3000
    D = 5

    # Construct the PCA model
    alpha = Gamma(1e-3, 1e-3, plates=(D,), name='alpha')
    W = GaussianARD(0, alpha, plates=(M,1), shape=(D,), name='W')
    X = GaussianARD(0, 1, plates=(1,N), shape=(D,), name='X')
    tau = Gamma(1e-3, 1e-3, name='tau')
    W.initialize_from_random()
    F = SumMultiply('d,d->', W, X)
    Y = GaussianARD(F, tau, name='Y')

    # Observe data
    data = np.sum(np.random.randn(M,1,D-1) * np.random.randn(1,N,D-1), axis=-1) + 1e-1 * np.random.randn(M,N)
    Y.observe(data)

    # Initialize VB engine
    Q = VB(Y, X, W, alpha, tau)

    # Take one update step (so phi is ok)
    Q.update(repeat=1)
    Q.save()

    # Run VB-EM
    Q.update(repeat=200)
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore the state
    Q.load()

    # Run Riemannian conjugate gradient
    #Q.optimize(X, alpha, maxiter=100, collapsed=[W, tau])
    Q.optimize(W, tau, maxiter=100, collapsed=[X, alpha])
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

    bpplt.pyplot.show()
Example #10
        def check(indices, plates, shape, axis=-1, use_mask=False):
            mu = np.random.rand(*(plates+shape))
            alpha = np.random.rand(*(plates+shape))
            X = GaussianARD(mu, alpha, shape=shape, plates=plates)
            Y = Take(X, indices, plate_axis=axis)
            Z = GaussianARD(Y, 1, shape=shape)
            z = np.random.randn(*(Z.get_shape(0)))
            if use_mask:
                mask = np.mod(np.reshape(np.arange(np.prod(Z.plates)), Z.plates), 2) != 0
            else:
                mask = True
            Z.observe(z, mask=mask)
            X.update()
            (x0, x1) = X.get_moments()

            # For comparison, build the same model brute force
            X = GaussianARD(mu, alpha, shape=shape, plates=plates)

            # Number of plate axes before the take axis
            N = len(X.plates) + axis

            # Reshape the take axes into a single axis
            z_shape = X.plates[:axis] + (-1,)
            if axis < -1:
                z_shape = z_shape + X.plates[(axis+1):]
            z_shape = z_shape + shape
            z = np.reshape(z, z_shape)

            # Similarly, reshape the take axes of the mask into a single axis
            if use_mask:
                mask_shape = X.plates[:axis] + (-1,)
                if axis < -1:
                    mask_shape = mask_shape + X.plates[(axis+1):]
                mask = np.reshape(mask, mask_shape)

            for (j, i) in enumerate(range(np.size(indices))):
                ind = np.array(indices).flatten()[i]
                index_x = N*(slice(None),) + (ind,)
                index_z = N*(slice(None),) + (j,)
                Xi = X[index_x]
                zi = z[index_z]
                Zi = GaussianARD(Xi, 1, ndim=len(shape))
                if use_mask:
                    maski = mask[index_z]
                else:
                    maski = True
                Zi.observe(zi, mask=maski)

            X.update()

            self.assertAllClose(
                x0,
                X.get_moments()[0],
            )

            self.assertAllClose(
                x1,
                X.get_moments()[1],
            )

            return
Example #11
    def test_message_to_child(self):
        """
        Test the message to child of Concatenate node.
        """

        # Two parents without shapes
        X1 = GaussianARD(0, 1, plates=(2,), shape=())
        X2 = GaussianARD(0, 1, plates=(3,), shape=())
        Y = Concatenate(X1, X2)
        u1 = X1.get_moments()
        u2 = X2.get_moments()
        u = Y.get_moments()
        self.assertAllClose((u[0]*np.ones((5,)))[:2],
                            u1[0]*np.ones((2,)))
        self.assertAllClose((u[1]*np.ones((5,)))[:2],
                            u1[1]*np.ones((2,)))
        self.assertAllClose((u[0]*np.ones((5,)))[2:],
                            u2[0]*np.ones((3,)))
        self.assertAllClose((u[1]*np.ones((5,)))[2:],
                            u2[1]*np.ones((3,)))

        # Two parents with shapes
        X1 = GaussianARD(0, 1, plates=(2,), shape=(4,))
        X2 = GaussianARD(0, 1, plates=(3,), shape=(4,))
        Y = Concatenate(X1, X2)
        u1 = X1.get_moments()
        u2 = X2.get_moments()
        u = Y.get_moments()
        self.assertAllClose((u[0]*np.ones((5,4)))[:2],
                            u1[0]*np.ones((2,4)))
        self.assertAllClose((u[1]*np.ones((5,4,4)))[:2],
                            u1[1]*np.ones((2,4,4)))
        self.assertAllClose((u[0]*np.ones((5,4)))[2:],
                            u2[0]*np.ones((3,4)))
        self.assertAllClose((u[1]*np.ones((5,4,4)))[2:],
                            u2[1]*np.ones((3,4,4)))

        # Test with non-constant axis
        X1 = GaussianARD(0, 1, plates=(2,4), shape=())
        X2 = GaussianARD(0, 1, plates=(3,4), shape=())
        Y = Concatenate(X1, X2, axis=-2)
        u1 = X1.get_moments()
        u2 = X2.get_moments()
        u = Y.get_moments()
        self.assertAllClose((u[0]*np.ones((5,4)))[:2],
                            u1[0]*np.ones((2,4)))
        self.assertAllClose((u[1]*np.ones((5,4)))[:2],
                            u1[1]*np.ones((2,4)))
        self.assertAllClose((u[0]*np.ones((5,4)))[2:],
                            u2[0]*np.ones((3,4)))
        self.assertAllClose((u[1]*np.ones((5,4)))[2:],
                            u2[1]*np.ones((3,4)))

        # Test with constant parent
        X1 = np.random.randn(2, 4)
        X2 = GaussianARD(0, 1, plates=(3,), shape=(4,))
        Y = Concatenate(X1, X2)
        u1 = Y.parents[0].get_moments()
        u2 = X2.get_moments()
        u = Y.get_moments()
        self.assertAllClose((u[0]*np.ones((5,4)))[:2],
                            u1[0]*np.ones((2,4)))
        self.assertAllClose((u[1]*np.ones((5,4,4)))[:2],
                            u1[1]*np.ones((2,4,4)))
        self.assertAllClose((u[0]*np.ones((5,4)))[2:],
                            u2[0]*np.ones((3,4)))
        self.assertAllClose((u[1]*np.ones((5,4,4)))[2:],
                            u2[1]*np.ones((3,4,4)))


        pass
Example #12
    def test_message_to_parent(self):
        """
        Test the message to parents of Concatenate node.
        """

        # Two parents without shapes
        X1 = GaussianARD(0, 1, plates=(2,), shape=())
        X2 = GaussianARD(0, 1, plates=(3,), shape=())
        Z = Concatenate(X1, X2)
        Y = GaussianARD(Z, 1)
        Y.observe(np.random.randn(*Y.get_shape(0)))
        m1 = X1._message_from_children()
        m2 = X2._message_from_children()
        m = Z._message_from_children()
        self.assertAllClose((m[0]*np.ones((5,)))[:2],
                            m1[0]*np.ones((2,)))
        self.assertAllClose((m[1]*np.ones((5,)))[:2],
                            m1[1]*np.ones((2,)))
        self.assertAllClose((m[0]*np.ones((5,)))[2:],
                            m2[0]*np.ones((3,)))
        self.assertAllClose((m[1]*np.ones((5,)))[2:],
                            m2[1]*np.ones((3,)))

        # Two parents with shapes
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", FutureWarning)

            X1 = GaussianARD(0, 1, plates=(2,), shape=(4,6))
            X2 = GaussianARD(0, 1, plates=(3,), shape=(4,6))
            Z = Concatenate(X1, X2)
            Y = GaussianARD(Z, 1)
            Y.observe(np.random.randn(*Y.get_shape(0)))
            m1 = X1._message_from_children()
            m2 = X2._message_from_children()
            m = Z._message_from_children()
            self.assertAllClose((m[0]*np.ones((5,4,6)))[:2],
                                m1[0]*np.ones((2,4,6)))
            self.assertAllClose((m[1]*np.ones((5,4,6,4,6)))[:2],
                                m1[1]*np.ones((2,4,6,4,6)))
            self.assertAllClose((m[0]*np.ones((5,4,6)))[2:],
                                m2[0]*np.ones((3,4,6)))
            self.assertAllClose((m[1]*np.ones((5,4,6,4,6)))[2:],
                                m2[1]*np.ones((3,4,6,4,6)))

            # Two parents with non-default concatenation axis
            X1 = GaussianARD(0, 1, plates=(2,4), shape=())
            X2 = GaussianARD(0, 1, plates=(3,4), shape=())
            Z = Concatenate(X1, X2, axis=-2)
            Y = GaussianARD(Z, 1)
            Y.observe(np.random.randn(*Y.get_shape(0)))
            m1 = X1._message_from_children()
            m2 = X2._message_from_children()
            m = Z._message_from_children()
            self.assertAllClose((m[0]*np.ones((5,4)))[:2],
                                m1[0]*np.ones((2,4)))
            self.assertAllClose((m[1]*np.ones((5,4)))[:2],
                                m1[1]*np.ones((2,4)))
            self.assertAllClose((m[0]*np.ones((5,4)))[2:],
                                m2[0]*np.ones((3,4)))
            self.assertAllClose((m[1]*np.ones((5,4)))[2:],
                                m2[1]*np.ones((3,4)))

            # Constant parent
            X1 = np.random.randn(2,4,6)
            X2 = GaussianARD(0, 1, plates=(3,), shape=(4,6))
            Z = Concatenate(X1, X2)
            Y = GaussianARD(Z, 1)
            Y.observe(np.random.randn(*Y.get_shape(0)))
            m1 = Z._message_to_parent(0)
            m2 = X2._message_from_children()
            m = Z._message_from_children()
            self.assertAllClose((m[0]*np.ones((5,4,6)))[:2],
                                m1[0]*np.ones((2,4,6)))
            self.assertAllClose((m[1]*np.ones((5,4,6,4,6)))[:2],
                                m1[1]*np.ones((2,4,6,4,6)))
            self.assertAllClose((m[0]*np.ones((5,4,6)))[2:],
                                m2[0]*np.ones((3,4,6)))
            self.assertAllClose((m[1]*np.ones((5,4,6,4,6)))[2:],
                                m2[1]*np.ones((3,4,6,4,6)))

        pass
Example #13
    def test_rotate_plates(self):

        # Basic test for Gaussian vectors
        X = GaussianARD(np.random.randn(3,2),
                        np.random.rand(3,2),
                        shape=(2,),
                        plates=(3,))
        (u0, u1) = X.get_moments()
        Cov = u1 - linalg.outer(u0, u0, ndim=1)
        Q = np.random.randn(3,3)
        Qu0 = np.einsum('ik,kj->ij', Q, u0)
        QCov = np.einsum('k,kij->kij', np.sum(Q, axis=0)**2, Cov)
        Qu1 = QCov + linalg.outer(Qu0, Qu0, ndim=1)
        X.rotate_plates(Q, plate_axis=-1)
        (u0, u1) = X.get_moments()
        self.assertAllClose(u0, Qu0)
        self.assertAllClose(u1, Qu1)

        # Test full covariance, that is, with observations
        X = GaussianARD(np.random.randn(3,2),
                        np.random.rand(3,2),
                        shape=(2,),
                        plates=(3,))
        Y = Gaussian(X, [[2.0, 1.5], [1.5, 3.0]],
                     plates=(3,))
        Y.observe(np.random.randn(3,2))
        X.update()
        (u0, u1) = X.get_moments()
        Cov = u1 - linalg.outer(u0, u0, ndim=1)
        Q = np.random.randn(3,3)
        Qu0 = np.einsum('ik,kj->ij', Q, u0)
        QCov = np.einsum('k,kij->kij', np.sum(Q, axis=0)**2, Cov)
        Qu1 = QCov + linalg.outer(Qu0, Qu0, ndim=1)
        X.rotate_plates(Q, plate_axis=-1)
        (u0, u1) = X.get_moments()
        self.assertAllClose(u0, Qu0)
        self.assertAllClose(u1, Qu1)

        pass
Example #14
    def test_initialization(self):
        """
        Test initialization methods of GaussianARD
        """

        X = GaussianARD(1, 2, shape=(2,), plates=(3,))

        # Prior initialization
        mu = 1 * np.ones((3, 2))
        alpha = 2 * np.ones((3, 2))
        X.initialize_from_prior()
        u = X._message_to_child()
        self.assertAllClose(u[0]*np.ones((3,2)), 
                            mu)
        self.assertAllClose(u[1]*np.ones((3,2,2)), 
                            linalg.outer(mu, mu, ndim=1) + 
                            misc.diag(1/alpha, ndim=1))

        # Parameter initialization
        mu = np.random.randn(3, 2)
        alpha = np.random.rand(3, 2)
        X.initialize_from_parameters(mu, alpha)
        u = X._message_to_child()
        self.assertAllClose(u[0], mu)
        self.assertAllClose(u[1], linalg.outer(mu, mu, ndim=1) + 
                                  misc.diag(1/alpha, ndim=1))

        # Value initialization
        x = np.random.randn(3, 2)
        X.initialize_from_value(x)
        u = X._message_to_child()
        self.assertAllClose(u[0], x)
        self.assertAllClose(u[1], linalg.outer(x, x, ndim=1))

        # Random initialization
        X.initialize_from_random()

        pass
Example #15
def model(M=20, N=100, D=10, K=3):
    """
    Construct the linear state-space model with switching dynamics.
    """

    #
    # Switching dynamics (HMM)
    #

    # Prior for initial state probabilities
    rho = Dirichlet(1e-3*np.ones(K),
                    name='rho')

    # Prior for state transition probabilities
    V = Dirichlet(1e-3*np.ones(K),
                  plates=(K,),
                  name='V')
    v = 10*np.identity(K) + 1*np.ones((K,K))
    v /= np.sum(v, axis=-1, keepdims=True)
    V.initialize_from_value(v)

    # Hidden states (with unknown initial state probabilities and state
    # transition probabilities)
    Z = CategoricalMarkovChain(rho, V,
                               states=N-1,
                               name='Z',
                               plotter=bpplt.CategoricalMarkovChainPlotter(),
                               initialize=False)
    Z.u[0] = np.random.dirichlet(np.ones(K))
    Z.u[1] = np.reshape(np.random.dirichlet(0.5*np.ones(K*K), size=(N-2)),
                        (N-2, K, K))

    #
    # Linear state-space models
    #

    # Dynamics matrix with ARD
    # alpha : (K,1,D) x ()
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(K,1,D),
                  name='alpha')
    # A : (K,D) x (D)
    A = GaussianARD(0,
                    alpha,
                    shape=(D,),
                    plates=(K,D),
                    name='A',
                    plotter=bpplt.GaussianHintonPlotter())
    A.initialize_from_value(np.identity(D)*np.ones((K,D,D))
                            + 0.1*np.random.randn(K,D,D))

    # Latent states with dynamics
    # X : () x (N,D)
    X = SwitchingGaussianMarkovChain(np.zeros(D),         # mean of x0
                                     1e-3*np.identity(D), # prec of x0
                                     A,                   # dynamics
                                     Z,                   # dynamics selection
                                     np.ones(D),          # innovation
                                     n=N,                 # time instances
                                     name='X',
                                     plotter=bpplt.GaussianMarkovChainPlotter())
    X.initialize_from_value(10*np.random.randn(N,D))

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='gamma')
    # C : (M,1) x (D)
    C = GaussianARD(0,
                    gamma,
                    shape=(D,),
                    plates=(M,1),
                    name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=-3,cols=-1))
    C.initialize_from_value(np.random.randn(M,1,D))

    # Underlying noiseless function
    # F : (M,N) x ()
    F = SumMultiply('i,i', 
                    C, 
                    X,
                    name='F')
    
    #
    # Mixing the models
    #

    # Observation noise
    tau = Gamma(1e-5,
                1e-5,
                name='tau')
    tau.initialize_from_value(1e2)

    # Emission/observation distribution
    Y = GaussianARD(F, tau,
                    name='Y')

    Q = VB(Y, F,
           Z, rho, V,
           C, gamma, X, A, alpha,
           tau)

    return Q
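A typical way to use the constructed model would follow the standard BayesPy workflow seen in the other examples. A minimal sketch (assumptions: `y` is a hypothetical (M, N) data array, and `Q['Y']` relies on VB's name-based node lookup):

Q = model(M=20, N=100, D=10, K=3)
Q['Y'].observe(y)        # y: hypothetical data array of shape (M, N)
Q.update(repeat=200)     # run VB-EM updates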
Example #16
    def test_annealing(self):

        X = GaussianARD(3, 4)
        X.initialize_from_parameters(-1, 6)

        Q = VB(X)
        Q.set_annealing(0.1)

        #
        # Check that the gradient is correct
        #

        # Initial parameters 
        phi0 = X.phi
        # Gradient
        rg = X.get_riemannian_gradient()
        g = X.get_gradient(rg)
        # Numerical gradient of the first parameter
        eps = 1e-6
        p0 = X.get_parameters()
        l0 = Q.compute_lowerbound(ignore_masked=False)
        g_num = [(), ()]
        e = eps
        p1 = p0[0] + e
        X.set_parameters([p1, p0[1]])
        l1 = Q.compute_lowerbound(ignore_masked=False)
        g_num[0] = (l1 - l0) / eps
        # Numerical gradient of the second parameter
        p1 = p0[1] + e
        X.set_parameters([p0[0], p1])
        l1 = Q.compute_lowerbound(ignore_masked=False)
        g_num[1] = (l1 - l0) / (eps)
        # Check
        self.assertAllClose(g[0],
                            g_num[0])
        self.assertAllClose(g[1],
                            g_num[1])

        #
        # Gradient should be zero after updating
        #

        X.update()
        # Initial parameters 
        phi0 = X.phi
        # Numerical gradient of the first parameter
        eps = 1e-8
        p0 = X.get_parameters()
        l0 = Q.compute_lowerbound(ignore_masked=False)
        g_num = [(), ()]
        e = eps
        p1 = p0[0] + e
        X.set_parameters([p1, p0[1]])
        l1 = Q.compute_lowerbound(ignore_masked=False)
        g_num[0] = (l1 - l0) / eps
        # Numerical gradient of the second parameter
        p1 = p0[1] + e
        X.set_parameters([p0[0], p1])
        l1 = Q.compute_lowerbound(ignore_masked=False)
        g_num[1] = (l1 - l0) / (eps)
        # Check
        self.assertAllClose(0,
                            g_num[0],
                            atol=1e-5)
        self.assertAllClose(0,
                            g_num[1],
                            atol=1e-5)

        # Not at the optimum
        X.initialize_from_parameters(-1, 6)
        # Initial parameters 
        phi0 = X.phi
        # Gradient
        g = X.get_riemannian_gradient()
        # Parameters after VB-EM update
        X.update()
        phi1 = X.phi
        # Check
        self.assertAllClose(g[0],
                            phi1[0] - phi0[0])
        self.assertAllClose(g[1],
                            phi1[1] - phi0[1])
        

        pass
Example #17
# $$
# \begin{split}
# p(\mathbf{y}|\mu,\tau) &= \prod^{9}_{n=0} \mathcal{N}(y_n|\mu,\tau) \\
# p(\mu) &= \mathcal{N}(\mu|0,10^{-6}) \\
# p(\tau) &= \mathcal{G}(\tau|10^{-6},10^{-6})
# \end{split}
# $$
# 
# where $\mathcal{N}$ is the Gaussian distribution parameterized by its mean and precision (i.e., inverse variance), and $\mathcal{G}$ is the gamma distribution parameterized by its shape and rate parameters. Note that we have given quite uninformative priors for the variables $\mu$ and $\tau$.  This simple model can also be shown as a directed factor graph:

# This model can be constructed in BayesPy as follows:

# In[2]:

from bayespy.nodes import GaussianARD, Gamma
mu = GaussianARD(0, 1e-6)
tau = Gamma(1e-6, 1e-6)
y = GaussianARD(mu, tau, plates=(10,))


# In[3]:
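# NOTE: `data` is assumed to be defined in an earlier notebook cell that is
# not shown in this excerpt, e.g. ten noisy observations such as
#     data = np.random.normal(5, 10, size=(10,))   # hypothetical values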

y.observe(data)


# Next we want to estimate the posterior distribution.  In principle, we could use different inference engines (e.g., MCMC or EP), but currently only the variational Bayesian (VB) engine is implemented.  The engine is initialized by giving it all the nodes of the model:

# In[4]:

from bayespy.inference import VB
Q = VB(mu, tau, y)
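# Posterior inference would then proceed by iterating the VB updates until
# convergence; a sketch of the usual next step (not shown in this excerpt):
#
#     Q.update(repeat=20)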

Example #18
    def test_lowerbound(self):
        """
        Test the variational Bayesian lower bound term for GaussianARD.
        """

        # Test vector formula with full noise covariance
        m = np.random.randn(2)
        alpha = np.random.rand(2)
        y = np.random.randn(2)
        X = GaussianARD(m, alpha, ndim=1)
        V = np.array([[3,1],[1,3]])
        Y = Gaussian(X, V)
        Y.observe(y)
        X.update()
        Cov = np.linalg.inv(np.diag(alpha) + V)
        mu = np.dot(Cov, np.dot(V, y) + alpha*m)
        x2 = np.outer(mu, mu) + Cov
        logH_X = (+ 2*0.5*(1+np.log(2*np.pi)) 
                  + 0.5*np.log(np.linalg.det(Cov)))
        logp_X = (- 2*0.5*np.log(2*np.pi) 
                  + 0.5*np.log(np.linalg.det(np.diag(alpha)))
                  - 0.5*np.sum(np.diag(alpha)
                               * (x2 
                                  - np.outer(mu,m) 
                                  - np.outer(m,mu) 
                                  + np.outer(m,m))))
        self.assertAllClose(logp_X + logH_X,
                            X.lower_bound_contribution())

        def check_lower_bound(shape_mu, shape_alpha, plates_mu=(), **kwargs):
            M = GaussianARD(np.ones(plates_mu + shape_mu),
                            np.ones(plates_mu + shape_mu),
                            shape=shape_mu,
                            plates=plates_mu)
            if not ('ndim' in kwargs or 'shape' in kwargs):
                kwargs['ndim'] = len(shape_mu)
            X = GaussianARD(M,
                            2*np.ones(shape_alpha),
                            **kwargs)
            Y = GaussianARD(X,
                            3*np.ones(X.get_shape(0)),
                            **kwargs)
            Y.observe(4*np.ones(Y.get_shape(0)))
            X.update()
            Cov = 1/(2+3)
            mu = Cov * (2*1 + 3*4)
            x2 = mu**2 + Cov
            logH_X = (+ 0.5*(1+np.log(2*np.pi)) 
                      + 0.5*np.log(Cov))
            logp_X = (- 0.5*np.log(2*np.pi) 
                      + 0.5*np.log(2) 
                      - 0.5*2*(x2 - 2*mu*1 + 1**2+1))
            r = np.prod(X.get_shape(0))
            self.assertAllClose(r * (logp_X + logH_X),
                                X.lower_bound_contribution())
            
        # Test scalar formula
        check_lower_bound((), ())

        # Test array formula
        check_lower_bound((2,3), (2,3))

        # Test dim-broadcasting of mu
        check_lower_bound((3,1), (2,3,4))

        # Test dim-broadcasting of alpha
        check_lower_bound((2,3,4), (3,1))

        # Test dim-broadcasting of mu and alpha
        check_lower_bound((3,1), (3,1),
                          shape=(2,3,4))

        # Test dim-broadcasting of mu with plates
        check_lower_bound((), (),
                          plates_mu=(),
                          shape=(),
                          plates=(5,))

        # BUG: Scalar parents for array variable caused einsum error
        check_lower_bound((), (),
                          shape=(3,))
        
        # BUG: Log-det was summed over plates
        check_lower_bound((), (),
                          shape=(3,),
                          plates=(4,))

        pass
Example #19
def run(N=500, seed=42, maxiter=100, plot=True):
    """
    Run deterministic annealing demo for 1-D Gaussian mixture.
    """

    if seed is not None:
        np.random.seed(seed)

    mu = GaussianARD(0, 1,
                     plates=(2,),
                     name='means')
    Z = Categorical([0.3, 0.7],
                    plates=(N,),
                    name='classes')
    Y = Mixture(Z, GaussianARD, mu, 1,
                name='observations')

    # Generate data
    z = Z.random()
    data = np.empty(N)
    for n in range(N):
        data[n] = [4, -4][z[n]]

    Y.observe(data)

    # Initialize means closer to the inferior local optimum in which the
    # cluster means are swapped
    mu.initialize_from_value([0, 6])

    Q = VB(Y, Z, mu)
    Q.save()

    #
    # Standard VB-EM algorithm
    #
    Q.update(repeat=maxiter)

    mu_vbem = mu.u[0].copy()
    L_vbem = Q.compute_lowerbound()

    #
    # VB-EM with deterministic annealing
    #
    Q.load()
    beta = 0.01
    while beta < 1.0:
        beta = min(beta*1.2, 1.0)
        print("Set annealing to %.2f" % beta)
        Q.set_annealing(beta)
        Q.update(repeat=maxiter, tol=1e-4)

    mu_anneal = mu.u[0].copy()
    L_anneal = Q.compute_lowerbound()

    print("==============================")
    print("RESULTS FOR VB-EM vs ANNEALING")
    print("Fixed component probabilities:", np.array([0.3, 0.7]))
    print("True component means:", np.array([4, -4]))
    print("VB-EM component means:", mu_vbem)
    print("VB-EM lower bound:", L_vbem)
    print("Annealed VB-EM component means:", mu_anneal)
    print("Annealed VB-EM lower bound:", L_anneal)
    
    return
Example #20
    def test_message_to_parent_alpha(self):
        """
        Test the message from GaussianARD the 2nd parent (alpha).
        """

        # Check formula with uncertain parent mu
        mu = GaussianARD(1,1)
        tau = Gamma(0.5*1e10, 1e10)
        X = GaussianARD(mu,
                        tau)
        X.observe(3)
        (m0, m1) = tau._message_from_children()
        self.assertAllClose(m0,
                            -0.5*(3**2 - 2*3*1 + 1**2+1))
        self.assertAllClose(m1,
                            0.5)

        # Check formula with uncertain node
        tau = Gamma(1e10, 1e10)
        X = GaussianARD(2, tau)
        Y = GaussianARD(X, 1)
        Y.observe(5)
        X.update()
        (m0, m1) = tau._message_from_children()
        self.assertAllClose(m0,
                            -0.5*(1/(1+1)+3.5**2 - 2*3.5*2 + 2**2))
        self.assertAllClose(m1,
                            0.5)

        # Check alpha larger than mu
        alpha = Gamma(np.ones((3,2,3))*1e10, 1e10)
        X = GaussianARD(np.ones((2,3)),
                        alpha,
                        ndim=3)
        X.observe(2*np.ones((3,2,3)))
        (m0, m1) = alpha._message_from_children()
        self.assertAllClose(m0 * np.ones((3,2,3)),
                            -0.5*(2**2 - 2*2*1 + 1**2) * np.ones((3,2,3)))
        self.assertAllClose(m1*np.ones((3,2,3)),
                            0.5*np.ones((3,2,3)))

        # Check mu larger than alpha
        tau = Gamma(np.ones((2,3))*1e10, 1e10)
        X = GaussianARD(np.ones((3,2,3)),
                        tau,
                        ndim=3)
        X.observe(2*np.ones((3,2,3)))
        (m0, m1) = tau._message_from_children()
        self.assertAllClose(m0,
                            -0.5*(2**2 - 2*2*1 + 1**2) * 3 * np.ones((2,3)))
        self.assertAllClose(m1 * np.ones((2,3)),
                            0.5 * 3 * np.ones((2,3)))

        # Check node larger than mu and alpha
        tau = Gamma(np.ones((3,))*1e10, 1e10)
        X = GaussianARD(np.ones((2,3)),
                        tau,
                        shape=(3,2,3))
        X.observe(2*np.ones((3,2,3)))
        (m0, m1) = tau._message_from_children()
        self.assertAllClose(m0 * np.ones(3),
                            -0.5*(2**2 - 2*2*1 + 1**2) * 6 * np.ones((3,)))
        self.assertAllClose(m1 * np.ones(3),
                            0.5 * 6 * np.ones(3))

        # Check plates for smaller mu than node
        tau = Gamma(np.ones((4,1,2,3))*1e10, 1e10)
        X = GaussianARD(GaussianARD(1, 1, 
                                    shape=(3,),
                                    plates=(4,1,1)),
                        tau,
                        shape=(2,3),
                        plates=(4,5))
        X.observe(2*np.ones((4,5,2,3)))
        (m0, m1) = tau._message_from_children()
        self.assertAllClose(m0 * np.ones((4,1,2,3)),
                            (-0.5 * (2**2 - 2*2*1 + 1**2+1)
                             * 5*np.ones((4,1,2,3))))
        self.assertAllClose(m1 * np.ones((4,1,2,3)),
                            5*0.5 * np.ones((4,1,2,3)))

        # Check mask
        tau = Gamma(np.ones((4,3))*1e10, 1e10)
        X = GaussianARD(np.ones(3),
                        tau,
                        shape=(3,),
                        plates=(2,4,))
        X.observe(2*np.ones((2,4,3)), mask=[[True, False, True, False],
                                            [False, True, True, False]])
        (m0, m1) = tau._message_from_children()
        self.assertAllClose(m0 * np.ones((4,3)),
                            (-0.5 * (2**2 - 2*2*1 + 1**2) 
                             * np.ones((4,3)) 
                             * np.array([[1], [1], [2], [0]])))
        self.assertAllClose(m1 * np.ones((4,3)),
                            0.5 * np.array([[1], [1], [2], [0]]) * np.ones((4,3)))
        
        # Check non-ARD Gaussian child
        mu = np.array([1,2])
        alpha = np.array([3,4])
        Alpha = Gamma(alpha*1e10, 1e10)
        Lambda = np.array([[1, 0.5],
                          [0.5, 1]])
        X = GaussianARD(mu, Alpha, ndim=1)
        Y = Gaussian(X, Lambda)
        y = np.array([5,6])
        Y.observe(y)
        X.update()
        (m0, m1) = Alpha._message_from_children()
        Cov = np.linalg.inv(np.diag(alpha)+Lambda)
        mean = np.dot(Cov, np.dot(np.diag(alpha), mu)
                           + np.dot(Lambda, y))
        self.assertAllClose(m0 * np.ones(2),
                            -0.5 * np.diag(
                                np.outer(mean, mean) + Cov
                                - np.outer(mean, mu)
                                - np.outer(mu, mean)
                                + np.outer(mu, mu)))
        self.assertAllClose(m1 * np.ones(2),
                            0.5 * np.ones(2))
        
        pass
Example #21
    def test_message_to_parent(self):
        """
        Test the message to parents of Mixture node.
        """

        K = 3

        # Broadcasting the moments on the cluster axis
        Mu = GaussianARD(2, 1,
                         ndim=0,
                         plates=(K,))
        (mu, mumu) = Mu._message_to_child()
        Alpha = Gamma(3, 1,
                      plates=(K,))
        (alpha, logalpha) = Alpha._message_to_child()
        z = Categorical(np.ones(K)/K)
        X = Mixture(z, GaussianARD, Mu, Alpha)
        tau = 4
        Y = GaussianARD(X, tau)
        y = 5
        Y.observe(y)
        (x, xx) = X._message_to_child()
        m = X._message_to_parent(0)
        self.assertAllClose(m[0],
                            random.gaussian_logpdf(xx*alpha,
                                                   x*alpha*mu,
                                                   mumu*alpha,
                                                   logalpha,
                                                   0))
                                                   
        m = X._message_to_parent(1)
        self.assertAllClose(m[0],
                            1/K * (alpha*x) * np.ones(3))
        self.assertAllClose(m[1],
                            -0.5 * 1/K * alpha * np.ones(3))

        # Some parameters do not have cluster plate axis
        Mu = GaussianARD(2, 1,
                         ndim=0,
                         plates=(K,))
        (mu, mumu) = Mu._message_to_child()
        Alpha = Gamma(3, 1) # Note: no cluster plate axis!
        (alpha, logalpha) = Alpha._message_to_child()
        z = Categorical(np.ones(K)/K)
        X = Mixture(z, GaussianARD, Mu, Alpha)
        tau = 4
        Y = GaussianARD(X, tau)
        y = 5
        Y.observe(y)
        (x, xx) = X._message_to_child()
        m = X._message_to_parent(0)
        self.assertAllClose(m[0],
                            random.gaussian_logpdf(xx*alpha,
                                                   x*alpha*mu,
                                                   mumu*alpha,
                                                   logalpha,
                                                   0))
                                                   
        m = X._message_to_parent(1)
        self.assertAllClose(m[0],
                            1/K * (alpha*x) * np.ones(3))
        self.assertAllClose(m[1],
                            -0.5 * 1/K * alpha * np.ones(3))

        # Cluster assignments do not have as many plate axes as parameters.
        M = 2
        Mu = GaussianARD(2, 1,
                         ndim=0,
                         plates=(K,M))
        (mu, mumu) = Mu._message_to_child()
        Alpha = Gamma(3, 1,
                      plates=(K,M))
        (alpha, logalpha) = Alpha._message_to_child()
        z = Categorical(np.ones(K)/K)
        X = Mixture(z, GaussianARD, Mu, Alpha, cluster_plate=-2)
        tau = 4
        Y = GaussianARD(X, tau)
        y = 5 * np.ones(M)
        Y.observe(y)
        (x, xx) = X._message_to_child()
        m = X._message_to_parent(0)
        self.assertAllClose(m[0]*np.ones(K),
                            np.sum(random.gaussian_logpdf(xx*alpha,
                                                          x*alpha*mu,
                                                          mumu*alpha,
                                                          logalpha,
                                                          0) *
                                   np.ones((K,M)),
                                   axis=-1))
                                                   
        m = X._message_to_parent(1)
        self.assertAllClose(m[0] * np.ones((K,M)),
                            1/K * (alpha*x) * np.ones((K,M)))
        self.assertAllClose(m[1] * np.ones((K,M)),
                            -0.5 * 1/K * alpha * np.ones((K,M)))
        

        pass
Example #22
def model(M, N, D, K):
    """
    Construct the linear state-space model with time-varying dynamics

    For reference, see the following publication:
    (TODO)
    """

    #
    # The model block for the latent mixing weight process
    #
    
    # Dynamics matrix with ARD
    # beta : (K) x ()
    beta = Gamma(1e-5,
                 1e-5,
                 plates=(K,),
                 name='beta')
    # B : (K) x (K)
    B = GaussianARD(np.identity(K),
                    beta,
                    shape=(K,),
                    plates=(K,),
                    name='B',
                    plotter=bpplt.GaussianHintonPlotter(rows=0, 
                                                        cols=1,
                                                        scale=0),
                    initialize=False)
    B.initialize_from_value(np.identity(K))

    # Mixing weight process, that is, the weights in the linear combination of
    # state dynamics matrices
    # S : () x (N,K)
    S = GaussianMarkovChain(np.ones(K),
                            1e-6*np.identity(K),
                            B,
                            np.ones(K),
                            n=N,
                            name='S',
                            plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                            initialize=False)
    s = 10*np.random.randn(N,K)
    s[:,0] = 10
    S.initialize_from_value(s)

    #
    # The model block for the latent states
    #
        
    # Projection matrix of the dynamics matrix
    # alpha : (K) x ()
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(D,K),
                  name='alpha')
    alpha.initialize_from_value(1*np.ones((D,K)))
    # A : (D) x (D,K)
    A = GaussianARD(0,
                    alpha,
                    shape=(D,K),
                    plates=(D,),
                    name='A',
                    plotter=bpplt.GaussianHintonPlotter(rows=0, 
                                                        cols=1,
                                                        scale=0),
                    initialize=False)

    # Initialize S and A such that A*S is almost an identity matrix
    a = np.zeros((D,D,K))
    a[np.arange(D),np.arange(D),np.zeros(D,dtype=int)] = 1
    a[:,:,0] = np.identity(D) / s[0,0]
    a[:,:,1:] = 0.1/s[0,0]*np.random.randn(D,D,K-1)
    A.initialize_from_value(a)

    # Latent states with dynamics
    # X : () x (N,D)
    X = VaryingGaussianMarkovChain(np.zeros(D),         # mean of x0
                                   1e-3*np.identity(D), # prec of x0
                                   A,                   # dynamics matrices
                                   S._convert(GaussianMoments)[1:], # temporal weights
                                   np.ones(D),          # innovation
                                   n=N,                 # time instances
                                   name='X',
                                   plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                                   initialize=False)
    X.initialize_from_value(np.random.randn(N,D))

    #
    # The model block for observations
    #

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='gamma')
    gamma.initialize_from_value(1e-2*np.ones(D))
    # C : (M,1) x (D)
    C = GaussianARD(0,
                    gamma,
                    shape=(D,),
                    plates=(M,1),
                    name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=2,
                                                        scale=0))
    C.initialize_from_value(np.random.randn(M,1,D))

    # Noiseless process
    # F : (M,N) x ()
    F = SumMultiply('d,d',
                    C,
                    X,
                    name='F')
                  
    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5,
                1e-5,
                name='tau')
    tau.initialize_from_value(1e2)

    # Observations
    # Y: (M,N) x ()
    Y = GaussianARD(F,
                    tau,
                    name='Y')

    # Construct inference machine
    Q = VB(Y, F, C, gamma, X, A, alpha, tau, S, B, beta)

    return Q
Example #23
    def test_message_to_parent(self):
        """
        Test the message to parents of Mixture node.
        """

        K = 3

        # Broadcasting the moments on the cluster axis
        Mu = GaussianARD(2, 1,
                         ndim=0,
                         plates=(K,))
        (mu, mumu) = Mu._message_to_child()
        Alpha = Gamma(3, 1,
                      plates=(K,))
        (alpha, logalpha) = Alpha._message_to_child()
        z = Categorical(np.ones(K)/K)
        X = Mixture(z, GaussianARD, Mu, Alpha)
        tau = 4
        Y = GaussianARD(X, tau)
        y = 5
        Y.observe(y)
        (x, xx) = X._message_to_child()
        m = z._message_from_children()
        self.assertAllClose(m[0] * np.ones(K),
                            random.gaussian_logpdf(xx*alpha,
                                                   x*alpha*mu,
                                                   mumu*alpha,
                                                   logalpha,
                                                   0)
                            * np.ones(K))
        m = Mu._message_from_children()
        self.assertAllClose(m[0],
                            1/K * (alpha*x) * np.ones(3))
        self.assertAllClose(m[1],
                            -0.5 * 1/K * alpha * np.ones(3))

        # Some parameters do not have cluster plate axis
        Mu = GaussianARD(2, 1,
                         ndim=0,
                         plates=(K,))
        (mu, mumu) = Mu._message_to_child()
        Alpha = Gamma(3, 1) # Note: no cluster plate axis!
        (alpha, logalpha) = Alpha._message_to_child()
        z = Categorical(np.ones(K)/K)
        X = Mixture(z, GaussianARD, Mu, Alpha)
        tau = 4
        Y = GaussianARD(X, tau)
        y = 5
        Y.observe(y)
        (x, xx) = X._message_to_child()
        m = z._message_from_children()
        self.assertAllClose(m[0] * np.ones(K),
                            random.gaussian_logpdf(xx*alpha,
                                                   x*alpha*mu,
                                                   mumu*alpha,
                                                   logalpha,
                                                   0)
                            * np.ones(K))
                                                   
        m = Mu._message_from_children()
        self.assertAllClose(m[0],
                            1/K * (alpha*x) * np.ones(3))
        self.assertAllClose(m[1],
                            -0.5 * 1/K * alpha * np.ones(3))

        # Cluster assignments do not have as many plate axes as parameters.
        M = 2
        Mu = GaussianARD(2, 1,
                         ndim=0,
                         plates=(K,M))
        (mu, mumu) = Mu._message_to_child()
        Alpha = Gamma(3, 1,
                      plates=(K,M))
        (alpha, logalpha) = Alpha._message_to_child()
        z = Categorical(np.ones(K)/K)
        X = Mixture(z, GaussianARD, Mu, Alpha, cluster_plate=-2)
        tau = 4
        Y = GaussianARD(X, tau)
        y = 5 * np.ones(M)
        Y.observe(y)
        (x, xx) = X._message_to_child()
        m = z._message_from_children()
        self.assertAllClose(m[0]*np.ones(K),
                            np.sum(random.gaussian_logpdf(xx*alpha,
                                                          x*alpha*mu,
                                                          mumu*alpha,
                                                          logalpha,
                                                          0) *
                                   np.ones((K,M)),
                                   axis=-1))
                                                   
        m = Mu._message_from_children()
        self.assertAllClose(m[0] * np.ones((K,M)),
                            1/K * (alpha*x) * np.ones((K,M)))
        self.assertAllClose(m[1] * np.ones((K,M)),
                            -0.5 * 1/K * alpha * np.ones((K,M)))
        

        # Mixed distribution broadcasts g
        # (regression test for a previously found bug that raised an error)
        Z = Categorical([0.3, 0.5, 0.2])
        X = Mixture(Z, Categorical, [[0.2,0.8], [0.1,0.9], [0.3,0.7]])
        m = Z._message_from_children()

        pass
Example #24
    def test_gradient(self):
        """Test standard gradient of a Gamma node."""
        D = 3

        np.random.seed(42)

        #
        # Without observations
        #

        # Construct model
        a = np.random.rand(D)
        b = np.random.rand(D)
        tau = Gamma(a, b)
        Q = VB(tau)
        # Random initialization
        tau.initialize_from_parameters(np.random.rand(D),
                                       np.random.rand(D))
        # Initial parameters
        phi0 = tau.phi
        # Gradient
        rg = tau.get_riemannian_gradient()
        g = tau.get_gradient(rg)
        # Numerical gradient
        eps = 1e-8
        p0 = tau.get_parameters()
        l0 = Q.compute_lowerbound(ignore_masked=False)
        g_num = [np.zeros(D), np.zeros(D)]
        for i in range(D):
            e = np.zeros(D)
            e[i] = eps
            p1 = p0[0] + e
            tau.set_parameters([p1, p0[1]])
            l1 = Q.compute_lowerbound(ignore_masked=False)
            g_num[0][i] = (l1 - l0) / eps
        for i in range(D):
            e = np.zeros(D)
            e[i] = eps
            p1 = p0[1] + e
            tau.set_parameters([p0[0], p1])
            l1 = Q.compute_lowerbound(ignore_masked=False)
            g_num[1][i] = (l1 - l0) / eps

        # Check
        self.assertAllClose(g[0],
                            g_num[0])
        self.assertAllClose(g[1],
                            g_num[1])

        #
        # With observations
        #

        # Construct model
        a = np.random.rand(D)
        b = np.random.rand(D)
        tau = Gamma(a, b)
        mu = np.random.randn(D)
        Y = GaussianARD(mu, tau)
        Y.observe(np.random.randn(D))
        Q = VB(Y, tau)
        # Random initialization
        tau.initialize_from_parameters(np.random.rand(D),
                                       np.random.rand(D))
        # Initial parameters
        phi0 = tau.phi
        # Gradient
        rg = tau.get_riemannian_gradient()
        g = tau.get_gradient(rg)
        # Numerical gradient
        eps = 1e-8
        p0 = tau.get_parameters()
        l0 = Q.compute_lowerbound(ignore_masked=False)
        g_num = [np.zeros(D), np.zeros(D)]
        for i in range(D):
            e = np.zeros(D)
            e[i] = eps
            p1 = p0[0] + e
            tau.set_parameters([p1, p0[1]])
            l1 = Q.compute_lowerbound(ignore_masked=False)
            g_num[0][i] = (l1 - l0) / eps
        for i in range(D):
            e = np.zeros(D)
            e[i] = eps
            p1 = p0[1] + e
            tau.set_parameters([p0[0], p1])
            l1 = Q.compute_lowerbound(ignore_masked=False)
            g_num[1][i] = (l1 - l0) / eps

        # Check
        self.assertAllClose(g[0],
                            g_num[0])
        self.assertAllClose(g[1],
                            g_num[1])

        pass
Example #25
    def test_message_to_parent(self):
        """
        Test the message to parents of Gate node.
        """

        # Unobserved and broadcasting
        Z = 2
        X = GaussianARD(0, 1, shape=(), plates=(3,))
        F = Gate(Z, X)
        Y = GaussianARD(F, 1)
        m = F._message_to_parent(0)
        self.assertEqual(len(m), 1)
        self.assertAllClose(m[0], 0*np.ones(3))
        m = F._message_to_parent(1)
        self.assertEqual(len(m), 2)
        self.assertAllClose(m[0]*np.ones(3), [0, 0, 0])
        self.assertAllClose(m[1]*np.ones(3), [0, 0, 0])
        
        # Gating scalar node
        Z = 2
        X = GaussianARD([1,2,3], 1, shape=(), plates=(3,))
        F = Gate(Z, X)
        Y = GaussianARD(F, 1)
        Y.observe(10)
        m = F._message_to_parent(0)
        self.assertAllClose(m[0], [10*1-0.5*2, 10*2-0.5*5, 10*3-0.5*10])
        m = F._message_to_parent(1)
        self.assertAllClose(m[0], [0, 0, 10])
        self.assertAllClose(m[1], [0, 0, -0.5])
        
        # Fixed X
        Z = 2
        X = [1,2,3]
        F = Gate(Z, X, moments=GaussianMoments(0))
        Y = GaussianARD(F, 1)
        Y.observe(10)
        m = F._message_to_parent(0)
        self.assertAllClose(m[0], [10*1-0.5*1, 10*2-0.5*4, 10*3-0.5*9])
        m = F._message_to_parent(1)
        self.assertAllClose(m[0], [0, 0, 10])
        self.assertAllClose(m[1], [0, 0, -0.5])

        # Uncertain gating
        Z = Categorical([0.2, 0.3, 0.5])
        X = GaussianARD([1,2,3], 1, shape=(), plates=(3,))
        F = Gate(Z, X)
        Y = GaussianARD(F, 1)
        Y.observe(10)
        m = F._message_to_parent(0)
        self.assertAllClose(m[0], [10*1-0.5*2, 10*2-0.5*5, 10*3-0.5*10])
        m = F._message_to_parent(1)
        self.assertAllClose(m[0], [0.2*10, 0.3*10, 0.5*10])
        self.assertAllClose(m[1], [-0.5*0.2, -0.5*0.3, -0.5*0.5])

        # Plates in Z
        Z = [2, 0]
        X = GaussianARD([1,2,3], 1, shape=(), plates=(3,))
        F = Gate(Z, X)
        Y = GaussianARD(F, 1)
        Y.observe([10, 20])
        m = F._message_to_parent(0)
        self.assertAllClose(m[0], [[10*1-0.5*2, 10*2-0.5*5, 10*3-0.5*10],
                                   [20*1-0.5*2, 20*2-0.5*5, 20*3-0.5*10]])
        m = F._message_to_parent(1)
        self.assertAllClose(m[0], [20, 0, 10])
        self.assertAllClose(m[1], [-0.5, 0, -0.5])

        # Plates in X
        Z = 2
        X = GaussianARD([[1,2,3], [4,5,6]], 1, shape=(), plates=(2,3,))
        F = Gate(Z, X)
        Y = GaussianARD(F, 1)
        Y.observe([10, 20])
        m = F._message_to_parent(0)
        self.assertAllClose(m[0], [10*1-0.5*2 + 20*4-0.5*17,
                                   10*2-0.5*5 + 20*5-0.5*26,
                                   10*3-0.5*10 + 20*6-0.5*37])
        m = F._message_to_parent(1)
        self.assertAllClose(m[0], [[0, 0, 10],
                                   [0, 0, 20]])
        self.assertAllClose(m[1]*np.ones((2,3)), [[0, 0, -0.5],
                                                  [0, 0, -0.5]])

        # Gating non-default plate
        Z = 2
        X = GaussianARD([[1],[2],[3]], 1, shape=(), plates=(3,1))
        F = Gate(Z, X, gated_plate=-2)
        Y = GaussianARD(F, 1)
        Y.observe([10])
        m = F._message_to_parent(0)
        self.assertAllClose(m[0], [10*1-0.5*2, 10*2-0.5*5, 10*3-0.5*10])
        m = F._message_to_parent(1)
        self.assertAllClose(m[0], [[0], [0], [10]])
        self.assertAllClose(m[1], [[0], [0], [-0.5]])

        # Gating non-scalar node
        Z = 2
        X = GaussianARD([[1,4],[2,5],[3,6]], 1, shape=(2,), plates=(3,))
        F = Gate(Z, X)
        Y = GaussianARD(F, 1)
        Y.observe([10,20])
        m = F._message_to_parent(0)
        self.assertAllClose(m[0], [10*1-0.5*2 + 20*4-0.5*17,
                                   10*2-0.5*5 + 20*5-0.5*26,
                                   10*3-0.5*10 + 20*6-0.5*37])
        m = F._message_to_parent(1)
        I = np.identity(2)
        self.assertAllClose(m[0], [[0,0], [0,0], [10,20]])
        self.assertAllClose(m[1], [0*I, 0*I, -0.5*I])
        
        # Broadcasting the moments on the cluster axis
        Z = 2
        X = GaussianARD(2, 1, shape=(), plates=(3,))
        F = Gate(Z, X)
        Y = GaussianARD(F, 1)
        Y.observe(10)
        m = F._message_to_parent(0)
        self.assertAllClose(m[0], [10*2-0.5*5, 10*2-0.5*5, 10*2-0.5*5])
        m = F._message_to_parent(1)
        self.assertAllClose(m[0], [0, 0, 10])
        self.assertAllClose(m[1], [0, 0, -0.5])

        pass
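As a usage illustration of the Gate node exercised above, here is a minimal sketch (not from the test suite; the numeric values are illustrative assumptions): an uncertain Categorical gate selects one of three plated Gaussian components, and VB recovers the gate posterior from a single observation.

import numpy as np
from bayespy.nodes import Categorical, GaussianARD, Gate
from bayespy.inference import VB

Z = Categorical(np.ones(3)/3)            # uncertain gate over three components
X = GaussianARD([0.0, 5.0, 10.0], 1,
                shape=(), plates=(3,))   # scalar component means
F = Gate(Z, X)                           # value of the selected component
Y = GaussianARD(F, 10)                   # high-precision observation node
Y.observe(9.5)

Q = VB(Y, Z, X)
Q.update(repeat=10)
print(Z.get_moments()[0])                # mass concentrates on the third component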
Example #26
    def test_message_to_child(self):
        """
        Test moments of GaussianARD.
        """

        # Check that moments have full shape when broadcasting
        X = GaussianARD(np.zeros((2,)),
                        np.ones((3,2)),
                        shape=(4,3,2))
        (u0, u1) = X._message_to_child()
        self.assertEqual(np.shape(u0),
                         (4,3,2))
        self.assertEqual(np.shape(u1),
                         (4,3,2,4,3,2))

        # Check the formula
        X = GaussianARD(2, 3)
        (u0, u1) = X._message_to_child()
        self.assertAllClose(u0, 2)
        self.assertAllClose(u1, 2**2 + 1/3)

        # Check the formula for multidimensional arrays
        X = GaussianARD(2*np.ones((2,1,4)),
                        3*np.ones((2,3,1)),
                        ndim=3)
        (u0, u1) = X._message_to_child()
        self.assertAllClose(u0, 2*np.ones((2,3,4)))
        self.assertAllClose(u1, 
                            2**2 * np.ones((2,3,4,2,3,4))
                            + 1/3 * misc.identity(2,3,4))
                            

        # Check the formula for dim-broadcasted mu
        X = GaussianARD(2*np.ones((3,1)),
                        3*np.ones((2,3,4)),
                        ndim=3)
        (u0, u1) = X._message_to_child()
        self.assertAllClose(u0, 2*np.ones((2,3,4)))
        self.assertAllClose(u1, 
                            2**2 * np.ones((2,3,4,2,3,4))
                            + 1/3 * misc.identity(2,3,4))
                            
        # Check the formula for dim-broadcasted alpha
        X = GaussianARD(2*np.ones((2,3,4)),
                        3*np.ones((3,1)),
                        ndim=3)
        (u0, u1) = X._message_to_child()
        self.assertAllClose(u0, 2*np.ones((2,3,4)))
        self.assertAllClose(u1, 
                            2**2 * np.ones((2,3,4,2,3,4))
                            + 1/3 * misc.identity(2,3,4))
                            
        # Check the formula for dim-broadcasted mu and alpha
        X = GaussianARD(2*np.ones((3,1)),
                        3*np.ones((3,1)),
                        shape=(2,3,4))
        (u0, u1) = X._message_to_child()
        self.assertAllClose(u0, 2*np.ones((2,3,4)))
        self.assertAllClose(u1, 
                            2**2 * np.ones((2,3,4,2,3,4))
                            + 1/3 * misc.identity(2,3,4))
                            
        # Check the formula for dim-broadcasted mu with plates
        mu = GaussianARD(2*np.ones((5,1,3,4)),
                         np.ones((5,1,3,4)),
                         shape=(3,4),
                         plates=(5,1))
        X = GaussianARD(mu,
                        3*np.ones((5,2,3,4)),
                        shape=(2,3,4),
                        plates=(5,))
        (u0, u1) = X._message_to_child()
        self.assertAllClose(u0, 2*np.ones((5,2,3,4)))
        self.assertAllClose(u1, 
                            2**2 * np.ones((5,2,3,4,2,3,4))
                            + 1/3 * misc.identity(2,3,4))

        # Check posterior
        X = GaussianARD(2, 3)
        Y = GaussianARD(X, 1)
        Y.observe(10)
        X.update()
        (u0, u1) = X._message_to_child()
        self.assertAllClose(u0,
                            1/(3+1) * (3*2 + 1*10))
        self.assertAllClose(u1,
                            (1/(3+1) * (3*2 + 1*10))**2 + 1/(3+1))
        
        pass
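The posterior check above can be restated through the public interface; a minimal sketch of the same numbers (prior mean 2 with precision 3, child precision 1, observation 10):

from bayespy.nodes import GaussianARD

X = GaussianARD(2, 3)       # prior mean 2, prior precision 3
Y = GaussianARD(X, 1)       # child with precision 1
Y.observe(10)
X.update()

x, x2 = X.get_moments()     # moments <x> and <x^2>
print(x)                    # (3*2 + 1*10) / (3+1) = 4.0
print(x2 - x**2)            # posterior variance 1/(3+1) = 0.25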
    def test_message_to_parent(self):
        """
        Test the messages to the parents of the Mixture node.
        """

        K = 3

        # Broadcasting the moments on the cluster axis
        Mu = GaussianARD(2, 1,
                         ndim=0,
                         plates=(K,))
        (mu, mumu) = Mu._message_to_child()
        Alpha = Gamma(3, 1,
                      plates=(K,))
        (alpha, logalpha) = Alpha._message_to_child()
        z = Categorical(np.ones(K)/K)
        X = Mixture(z, GaussianARD, Mu, Alpha)
        tau = 4
        Y = GaussianARD(X, tau)
        y = 5
        Y.observe(y)
        (x, xx) = X._message_to_child()
        m = z._message_from_children()
        self.assertAllClose(m[0] * np.ones(K),
                            random.gaussian_logpdf(xx*alpha,
                                                   x*alpha*mu,
                                                   mumu*alpha,
                                                   logalpha,
                                                   0)
                            * np.ones(K))
        m = Mu._message_from_children()
        self.assertAllClose(m[0],
                            1/K * (alpha*x) * np.ones(3))
        self.assertAllClose(m[1],
                            -0.5 * 1/K * alpha * np.ones(3))

        # Some parameters do not have cluster plate axis
        Mu = GaussianARD(2, 1,
                         ndim=0,
                         plates=(K,))
        (mu, mumu) = Mu._message_to_child()
        Alpha = Gamma(3, 1) # Note: no cluster plate axis!
        (alpha, logalpha) = Alpha._message_to_child()
        z = Categorical(np.ones(K)/K)
        X = Mixture(z, GaussianARD, Mu, Alpha)
        tau = 4
        Y = GaussianARD(X, tau)
        y = 5
        Y.observe(y)
        (x, xx) = X._message_to_child()
        m = z._message_from_children()
        self.assertAllClose(m[0] * np.ones(K),
                            random.gaussian_logpdf(xx*alpha,
                                                   x*alpha*mu,
                                                   mumu*alpha,
                                                   logalpha,
                                                   0)
                            * np.ones(K))
                                                   
        m = Mu._message_from_children()
        self.assertAllClose(m[0],
                            1/K * (alpha*x) * np.ones(3))
        self.assertAllClose(m[1],
                            -0.5 * 1/K * alpha * np.ones(3))

        # Cluster assignments do not have as many plate axes as parameters.
        M = 2
        Mu = GaussianARD(2, 1,
                         ndim=0,
                         plates=(K,M))
        (mu, mumu) = Mu._message_to_child()
        Alpha = Gamma(3, 1,
                      plates=(K,M))
        (alpha, logalpha) = Alpha._message_to_child()
        z = Categorical(np.ones(K)/K)
        X = Mixture(z, GaussianARD, Mu, Alpha, cluster_plate=-2)
        tau = 4
        Y = GaussianARD(X, tau)
        y = 5 * np.ones(M)
        Y.observe(y)
        (x, xx) = X._message_to_child()
        m = z._message_from_children()
        self.assertAllClose(m[0]*np.ones(K),
                            np.sum(random.gaussian_logpdf(xx*alpha,
                                                          x*alpha*mu,
                                                          mumu*alpha,
                                                          logalpha,
                                                          0) *
                                   np.ones((K,M)),
                                   axis=-1))
                                                   
        m = Mu._message_from_children()
        self.assertAllClose(m[0] * np.ones((K,M)),
                            1/K * (alpha*x) * np.ones((K,M)))
        self.assertAllClose(m[1] * np.ones((K,M)),
                            -0.5 * 1/K * alpha * np.ones((K,M)))
        

        # Mixture distribution broadcasts g.
        # Regression test: this construction used to raise an error.
        Z = Categorical([0.3, 0.5, 0.2])
        X = Mixture(Z, Categorical, [[0.2,0.8], [0.1,0.9], [0.3,0.7]])
        m = Z._message_from_children()

        #
        # Test nested mixtures
        #
        t1 = [1, 1, 0, 3, 3]
        t2 = [2]
        p = Dirichlet([1, 1], plates=(4, 3))
        X = Mixture(t1, Mixture, t2, Categorical, p)
        X.observe([1, 1, 0, 0, 0])
        p.update()
        self.assertAllClose(
            p.phi[0],
            [
                [[1, 1], [1, 1], [2, 1]],
                [[1, 1], [1, 1], [1, 3]],
                [[1, 1], [1, 1], [1, 1]],
                [[1, 1], [1, 1], [3, 1]],
            ]
        )

        # Test sample plates in nested mixtures
        t1 = Categorical([0.3, 0.7], plates=(5,))
        t2 = [[1], [1], [0], [3], [3]]
        t3 = 2
        p = Dirichlet([1, 1], plates=(2, 4, 3))
        X = Mixture(t1, Mixture, t2, Mixture, t3, Categorical, p)
        X.observe([1, 1, 0, 0, 0])
        p.update()
        self.assertAllClose(
            p.phi[0],
            [
                [
                    [[1, 1], [1, 1], [1.3, 1]],
                    [[1, 1], [1, 1], [1, 1.6]],
                    [[1, 1], [1, 1], [1, 1]],
                    [[1, 1], [1, 1], [1.6, 1]],
                ],
                [
                    [[1, 1], [1, 1], [1.7, 1]],
                    [[1, 1], [1, 1], [1, 2.4]],
                    [[1, 1], [1, 1], [1, 1]],
                    [[1, 1], [1, 1], [2.4, 1]],
                ]
            ]
        )

        # Check that Gate and nested Mixture are equal
        t1 = Categorical([0.3, 0.7], plates=(5,))
        t2 = Categorical([0.1, 0.3, 0.6], plates=(5, 1))
        p = Dirichlet([1, 2, 3, 4], plates=(2, 3))
        X = Mixture(t1, Mixture, t2, Categorical, p)
        X.observe([3, 3, 1, 2, 2])
        t1_msg = t1._message_from_children()
        t2_msg = t2._message_from_children()
        p_msg = p._message_from_children()
        t1 = Categorical([0.3, 0.7], plates=(5,))
        t2 = Categorical([0.1, 0.3, 0.6], plates=(5, 1))
        p = Dirichlet([1, 2, 3, 4], plates=(2, 3))
        X = Categorical(Gate(t1, Gate(t2, p)))
        X.observe([3, 3, 1, 2, 2])
        t1_msg2 = t1._message_from_children()
        t2_msg2 = t2._message_from_children()
        p_msg2 = p._message_from_children()
        self.assertAllClose(t1_msg[0], t1_msg2[0])
        self.assertAllClose(t2_msg[0], t2_msg2[0])
        self.assertAllClose(p_msg[0], p_msg2[0])

        pass
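The final check above shows that stacked Gate nodes and a nested Mixture induce identical messages. A condensed sketch of the Gate formulation, reusing the shapes from the test:

from bayespy.nodes import Categorical, Gate, Dirichlet

t1 = Categorical([0.3, 0.7], plates=(5,))
t2 = Categorical([0.1, 0.3, 0.6], plates=(5, 1))
p = Dirichlet([1, 2, 3, 4], plates=(2, 3))

# Equivalent to Mixture(t1, Mixture, t2, Categorical, p)
X = Categorical(Gate(t1, Gate(t2, p)))
X.observe([3, 3, 1, 2, 2])
p.update()
print(p.phi[0].shape)       # (2, 3, 4): updated Dirichlet pseudo-counts per branch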
Example #28
import numpy as np

from bayespy.nodes import GaussianARD, GaussianMarkovChain, Gamma, SumMultiply
from bayespy.inference import VB
import bayespy.plot as bpplt


def model(M=10, N=100, D=3):
    """
    Construct linear state-space model.

    See, for instance, the following publication:
    "Fast variational Bayesian linear state-space model"
    Luttinen (ECML 2013)
    """

    # Dynamics matrix with ARD
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='alpha')
    A = GaussianARD(0,
                    alpha,
                    shape=(D,),
                    plates=(D,),
                    plotter=bpplt.GaussianHintonPlotter(rows=0, 
                                                        cols=1,
                                                        scale=0),
                    name='A')
    A.initialize_from_value(np.identity(D))

    # Latent states with dynamics
    X = GaussianMarkovChain(np.zeros(D),         # mean of x0
                            1e-3*np.identity(D), # prec of x0
                            A,                   # dynamics
                            np.ones(D),          # innovation
                            n=N,                 # time instances
                            plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                            name='X')
    X.initialize_from_value(np.random.randn(N,D))

    # Mixing matrix from latent space to observation space using ARD
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='gamma')
    gamma.initialize_from_value(1e-2*np.ones(D))
    C = GaussianARD(0,
                    gamma,
                    shape=(D,),
                    plates=(M,1),
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=2,
                                                        scale=0),
                    name='C')
    C.initialize_from_value(np.random.randn(M,1,D))

    # Observation noise
    tau = Gamma(1e-5,
                1e-5,
                name='tau')
    tau.initialize_from_value(1e2)

    # Underlying noiseless function
    F = SumMultiply('i,i', 
                    C, 
                    X,
                    name='F')
    
    # Noisy observations
    Y = GaussianARD(F,
                    tau,
                    name='Y')

    Q = VB(Y, F, C, gamma, X, A, alpha, tau)

    return Q
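A hedged usage sketch for the model above: the synthetic data below is an illustrative assumption (a mixed latent random walk), not part of the original function. Y's plates are (M, N), so observations must be passed in that shape.

import numpy as np

np.random.seed(0)
M, N, D = 10, 100, 3
x = np.cumsum(np.random.randn(N, D), axis=0)   # latent random walk, shape (N, D)
c = np.random.randn(M, D)                      # mixing matrix, shape (M, D)
y = x @ c.T + 0.1*np.random.randn(N, M)        # noisy observations, shape (N, M)

Q = model(M=M, N=N, D=D)
Q['Y'].observe(y.T)                            # transpose to the expected (M, N)
Q.update(repeat=50)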
Example #29
    def test_message_to_parent_mu(self):
        """
        Test that GaussianARD computes the message to the 1st parent correctly.
        """

        # Check formula with uncertain parent alpha
        mu = GaussianARD(0, 1)
        alpha = Gamma(2,1)
        X = GaussianARD(mu,
                        alpha)
        X.observe(3)
        (m0, m1) = mu._message_from_children()
        #(m0, m1) = X._message_to_parent(0)
        self.assertAllClose(m0,
                            2*3)
        self.assertAllClose(m1,
                            -0.5*2)

        # Check formula with uncertain node
        mu = GaussianARD(1, 1e10)
        X = GaussianARD(mu, 2)
        Y = GaussianARD(X, 1)
        Y.observe(5)
        X.update()
        (m0, m1) = mu._message_from_children()
        self.assertAllClose(m0,
                            2 * 1/(2+1)*(2*1+1*5))
        self.assertAllClose(m1,
                            -0.5*2)

        # Check alpha larger than mu
        mu = GaussianARD(np.zeros((2,3)), 1e10, shape=(2,3))
        X = GaussianARD(mu,
                        2*np.ones((3,2,3)))
        X.observe(3*np.ones((3,2,3)))
        (m0, m1) = mu._message_from_children()
        self.assertAllClose(m0,
                            2*3 * 3 * np.ones((2,3)))
        self.assertAllClose(m1,
                            -0.5 * 3 * 2*misc.identity(2,3))

        # Check mu larger than alpha
        mu = GaussianARD(np.zeros((3,2,3)), 1e10, shape=(3,2,3))
        X = GaussianARD(mu,
                        2*np.ones((2,3)))
        X.observe(3*np.ones((3,2,3)))
        (m0, m1) = mu._message_from_children()
        self.assertAllClose(m0,
                            2 * 3 * np.ones((3,2,3)))
        self.assertAllClose(m1,
                            -0.5 * 2*misc.identity(3,2,3))

        # Check node larger than mu and alpha
        mu = GaussianARD(np.zeros((2,3)), 1e10, shape=(2,3))
        X = GaussianARD(mu,
                        2*np.ones((3,)),
                        shape=(3,2,3))
        X.observe(3*np.ones((3,2,3)))
        (m0, m1) = mu._message_from_children()
        self.assertAllClose(m0,
                            2*3 * 3*np.ones((2,3)))
        self.assertAllClose(m1,
                            -0.5 * 2 * 3*misc.identity(2,3))

        # Check broadcasting of dimensions
        mu = GaussianARD(np.zeros((2,1)), 1e10, shape=(2,1))
        X = GaussianARD(mu,
                        2*np.ones((2,3)),
                        shape=(2,3))
        X.observe(3*np.ones((2,3)))
        (m0, m1) = mu._message_from_children()
        self.assertAllClose(m0,
                            2*3 * 3*np.ones((2,1)))
        self.assertAllClose(m1,
                            -0.5 * 2 * 3*misc.identity(2,1))

        # Check plates for smaller mu than node
        mu = GaussianARD(0,1, 
                         shape=(3,),
                         plates=(4,1,1))
        X = GaussianARD(mu,
                        2*np.ones((3,)),
                        shape=(2,3),
                        plates=(4,5))
        X.observe(3*np.ones((4,5,2,3)))
        (m0, m1) = mu._message_from_children()
        self.assertAllClose(m0 * np.ones((4,1,1,3)),
                            2*3 * 5*2*np.ones((4,1,1,3)))
        self.assertAllClose(m1 * np.ones((4,1,1,3,3)),
                            -0.5*2 * 5*2*misc.identity(3) * np.ones((4,1,1,3,3)))

        # Check mask
        mu = GaussianARD(np.zeros((2,1,3)), 1e10, shape=(3,))
        X = GaussianARD(mu,
                        2*np.ones((2,4,3)),
                        shape=(3,),
                        plates=(2,4,))
        X.observe(3*np.ones((2,4,3)), mask=[[True, True, True, False],
                                            [False, True, False, True]])
        (m0, m1) = mu._message_from_children()
        self.assertAllClose(m0,
                            (2*3 * np.ones((2,1,3)) 
                             * np.array([[[3]], [[2]]])))
        self.assertAllClose(m1,
                            (-0.5*2 * misc.identity(3)
                             * np.ones((2,1,1,1))
                             * np.array([[[[3]]], [[[2]]]])))

        # Check mask with different shapes
        mu = GaussianARD(np.zeros((2,1,3)), 1e10, shape=())
        X = GaussianARD(mu,
                        2*np.ones((2,4,3)),
                        shape=(3,),
                        plates=(2,4,))
        mask = np.array([[True, True, True, False],
                         [False, True, False, True]])
        X.observe(3*np.ones((2,4,3)), mask=mask)
        (m0, m1) = mu._message_from_children()
        self.assertAllClose(m0,
                            2*3 * np.sum(np.ones((2,4,3))*mask[...,None], 
                                         axis=-2,
                                         keepdims=True))
        self.assertAllClose(m1,
                            (-0.5*2 * np.sum(np.ones((2,4,3))*mask[...,None],
                                             axis=-2,
                                             keepdims=True)))

        # Check non-ARD Gaussian child
        mu = np.array([1,2])
        Mu = GaussianARD(mu, 1e10, shape=(2,))
        alpha = np.array([3,4])
        Lambda = np.array([[1, 0.5],
                          [0.5, 1]])
        X = GaussianARD(Mu, alpha, ndim=1)
        Y = Gaussian(X, Lambda)
        y = np.array([5,6])
        Y.observe(y)
        X.update()
        (m0, m1) = Mu._message_from_children()
        mean = np.dot(np.linalg.inv(np.diag(alpha)+Lambda),
                      np.dot(np.diag(alpha), mu)
                      + np.dot(Lambda, y))
        self.assertAllClose(m0,
                            np.dot(np.diag(alpha), mean))
        self.assertAllClose(m1,
                            -0.5*np.diag(alpha))

        # Check broadcasted variable axes
        mu = GaussianARD(np.zeros(1), 1e10, shape=(1,))
        X = GaussianARD(mu,
                        2,
                        shape=(3,))
        X.observe(3*np.ones(3))
        (m0, m1) = mu._message_from_children()
        self.assertAllClose(m0,
                            2*3 * np.sum(np.ones(3), axis=-1, keepdims=True))
        self.assertAllClose(m1,
                            -0.5*2 * np.sum(np.identity(3), 
                                            axis=(-1,-2), 
                                            keepdims=True))

        pass
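To make the first check of this test concrete: with <alpha> = 2 and an observed child x = 3, the mean parent receives the natural-parameter message (m0, m1) = (<alpha><x>, -<alpha>/2). A minimal sketch using the same private helper as the tests:

from bayespy.nodes import GaussianARD, Gamma

mu = GaussianARD(0, 1)
alpha = Gamma(2, 1)             # <alpha> = 2/1 = 2
X = GaussianARD(mu, alpha)
X.observe(3)

m0, m1 = mu._message_from_children()
print(m0)                       # <alpha>*<x> = 2*3 = 6
print(m1)                       # -<alpha>/2 = -1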