Exemple #1
0
def model(M=10, N=100, D=3):
    """
    Construct linear state-space model.

    See, for instance, the following publication:
    "Fast variational Bayesian linear state-space model"
    Luttinen (ECML 2013)
    """

    # Dynamics matrix with ARD
    alpha = Gamma(1e-5, 1e-5, plates=(D, ), name='alpha')
    A = GaussianARD(0,
                    alpha,
                    shape=(D, ),
                    plates=(D, ),
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=1,
                                                        scale=0),
                    name='A')
    A.initialize_from_value(np.identity(D))

    # Latent states with dynamics
    X = GaussianMarkovChain(
        np.zeros(D),  # mean of x0
        1e-3 * np.identity(D),  # prec of x0
        A,  # dynamics
        np.ones(D),  # innovation
        n=N,  # time instances
        plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
        name='X')
    X.initialize_from_value(np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    gamma = Gamma(1e-5, 1e-5, plates=(D, ), name='gamma')
    gamma.initialize_from_value(1e-2 * np.ones(D))
    C = GaussianARD(0,
                    gamma,
                    shape=(D, ),
                    plates=(M, 1),
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=2,
                                                        scale=0),
                    name='C')
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Observation noise
    tau = Gamma(1e-5, 1e-5, name='tau')
    tau.initialize_from_value(1e2)

    # Underlying noiseless function
    F = SumMultiply('i,i', C, X, name='F')

    # Noisy observations
    Y = GaussianARD(F, tau, name='Y')

    Q = VB(Y, F, C, gamma, X, A, alpha, tau, C)

    return Q
Exemple #2
0
    def test_initialization(self):
        """
        Test initialization methods of GaussianARD
        """

        X = GaussianARD(1, 2, shape=(2, ), plates=(3, ))

        # Prior initialization
        mu = 1 * np.ones((3, 2))
        alpha = 2 * np.ones((3, 2))
        X.initialize_from_prior()
        u = X._message_to_child()
        self.assertAllClose(u[0] * np.ones((3, 2)), mu)
        self.assertAllClose(
            u[1] * np.ones((3, 2, 2)),
            linalg.outer(mu, mu, ndim=1) + misc.diag(1 / alpha, ndim=1))

        # Parameter initialization
        mu = np.random.randn(3, 2)
        alpha = np.random.rand(3, 2)
        X.initialize_from_parameters(mu, alpha)
        u = X._message_to_child()
        self.assertAllClose(u[0], mu)
        self.assertAllClose(
            u[1],
            linalg.outer(mu, mu, ndim=1) + misc.diag(1 / alpha, ndim=1))

        # Value initialization
        x = np.random.randn(3, 2)
        X.initialize_from_value(x)
        u = X._message_to_child()
        self.assertAllClose(u[0], x)
        self.assertAllClose(u[1], linalg.outer(x, x, ndim=1))

        # Random initialization
        X.initialize_from_random()

        pass
Exemple #3
0
    def test_initialization(self):
        """
        Test initialization methods of GaussianARD
        """

        X = GaussianARD(1, 2, shape=(2,), plates=(3,))

        # Prior initialization
        mu = 1 * np.ones((3, 2))
        alpha = 2 * np.ones((3, 2))
        X.initialize_from_prior()
        u = X._message_to_child()
        self.assertAllClose(u[0]*np.ones((3,2)), 
                            mu)
        self.assertAllClose(u[1]*np.ones((3,2,2)), 
                            linalg.outer(mu, mu, ndim=1) + 
                            misc.diag(1/alpha, ndim=1))

        # Parameter initialization
        mu = np.random.randn(3, 2)
        alpha = np.random.rand(3, 2)
        X.initialize_from_parameters(mu, alpha)
        u = X._message_to_child()
        self.assertAllClose(u[0], mu)
        self.assertAllClose(u[1], linalg.outer(mu, mu, ndim=1) + 
                                  misc.diag(1/alpha, ndim=1))

        # Value initialization
        x = np.random.randn(3, 2)
        X.initialize_from_value(x)
        u = X._message_to_child()
        self.assertAllClose(u[0], x)
        self.assertAllClose(u[1], linalg.outer(x, x, ndim=1))

        # Random initialization
        X.initialize_from_random()

        pass
Exemple #4
0
def model(M=10, N=100, D=3):
    """
    Construct linear state-space model.

    See, for instance, the following publication:
    "Fast variational Bayesian linear state-space model"
    Luttinen (ECML 2013)
    """

    # Dynamics matrix with ARD
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='alpha')
    A = GaussianARD(0,
                    alpha,
                    shape=(D,),
                    plates=(D,),
                    plotter=bpplt.GaussianHintonPlotter(rows=0, 
                                                        cols=1,
                                                        scale=0),
                    name='A')
    A.initialize_from_value(np.identity(D))

    # Latent states with dynamics
    X = GaussianMarkovChain(np.zeros(D),         # mean of x0
                            1e-3*np.identity(D), # prec of x0
                            A,                   # dynamics
                            np.ones(D),          # innovation
                            n=N,                 # time instances
                            plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                            name='X')
    X.initialize_from_value(np.random.randn(N,D))

    # Mixing matrix from latent space to observation space using ARD
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='gamma')
    gamma.initialize_from_value(1e-2*np.ones(D))
    C = GaussianARD(0,
                    gamma,
                    shape=(D,),
                    plates=(M,1),
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=2,
                                                        scale=0),
                    name='C')
    C.initialize_from_value(np.random.randn(M,1,D))

    # Observation noise
    tau = Gamma(1e-5,
                1e-5,
                name='tau')
    tau.initialize_from_value(1e2)

    # Underlying noiseless function
    F = SumMultiply('i,i', 
                    C, 
                    X,
                    name='F')
    
    # Noisy observations
    Y = GaussianARD(F,
                    tau,
                    name='Y')

    Q = VB(Y, F, C, gamma, X, A, alpha, tau, C)

    return Q
Exemple #5
0
def model(M=20, N=100, D=10, K=3):
    """
    Construct the linear state-space model with switching dynamics.
    """

    #
    # Switching dynamics (HMM)
    #

    # Prior for initial state probabilities
    rho = Dirichlet(1e-3 * np.ones(K), name='rho')

    # Prior for state transition probabilities
    V = Dirichlet(1e-3 * np.ones(K), plates=(K, ), name='V')
    v = 10 * np.identity(K) + 1 * np.ones((K, K))
    v /= np.sum(v, axis=-1, keepdims=True)
    V.initialize_from_value(v)

    # Hidden states (with unknown initial state probabilities and state
    # transition probabilities)
    Z = CategoricalMarkovChain(rho,
                               V,
                               states=N - 1,
                               name='Z',
                               plotter=bpplt.CategoricalMarkovChainPlotter(),
                               initialize=False)
    Z.u[0] = np.random.dirichlet(np.ones(K))
    Z.u[1] = np.reshape(
        np.random.dirichlet(0.5 * np.ones(K * K), size=(N - 2)), (N - 2, K, K))

    #
    # Linear state-space models
    #

    # Dynamics matrix with ARD
    # (K,D) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(K, 1, D), name='alpha')
    # (K,1,1,D) x (D)
    A = GaussianARD(0,
                    alpha,
                    shape=(D, ),
                    plates=(K, D),
                    name='A',
                    plotter=bpplt.GaussianHintonPlotter())
    A.initialize_from_value(
        np.identity(D) * np.ones((K, D, D)) + 0.1 * np.random.randn(K, D, D))

    # Latent states with dynamics
    # (K,1) x (N,D)
    X = SwitchingGaussianMarkovChain(
        np.zeros(D),  # mean of x0
        1e-3 * np.identity(D),  # prec of x0
        A,  # dynamics
        Z,  # dynamics selection
        np.ones(D),  # innovation
        n=N,  # time instances
        name='X',
        plotter=bpplt.GaussianMarkovChainPlotter())
    X.initialize_from_value(10 * np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    # (K,1,1,D) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D, ), name='gamma')
    # (K,M,1) x (D)
    C = GaussianARD(0,
                    gamma,
                    shape=(D, ),
                    plates=(M, 1),
                    name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=-3, cols=-1))
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Underlying noiseless function
    # (K,M,N) x ()
    F = SumMultiply('i,i', C, X, name='F')

    #
    # Mixing the models
    #

    # Observation noise
    tau = Gamma(1e-5, 1e-5, name='tau')
    tau.initialize_from_value(1e2)

    # Emission/observation distribution
    Y = GaussianARD(F, tau, name='Y')

    Q = VB(Y, F, Z, rho, V, C, gamma, X, A, alpha, tau)

    return Q
Exemple #6
0
def model(M=20, N=100, D=10, K=3):
    """
    Construct the linear state-space model with switching dynamics.
    """

    #
    # Switching dynamics (HMM)
    #

    # Prior for initial state probabilities
    rho = Dirichlet(1e-3*np.ones(K),
                    name='rho')

    # Prior for state transition probabilities
    V = Dirichlet(1e-3*np.ones(K),
                  plates=(K,),
                  name='V')
    v = 10*np.identity(K) + 1*np.ones((K,K))
    v /= np.sum(v, axis=-1, keepdims=True)
    V.initialize_from_value(v)

    # Hidden states (with unknown initial state probabilities and state
    # transition probabilities)
    Z = CategoricalMarkovChain(rho, V,
                               states=N-1,
                               name='Z',
                               plotter=bpplt.CategoricalMarkovChainPlotter(),
                               initialize=False)
    Z.u[0] = np.random.dirichlet(np.ones(K))
    Z.u[1] = np.reshape(np.random.dirichlet(0.5*np.ones(K*K), size=(N-2)),
                        (N-2, K, K))

    #
    # Linear state-space models
    #

    # Dynamics matrix with ARD
    # (K,D) x ()
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(K,1,D),
                  name='alpha')
    # (K,1,1,D) x (D)
    A = GaussianARD(0,
                    alpha,
                    shape=(D,),
                    plates=(K,D),
                    name='A',
                    plotter=bpplt.GaussianHintonPlotter())
    A.initialize_from_value(np.identity(D)*np.ones((K,D,D))
                            + 0.1*np.random.randn(K,D,D))

    # Latent states with dynamics
    # (K,1) x (N,D)
    X = SwitchingGaussianMarkovChain(np.zeros(D),         # mean of x0
                                     1e-3*np.identity(D), # prec of x0
                                     A,                   # dynamics
                                     Z,                   # dynamics selection
                                     np.ones(D),          # innovation
                                     n=N,                 # time instances
                                     name='X',
                                     plotter=bpplt.GaussianMarkovChainPlotter())
    X.initialize_from_value(10*np.random.randn(N,D))

    # Mixing matrix from latent space to observation space using ARD
    # (K,1,1,D) x ()
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='gamma')
    # (K,M,1) x (D)
    C = GaussianARD(0,
                    gamma,
                    shape=(D,),
                    plates=(M,1),
                    name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=-3,cols=-1))
    C.initialize_from_value(np.random.randn(M,1,D))

    # Underlying noiseless function
    # (K,M,N) x ()
    F = SumMultiply('i,i', 
                    C, 
                    X,
                    name='F')
    
    #
    # Mixing the models
    #

    # Observation noise
    tau = Gamma(1e-5,
                1e-5,
                name='tau')
    tau.initialize_from_value(1e2)

    # Emission/observation distribution
    Y = GaussianARD(F, tau,
                    name='Y')

    Q = VB(Y, F,
           Z, rho, V,
           C, gamma, X, A, alpha,
           tau)

    return Q
Exemple #7
0
def model(M, N, D, K):
    """
    Construct the linear state-space model with time-varying dynamics

    For reference, see the following publication:
    (TODO)
    """

    #
    # The model block for the latent mixing weight process
    #
    
    # Dynamics matrix with ARD
    # beta : (K) x ()
    beta = Gamma(1e-5,
                 1e-5,
                 plates=(K,),
                 name='beta')
    # B : (K) x (K)
    B = GaussianARD(np.identity(K),
                    beta,
                    shape=(K,),
                    plates=(K,),
                    name='B',
                    plotter=bpplt.GaussianHintonPlotter(rows=0, 
                                                        cols=1,
                                                        scale=0),
                    initialize=False)
    B.initialize_from_value(np.identity(K))

    # Mixing weight process, that is, the weights in the linear combination of
    # state dynamics matrices
    # S : () x (N,K)
    S = GaussianMarkovChain(np.ones(K),
                            1e-6*np.identity(K),
                            B,
                            np.ones(K),
                            n=N,
                            name='S',
                            plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                            initialize=False)
    s = 10*np.random.randn(N,K)
    s[:,0] = 10
    S.initialize_from_value(s)

    #
    # The model block for the latent states
    #
        
    # Projection matrix of the dynamics matrix
    # alpha : (K) x ()
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(D,K),
                  name='alpha')
    alpha.initialize_from_value(1*np.ones((D,K)))
    # A : (D) x (D,K)
    A = GaussianARD(0,
                    alpha,
                    shape=(D,K),
                    plates=(D,),
                    name='A',
                    plotter=bpplt.GaussianHintonPlotter(rows=0, 
                                                        cols=1,
                                                        scale=0),
                    initialize=False)

    # Initialize S and A such that A*S is almost an identity matrix
    a = np.zeros((D,D,K))
    a[np.arange(D),np.arange(D),np.zeros(D,dtype=int)] = 1
    a[:,:,0] = np.identity(D) / s[0,0]
    a[:,:,1:] = 0.1/s[0,0]*np.random.randn(D,D,K-1)
    A.initialize_from_value(a)

    # Latent states with dynamics
    # X : () x (N,D)
    X = VaryingGaussianMarkovChain(np.zeros(D),         # mean of x0
                                   1e-3*np.identity(D), # prec of x0
                                   A,                   # dynamics matrices
                                   S._convert(GaussianMoments)[1:], # temporal weights
                                   np.ones(D),          # innovation
                                   n=N,                 # time instances
                                   name='X',
                                   plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                                   initialize=False)
    X.initialize_from_value(np.random.randn(N,D))

    #
    # The model block for observations
    #

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='gamma')
    gamma.initialize_from_value(1e-2*np.ones(D))
    # C : (M,1) x (D)
    C = GaussianARD(0,
                    gamma,
                    shape=(D,),
                    plates=(M,1),
                    name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=2,
                                                        scale=0))
    C.initialize_from_value(np.random.randn(M,1,D))

    # Noiseless process
    # F : (M,N) x ()
    F = SumMultiply('d,d',
                    C,
                    X,
                    name='F')
                  
    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5,
                1e-5,
                name='tau')
    tau.initialize_from_value(1e2)

    # Observations
    # Y: (M,N) x ()
    Y = GaussianARD(F,
                    tau,
                    name='Y')

    # Construct inference machine
    Q = VB(Y, F, C, gamma, X, A, alpha, tau, S, B, beta)

    return Q
Exemple #8
0
def model(M, N, D, K):
    """
    Construct the linear state-space model with time-varying dynamics

    For reference, see the following publication:
    (TODO)
    """

    #
    # The model block for the latent mixing weight process
    #

    # Dynamics matrix with ARD
    # beta : (K) x ()
    beta = Gamma(1e-5, 1e-5, plates=(K, ), name='beta')
    # B : (K) x (K)
    B = GaussianARD(np.identity(K),
                    beta,
                    shape=(K, ),
                    plates=(K, ),
                    name='B',
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=1,
                                                        scale=0),
                    initialize=False)
    B.initialize_from_value(np.identity(K))

    # Mixing weight process, that is, the weights in the linear combination of
    # state dynamics matrices
    # S : () x (N,K)
    S = GaussianMarkovChain(np.ones(K),
                            1e-6 * np.identity(K),
                            B,
                            np.ones(K),
                            n=N,
                            name='S',
                            plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                            initialize=False)
    s = 10 * np.random.randn(N, K)
    s[:, 0] = 10
    S.initialize_from_value(s)

    #
    # The model block for the latent states
    #

    # Projection matrix of the dynamics matrix
    # alpha : (K) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(D, K), name='alpha')
    alpha.initialize_from_value(1 * np.ones((D, K)))
    # A : (D) x (D,K)
    A = GaussianARD(0,
                    alpha,
                    shape=(D, K),
                    plates=(D, ),
                    name='A',
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=1,
                                                        scale=0),
                    initialize=False)

    # Initialize S and A such that A*S is almost an identity matrix
    a = np.zeros((D, D, K))
    a[np.arange(D), np.arange(D), np.zeros(D, dtype=int)] = 1
    a[:, :, 0] = np.identity(D) / s[0, 0]
    a[:, :, 1:] = 0.1 / s[0, 0] * np.random.randn(D, D, K - 1)
    A.initialize_from_value(a)

    # Latent states with dynamics
    # X : () x (N,D)
    X = VaryingGaussianMarkovChain(
        np.zeros(D),  # mean of x0
        1e-3 * np.identity(D),  # prec of x0
        A,  # dynamics matrices
        S._convert(GaussianMoments)[1:],  # temporal weights
        np.ones(D),  # innovation
        n=N,  # time instances
        name='X',
        plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
        initialize=False)
    X.initialize_from_value(np.random.randn(N, D))

    #
    # The model block for observations
    #

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D, ), name='gamma')
    gamma.initialize_from_value(1e-2 * np.ones(D))
    # C : (M,1) x (D)
    C = GaussianARD(0,
                    gamma,
                    shape=(D, ),
                    plates=(M, 1),
                    name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=2,
                                                        scale=0))
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Noiseless process
    # F : (M,N) x ()
    F = SumMultiply('d,d', C, X, name='F')

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5, 1e-5, name='tau')
    tau.initialize_from_value(1e2)

    # Observations
    # Y: (M,N) x ()
    Y = GaussianARD(F, tau, name='Y')

    # Construct inference machine
    Q = VB(Y, F, C, gamma, X, A, alpha, tau, S, B, beta)

    return Q
Exemple #9
0
def run(N=500, seed=42, maxiter=100, plot=True):
    """
    Run deterministic annealing demo for 1-D Gaussian mixture.
    """

    if seed is not None:
        np.random.seed(seed)

    mu = GaussianARD(0, 1,
                     plates=(2,),
                     name='means')
    Z = Categorical([0.3, 0.7],
                    plates=(N,),
                    name='classes')
    Y = Mixture(Z, GaussianARD, mu, 1,
                name='observations')

    # Generate data
    z = Z.random()
    data = np.empty(N)
    for n in range(N):
        data[n] = [4, -4][z[n]]

    Y.observe(data)

    # Initialize means closer to the inferior local optimum in which the
    # cluster means are swapped
    mu.initialize_from_value([0, 6])

    Q = VB(Y, Z, mu)
    Q.save()

    #
    # Standard VB-EM algorithm
    #
    Q.update(repeat=maxiter)

    mu_vbem = mu.u[0].copy()
    L_vbem = Q.compute_lowerbound()

    #
    # VB-EM with deterministic annealing
    #
    Q.load()
    beta = 0.01
    while beta < 1.0:
        beta = min(beta*1.2, 1.0)
        print("Set annealing to %.2f" % beta)
        Q.set_annealing(beta)
        Q.update(repeat=maxiter, tol=1e-4)

    mu_anneal = mu.u[0].copy()
    L_anneal = Q.compute_lowerbound()

    print("==============================")
    print("RESULTS FOR VB-EM vs ANNEALING")
    print("Fixed component probabilities:", np.array([0.3, 0.7]))
    print("True component means:", np.array([4, -4]))
    print("VB-EM component means:", mu_vbem)
    print("VB-EM lower bound:", L_vbem)
    print("Annealed VB-EM component means:", mu_anneal)
    print("Annealed VB-EM lower bound:", L_anneal)
    
    return