Example #1
def run(M=10, N=100, D_y=3, D=5, seed=42, rotate=False, maxiter=100, debug=False):

    if seed is not None:
        np.random.seed(seed)
    
    # Generate data
    w = np.random.normal(0, 1, size=(M,1,D_y))
    x = np.random.normal(0, 1, size=(1,N,D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M,N))

    # Construct model
    (Y, F, W, X, tau, alpha) = model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.5) # randomly missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha)

    # Initialize some nodes randomly
    X.initialize_from_random()
    W.initialize_from_random()

    # Run inference algorithm
    if rotate:
        # Use rotations to speed up learning
        rotW = transformations.RotateGaussianARD(W, alpha)
        rotX = transformations.RotateGaussianARD(X)
        R = transformations.RotationOptimizer(rotW, rotX, D)
        for ind in range(maxiter):
            Q.update()
            if debug:
                R.rotate(check_bound=True,
                         check_gradient=True)
            else:
                R.rotate()
            
    else:
        # Use standard VB-EM alone
        Q.update(repeat=maxiter)

    # Plot results
    plt.figure()
    bpplt.timeseries_normal(F, scale=2)
    bpplt.timeseries(f, color='g', linestyle='-')
    bpplt.timeseries(y, color='r', linestyle='None', marker='+')
    plt.show()
Example #2
def gaussianmix_model(N, K, D):
    # N = number of data vectors
    # K = number of clusters
    # D = dimensionality

    # Construct the Gaussian mixture model

    # K prior weights (for components)
    alpha = nodes.Dirichlet(1e-3 * np.ones(K), name='alpha')
    # N K-dimensional cluster assignments (for data)
    z = nodes.Categorical(alpha, plates=(N, ), name='z')
    # K D-dimensional component means
    X = nodes.Gaussian(np.zeros(D),
                       0.01 * np.identity(D),
                       plates=(K, ),
                       name='X')
    # K D-dimensional component covariances
    Lambda = nodes.Wishart(D,
                           0.01 * np.identity(D),
                           plates=(K, ),
                           name='Lambda')
    # N D-dimensional observation vectors
    Y = nodes.Mixture(z, nodes.Gaussian, X, Lambda, plates=(N, ), name='Y')
    # TODO: Plates should be learned automatically if not given (it
    # would be the smallest shape broadcasted from the shapes of the
    # parents)

    z.initialize_from_random()

    return VB(Y, X, Lambda, z, alpha)
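A minimal usage sketch follows (not part of the original listing; the toy data and the name-based node lookup Q['Y'] are assumptions, the lookup pattern mirroring Example #14):

# Hypothetical usage of gaussianmix_model (assumed, not from the source):
# generate toy 2-D data from two clusters, observe it, and run VB-EM.
import numpy as np

N, K, D = 200, 5, 2
data = np.vstack([np.random.randn(N // 2, D) + 3.0,
                  np.random.randn(N // 2, D) - 3.0])
Q = gaussianmix_model(N, K, D)
Q['Y'].observe(data)     # nodes looked up by name, as in Example #14
Q.update(repeat=100)
Q['z'].show()            # posterior cluster assignments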
Example #3
def model(M, N, D):
    # Construct the PCA model with ARD

    # ARD
    alpha = nodes.Gamma(1e-2, 1e-2, plates=(D, ), name='alpha')

    # Loadings
    W = nodes.GaussianARD(0, alpha, shape=(D, ), plates=(M, 1), name='W')

    # States
    X = nodes.GaussianARD(0, 1, shape=(D, ), plates=(1, N), name='X')

    # PCA
    F = nodes.SumMultiply('i,i', W, X, name='F')

    # Noise
    tau = nodes.Gamma(1e-2, 1e-2, name='tau')

    # Noisy observations
    Y = nodes.GaussianARD(F, tau, name='Y')

    # Initialize some nodes randomly
    X.initialize_from_random()
    W.initialize_from_random()

    return VB(Y, F, W, X, tau, alpha)
Example #4
def model(M=10, N=100, D=3):
    """
    Construct linear state-space model.

    See, for instance, the following publication:
    "Fast variational Bayesian linear state-space model"
    Luttinen (ECML 2013)
    """

    # Dynamics matrix with ARD
    alpha = Gamma(1e-5, 1e-5, plates=(D, ), name='alpha')
    A = GaussianARD(0,
                    alpha,
                    shape=(D, ),
                    plates=(D, ),
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=1,
                                                        scale=0),
                    name='A')
    A.initialize_from_value(np.identity(D))

    # Latent states with dynamics
    X = GaussianMarkovChain(
        np.zeros(D),  # mean of x0
        1e-3 * np.identity(D),  # prec of x0
        A,  # dynamics
        np.ones(D),  # innovation
        n=N,  # time instances
        plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
        name='X')
    X.initialize_from_value(np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    gamma = Gamma(1e-5, 1e-5, plates=(D, ), name='gamma')
    gamma.initialize_from_value(1e-2 * np.ones(D))
    C = GaussianARD(0,
                    gamma,
                    shape=(D, ),
                    plates=(M, 1),
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=2,
                                                        scale=0),
                    name='C')
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Observation noise
    tau = Gamma(1e-5, 1e-5, name='tau')
    tau.initialize_from_value(1e2)

    # Underlying noiseless function
    F = SumMultiply('i,i', C, X, name='F')

    # Noisy observations
    Y = GaussianARD(F, tau, name='Y')

    Q = VB(Y, F, C, gamma, X, A, alpha, tau)

    return Q
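A brief, hypothetical driver for this constructor follows (the data array y is an assumption; the name-based node lookup follows Example #14):

# Hypothetical usage (assumed, not from the source): observe an (M, N) data
# array and run VB updates on the returned inference machine.
Q = model(M=10, N=100, D=3)
Q['Y'].observe(y)     # y is an assumed NumPy array of shape (10, 100)
Q.update(repeat=50)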
Example #5
def run(M=10, N=100, D_y=3, D=5):
    seed = 45
    print('seed =', seed)
    np.random.seed(seed)
    
    # Generate data
    w = np.random.normal(0, 1, size=(M,1,D_y))
    x = np.random.normal(0, 1, size=(1,N,D_y))
    f = utils.utils.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.5, size=(M,N))

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = utils.random.mask(M, N, p=0.9) # randomly missing
    mask[:,20:40] = False # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha)

    # Initialize some nodes randomly
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    # Inference loop.
    Q.update(repeat=100)

    # Plot results
    plt.clf()
    WX_params = WX.get_parameters()
    fh = WX_params[0] * np.ones(y.shape)
    err_fh = 2*np.sqrt(WX_params[1] + 1/tau.get_moments()[0]) * np.ones(y.shape)
    for m in range(M):
        plt.subplot(M,1,m+1)
        myplt.errorplot(fh[m], x=np.arange(N), error=err_fh[m])
        plt.plot(np.arange(N), f[m], 'g')
        plt.plot(np.arange(N), y[m], 'r+')

    plt.show()
Example #6
def run(M=30, D=5):

    # Generate data
    y = np.random.randint(D, size=(M, ))

    # Construct model
    p = nodes.Dirichlet(1 * np.ones(D), name='p')
    z = nodes.Categorical(p, plates=(M, ), name='z')

    # Observe the data with randomly missing values
    mask = random.mask(M, p=0.5)
    z.observe(y, mask=mask)

    # Run VB-EM
    Q = VB(p, z)
    Q.update()

    # Show results
    z.show()
    p.show()
Example #7
def run(M=30, D=5):

    # Generate data
    y = np.random.randint(D, size=(M,))

    # Construct model
    p = nodes.Dirichlet(1*np.ones(D),
                        name='p')
    z = nodes.Categorical(p, 
                          plates=(M,), 
                          name='z')

    # Observe the data with randomly missing values
    mask = random.mask(M, p=0.5)
    z.observe(y, mask=mask)

    # Run VB-EM
    Q = VB(p, z)
    Q.update()

    # Show results
    z.show()
    p.show()
Example #8
def mixture_model(distribution, *args, K=3, N=100):

    # Prior for state probabilities
    alpha = Dirichlet(1e-3 * np.ones(K), name='alpha')

    # Cluster assignments
    Z = Categorical(alpha, plates=(N, ), name='Z')

    # Observation distribution
    Y = Mixture(Z, distribution, *args, name='Y')

    Q = VB(Y, Z, alpha)

    return Q
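A hypothetical usage sketch follows (the data array and the constant emission parameters are assumptions; only Z and alpha are inferred here, since the emission parameters are passed as constants and are not part of the returned VB object):

# Hypothetical usage (assumed, not from the source): a mixture of three fixed
# unit-precision Gaussians on scalar data.
from bayespy.nodes import GaussianARD
Q = mixture_model(GaussianARD, [-4.0, 0.0, 4.0], 1.0, K=3, N=500)
Q['Y'].observe(data)     # data is an assumed length-500 array
Q.update(repeat=50)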
Example #9
def run(M=10,
        N=100,
        D_y=3,
        D=5,
        seed=42,
        rotate=False,
        maxiter=100,
        debug=False,
        plot=True):

    if seed is not None:
        np.random.seed(seed)

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M, N))

    # Construct model
    (Y, F, W, X, tau, alpha) = model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.5)  # randomly missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha)

    # Initialize some nodes randomly
    X.initialize_from_random()
    W.initialize_from_random()

    # Run inference algorithm
    if rotate:
        # Use rotations to speed up learning
        rotW = transformations.RotateGaussianARD(W, alpha)
        rotX = transformations.RotateGaussianARD(X)
        R = transformations.RotationOptimizer(rotW, rotX, D)
        for ind in range(maxiter):
            Q.update()
            if debug:
                R.rotate(check_bound=True, check_gradient=True)
            else:
                R.rotate()

    else:
        # Use standard VB-EM alone
        Q.update(repeat=maxiter)

    # Plot results
    if plot:
        plt.figure()
        bpplt.timeseries_normal(F, scale=2)
        bpplt.timeseries(f, color='g', linestyle='-')
        bpplt.timeseries(y, color='r', linestyle='None', marker='+')
Example #10
def hidden_markov_model(distribution, *args, K=3, N=100):

    # Prior for initial state probabilities
    alpha = Dirichlet(1e-3 * np.ones(K), name='alpha')

    # Prior for state transition probabilities
    A = Dirichlet(1e-3 * np.ones(K), plates=(K, ), name='A')

    # Hidden states (with unknown initial state probabilities and state
    # transition probabilities)
    Z = CategoricalMarkovChain(alpha, A, states=N, name='Z')

    # Emission/observation distribution
    Y = Mixture(Z, distribution, *args, name='Y')

    Q = VB(Y, Z, alpha, A)

    return Q
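A hypothetical usage sketch follows (the observation array and the fixed per-state means are assumptions; the shared constant precision mirrors the Mixture(Z, Gaussian, mu, np.identity(D)) pattern used elsewhere in this listing):

# Hypothetical usage (assumed, not from the source): an HMM with Gaussian
# emissions whose per-state means are fixed constants.
import numpy as np
from bayespy.nodes import Gaussian
means = np.array([[0.0, 0.0], [5.0, 5.0], [0.0, 5.0]])   # one 2-D mean per state
Q = hidden_markov_model(Gaussian, means, np.identity(2), K=3, N=200)
Q['Y'].observe(observations)    # observations is an assumed (200, 2) array
Q.update(repeat=100)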
Example #11
def gaussianmix_model(N, K, D, covariance='full'):
    # N = number of data vectors
    # K = number of clusters
    # D = dimensionality
    
    # Construct the Gaussian mixture model

    # K prior weights (for components)
    alpha = nodes.Dirichlet(1e-3*np.ones(K),
                            name='alpha')
    # N K-dimensional cluster assignments (for data)
    z = nodes.Categorical(alpha,
                          plates=(N,),
                          name='z')
    # K D-dimensional component means
    X = nodes.GaussianARD(0, 1e-3,
                          shape=(D,),
                          plates=(K,),
                          name='X')
    if covariance.lower() == 'full':
        # K D-dimensional component covariances
        Lambda = nodes.Wishart(D, 0.01*np.identity(D),
                               plates=(K,),
                               name='Lambda')
        # N D-dimensional observation vectors
        Y = nodes.Mixture(z, nodes.Gaussian, X, Lambda, plates=(N,), name='Y')
    elif covariance.lower() == 'diagonal':
        # Inverse variances
        Lambda = nodes.Gamma(1e-3, 1e-3, plates=(K, D), name='Lambda')
        # N D-dimensional observation vectors
        Y = nodes.Mixture(z, nodes.GaussianARD, X, Lambda, plates=(N,), name='Y')
    elif covariance.lower() == 'isotropic':
        # Inverse variances
        Lambda = nodes.Gamma(1e-3, 1e-3, plates=(K, 1), name='Lambda')
        # N D-dimensional observation vectors
        Y = nodes.Mixture(z, nodes.GaussianARD, X, Lambda, plates=(N,), name='Y')

    z.initialize_from_random()

    return VB(Y, X, Lambda, z, alpha)
Example #12
def run(M=10, N=100, D=5, seed=42, maxiter=100, plot=True):
    """
    Run deterministic annealing demo for 1-D Gaussian mixture.
    """

    raise NotImplementedError("Black box variational inference not yet implemented, sorry")

    if seed is not None:
        np.random.seed(seed)

    # Generate data
    data = np.dot(np.random.randn(M,D),
                  np.random.randn(D,N))

    # Construct model
    C = GaussianARD(0, 1, shape=(2,), plates=(M,1), name='C')
    X = GaussianARD(0, 1, shape=(2,), plates=(1,N), name='X')
    F = Dot(C, X)

    # Some arbitrary log likelihood
    def logpdf(y, f):
        """
        exp(f) / (1 + exp(f)) = 1/(1+exp(-f))

        -log(1+exp(-f)) = -log(exp(0)+exp(-f))

        also:
        1 - exp(f) / (1 + exp(f)) = (1 + exp(f) - exp(f)) / (1 + exp(f))
        = 1 / (1 + exp(f))
        = -log(1+exp(f)) = -log(exp(0)+exp(f))
        """
        return -np.logaddexp(0, -f * np.where(y, -1, +1))
        
    Y = LogPDF(logpdf, F, samples=10, shape=())
    #Y = GaussianARD(F, 1)

    Y.observe(data)

    Q = VB(Y, C, X)
    Q.ignore_bound_checks = True

    delay = 1
    forgetting_rate = 0.7
    for n in range(maxiter):

        # Observe a mini-batch
        #subset = np.random.choice(N, N_batch)
        #Y.observe(data[subset,:])

        # Learn intermediate variables
        #Q.update(Z)

        # Set step length
        step = (n + delay) ** (-forgetting_rate)

        # Stochastic gradient for the global variables
        Q.gradient_step(C, X, scale=step)
    
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')
        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')

    return
Example #13
def run(N=100000, N_batch=50, seed=42, maxiter=100, plot=True):
    """
    Run deterministic annealing demo for 1-D Gaussian mixture.
    """

    if seed is not None:
        np.random.seed(seed)

    # Number of clusters in the model
    K = 20

    # Dimensionality of the data
    D = 5

    # Generate data
    K_true = 10
    spread = 5
    means = spread * np.random.randn(K_true, D)
    z = random.categorical(np.ones(K_true), size=N)
    data = np.empty((N, D))
    for n in range(N):
        data[n] = means[z[n]] + np.random.randn(D)

    #
    # Standard VB-EM algorithm
    #

    # Full model
    mu = Gaussian(np.zeros(D), np.identity(D), plates=(K, ), name='means')
    alpha = Dirichlet(np.ones(K), name='class probabilities')
    Z = Categorical(alpha, plates=(N, ), name='classes')
    Y = Mixture(Z, Gaussian, mu, np.identity(D), name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Put the data in
    Y.observe(data)

    # Run inference
    Q = VB(Y, Z, mu, alpha)
    Q.save(mu)
    Q.update(repeat=maxiter)
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')
    max_cputime = np.sum(Q.cputime[~np.isnan(Q.cputime)])

    #
    # Stochastic variational inference
    #

    # Construct smaller model (size of the mini-batch)
    mu = Gaussian(np.zeros(D), np.identity(D), plates=(K, ), name='means')
    alpha = Dirichlet(np.ones(K), name='class probabilities')
    Z = Categorical(alpha,
                    plates=(N_batch, ),
                    plates_multiplier=(N / N_batch, ),
                    name='classes')
    Y = Mixture(Z, Gaussian, mu, np.identity(D), name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Inference engine
    Q = VB(Y, Z, mu, alpha, autosave_filename=Q.autosave_filename)
    Q.load(mu)

    # Because using mini-batches, messages need to be multiplied appropriately
    print("Stochastic variational inference...")
    Q.ignore_bound_checks = True

    maxiter *= int(N / N_batch)
    delay = 1
    forgetting_rate = 0.7
    for n in range(maxiter):

        # Observe a mini-batch
        subset = np.random.choice(N, N_batch)
        Y.observe(data[subset, :])

        # Learn intermediate variables
        Q.update(Z)

        # Set step length
        step = (n + delay)**(-forgetting_rate)

        # Stochastic gradient for the global variables
        Q.gradient_step(mu, alpha, scale=step)

        if np.sum(Q.cputime[:n]) > max_cputime:
            break

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Stochastic inference'],
                            loc='lower right')
        bpplt.pyplot.title('VB for Gaussian mixture model')

    return
Example #14
def run(M=40, N=100, D_y=6, D=8, seed=42, rotate=False, maxiter=1000, debug=False, plot=True):
    """
    Run pattern search demo for PCA.
    """

    if seed is not None:
        np.random.seed(seed)
    
    # Generate data
    w = np.random.normal(0, 1, size=(M,1,D_y))
    x = np.random.normal(0, 1, size=(1,N,D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M,N))

    # Construct model
    Q = VB(*(pca.model(M, N, D)))

    # Data with missing values
    mask = random.mask(M, N, p=0.5) # randomly missing
    y[~mask] = np.nan
    Q['Y'].observe(y, mask=mask)

    # Initialize some nodes randomly
    Q['X'].initialize_from_random()
    Q['W'].initialize_from_random()

    # Use a few VB-EM updates at the beginning
    Q.update(repeat=10)
    Q.save()

    # Standard VB-EM as a baseline
    Q.update(repeat=maxiter)
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore initial state
    Q.load()

    # Pattern search method for comparison
    for n in range(maxiter):

        Q.pattern_search('W', 'tau', maxiter=3, collapsed=['X', 'alpha'])
        Q.update(repeat=20)

        if Q.has_converged():
            break

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Pattern search'], loc='lower right')
Example #15
def run(M=40,
        N=100,
        D_y=6,
        D=8,
        seed=42,
        rotate=False,
        maxiter=1000,
        debug=False,
        plot=True):
    """
    Run pattern search demo for PCA.
    """

    if seed is not None:
        np.random.seed(seed)

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M, N))

    # Construct model
    Q = VB(*(pca.model(M, N, D)))

    # Data with missing values
    mask = random.mask(M, N, p=0.5)  # randomly missing
    y[~mask] = np.nan
    Q['Y'].observe(y, mask=mask)

    # Initialize some nodes randomly
    Q['X'].initialize_from_random()
    Q['W'].initialize_from_random()

    # Use a few VB-EM updates at the beginning
    Q.update(repeat=10)
    Q.save()

    # Standard VB-EM as a baseline
    Q.update(repeat=maxiter)
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore initial state
    Q.load()

    # Pattern search method for comparison
    for n in range(maxiter):

        Q.pattern_search('W', 'tau', maxiter=3, collapsed=['X', 'alpha'])
        Q.update(repeat=20)

        if Q.has_converged():
            break

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Pattern search'], loc='lower right')
Example #16
def run_dlssm(y, f, mask, D, K, maxiter):
    """
    Run VB inference for linear state space model with drifting dynamics.
    """

    (M, N) = np.shape(y)

    # Dynamics matrix with ARD
    # alpha : (D) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(K, ), name='alpha')
    # A : (K) x (K)
    A = Gaussian(
        np.zeros(K),
        #np.identity(K),
        diagonal(alpha),
        plates=(K, ),
        name='A_S')
    A.initialize_from_value(np.identity(K))

    # rho
    ## rho = Gamma(1e-5,
    ##             1e-5,
    ##             plates=(K,),
    ##             name="rho")

    # S : () x (N-1,K)
    S = GaussianMarkovChain(np.ones(K),
                            1e-6 * np.identity(K),
                            A,
                            np.ones(K),
                            n=N - 1,
                            name='S')
    S.initialize_from_value(1 * np.ones((N - 1, K)))

    # Projection matrix of the dynamics matrix
    # beta : (K) x ()
    beta = Gamma(1e-5, 1e-5, plates=(K, ), name='beta')
    # B : (D) x (D*K)
    B = Gaussian(np.zeros(D * K),
                 diagonal(tile(beta, D)),
                 plates=(D, ),
                 name='B')
    b = np.zeros((D, D, K))
    b[np.arange(D), np.arange(D), np.zeros(D, dtype=int)] = 1
    B.initialize_from_value(np.reshape(1 * b, (D, D * K)))

    # A : (N-1,D) x (D)
    BS = MatrixDot(B, S.as_gaussian().add_plate_axis(-1), name='BS')

    # Latent states with dynamics
    # X : () x (N,D)
    X = GaussianMarkovChain(
        np.zeros(D),  # mean of x0
        1e-3 * np.identity(D),  # prec of x0
        BS,  # dynamics
        np.ones(D),  # innovation
        n=N,  # time instances
        name='X',
        initialize=False)
    X.initialize_from_value(np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D, ), name='gamma')
    # C : (M,1) x (D)
    C = Gaussian(np.zeros(D), diagonal(gamma), plates=(M, 1), name='C')
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5, 1e-5, name='tau')

    # Observations
    # Y : (M,N) x ()
    CX = Dot(C, X.as_gaussian())
    Y = Normal(CX, tau, name='Y')

    #
    # RUN INFERENCE
    #

    # Observe data
    Y.observe(y, mask=mask)
    # Construct inference machine
    Q = VB(Y, X, S, A, alpha, B, beta, C, gamma, tau)

    #
    # Run inference with rotations.
    #

    # Rotate the D-dimensional state space (C, X)
    rotB = transformations.RotateGaussianMatrixARD(B, beta, axis='rows')
    rotX = transformations.RotateDriftingMarkovChain(X, B, S, rotB)
    rotC = transformations.RotateGaussianARD(C, gamma)
    R_X = transformations.RotationOptimizer(rotX, rotC, D)

    # Rotate the K-dimensional latent dynamics space (B, S)
    rotA = transformations.RotateGaussianARD(A, alpha)
    rotS = transformations.RotateGaussianMarkovChain(S, A, rotA)
    rotB = transformations.RotateGaussianMatrixARD(B, beta, axis='cols')
    R_S = transformations.RotationOptimizer(rotS, rotB, K)

    # Iterate
    for ind in range(int(maxiter / 5)):
        Q.update(repeat=5)
        #Q.update(X, S, A, alpha, rho, B, beta, C, gamma, tau, repeat=maxiter)
        R_X.rotate()
        R_S.rotate()
        ## R_X.rotate(
        ## check_bound=Q.compute_lowerbound,
        ## check_bound_terms=Q.compute_lowerbound_terms,
        ## check_gradient=True
        ##     )
        ## R_S.rotate(
        ## check_bound=Q.compute_lowerbound,
        ## check_bound_terms=Q.compute_lowerbound_terms,
        ## check_gradient=True
        ##     )

    #
    # SHOW RESULTS
    #

    # Mean and standard deviation of the posterior
    (f_mean, f_squared) = CX.get_moments()
    f_std = np.sqrt(f_squared - f_mean**2)

    # Plot observations space
    for m in range(M):
        plt.subplot(M, 1, m + 1)
        plt.plot(y[m, :], 'r.')
        plt.plot(f[m, :], 'b-')
        bpplt.errorplot(y=f_mean[m, :], error=2 * f_std[m, :])
Example #17
def run(M=50, N=200, D_y=10, D=20, maxiter=100):
    seed = 45
    print("seed =", seed)
    np.random.seed(seed)

    # Generate data (covariance eigenvalues: 1,1,...,1,2^2,3^2,...,(D_y+1)^2)
    (q, r) = scipy.linalg.qr(np.random.randn(M, M))
    C = np.diag(np.arange(2, 2 + D_y))
    C = np.ones(M)
    C[:D_y] += np.arange(1, 1 + D_y)
    y = C[:, np.newaxis] * np.random.randn(M, N)
    y = np.dot(q, y)

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = utils.random.mask(M, N, p=0.9)  # randomly missing
    mask[:, 20:40] = False  # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_filename=utils.utils.tempfile())

    # Initialize nodes (from prior and randomly)
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    Q.update(repeat=1)
    Q.save()

    #
    # Run inference with rotations.
    #
    rotX = transformations.RotateGaussian(X)
    rotW = transformations.RotateGaussianARD(W, alpha)
    R = transformations.RotationOptimizer(rotX, rotW, D)

    for ind in range(maxiter):
        Q.update()
        R.rotate(
            check_gradient=False,
            maxiter=10,
            verbose=False,
            check_bound=Q.compute_lowerbound,
            check_bound_terms=Q.compute_lowerbound_terms,
        )

    L_rot = Q.L

    #
    # Re-run inference without rotations.
    #
    Q.load()
    Q.update(repeat=maxiter)
    L_norot = Q.L

    #
    # Plot comparison
    #
    plt.plot(L_rot)
    plt.plot(L_norot)
    plt.legend(["With rotations", "Without rotations"], loc="lower right")
    plt.show()
Example #18
def run_lssm(y, f, mask, D, maxiter):
    """
    Run VB inference for linear state space model.
    """

    (M, N) = np.shape(y)

    #
    # CONSTRUCT THE MODEL
    #

    # Dynamic matrix
    # alpha: (D) x ()
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='alpha')
    # A : (D) x (D)
    A = Gaussian(np.zeros(D),
                 diagonal(alpha),
                 plates=(D,),
                 name='A')
    A.initialize_from_value(np.identity(D))

    # Latent states with dynamics
    # X : () x (N,D)
    X = GaussianMarkovChain(np.zeros(D),         # mean of x0
                            1e-3*np.identity(D), # prec of x0
                            A,                   # dynamics
                            np.ones(D),          # innovation
                            n=N,                 # time instances
                            name='X',
                            initialize=False)
    X.initialize_from_value(np.random.randn(N,D))

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='gamma')
    # C : (M,1) x (D)
    C = Gaussian(np.zeros(D),
                 diagonal(gamma),
                 plates=(M,1),
                 name='C')
    C.initialize_from_value(np.random.randn(M,1,D))

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5,
                1e-5,
                name='tau')

    # Observations
    # Y : (M,N) x ()
    CX = Dot(C, X.as_gaussian())
    Y = Normal(CX,
               tau,
               name='Y')

    # Rotate the D-dimensional latent space
    rotA = transformations.RotateGaussianARD(A, alpha)
    rotX = transformations.RotateGaussianMarkovChain(X, A, rotA)
    rotC = transformations.RotateGaussianARD(C, gamma)
    R = transformations.RotationOptimizer(rotX, rotC, D)

    #
    # RUN INFERENCE
    #

    # Observe data
    Y.observe(y, mask=mask)
    # Construct inference machine
    Q = VB(Y, X, A, alpha, C, gamma, tau)

    # Iterate
    for ind in range(maxiter):
        Q.update(X, A, alpha, C, gamma, tau)
        R.rotate()

    #
    # SHOW RESULTS
    #

    plt.figure()
    bpplt.timeseries_normal(CX)
    bpplt.timeseries(f, 'b-')
    bpplt.timeseries(y, 'r.')
Example #19
def pca():

    np.random.seed(41)

    M = 10
    N = 3000
    D = 5

    # Construct the PCA model
    alpha = Gamma(1e-3, 1e-3, plates=(D, ), name='alpha')
    W = GaussianARD(0, alpha, plates=(M, 1), shape=(D, ), name='W')
    X = GaussianARD(0, 1, plates=(1, N), shape=(D, ), name='X')
    tau = Gamma(1e-3, 1e-3, name='tau')
    W.initialize_from_random()
    F = SumMultiply('d,d->', W, X)
    Y = GaussianARD(F, tau, name='Y')

    # Observe data
    data = np.sum(np.random.randn(M, 1, D - 1) * np.random.randn(1, N, D - 1),
                  axis=-1) + 1e-1 * np.random.randn(M, N)
    Y.observe(data)

    # Initialize VB engine
    Q = VB(Y, X, W, alpha, tau)

    # Take one update step (so phi is ok)
    Q.update(repeat=1)
    Q.save()

    # Run VB-EM
    Q.update(repeat=200)
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore the state
    Q.load()

    # Run Riemannian conjugate gradient
    #Q.optimize(X, alpha, maxiter=100, collapsed=[W, tau])
    Q.optimize(W, tau, maxiter=100, collapsed=[X, alpha])
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

    bpplt.pyplot.show()
Example #20
F = SumMultiply('d,d',
                C,
                X,
                name='F')
Y = GaussianARD(F,
                tau,
                name='Y')


# 
# An inference machine using variational Bayesian inference with variational
# message passing is then constructed as
# 

# In[3]:

from bayespy.inference.vmp.vmp import VB
Q = VB(X, C, gamma, A, alpha, tau, Y)


# 
# Observe the data partially (80% is marked missing):
# 

# In[4]:

from bayespy.utils import random

# Add missing values randomly (keep only 20%)
mask = random.mask(M, N, p=0.2)
Y.observe(y, mask=mask)

Example #21
def run_lssm(y, f, mask, D, maxiter):
    """
    Run VB inference for linear state space model.
    """

    (M, N) = np.shape(y)

    #
    # CONSTRUCT THE MODEL
    #

    # Dynamic matrix
    # alpha: (D) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(D, ), name='alpha')
    # A : (D) x (D)
    A = Gaussian(np.zeros(D), diagonal(alpha), plates=(D, ), name='A')
    A.initialize_from_value(np.identity(D))

    # Latent states with dynamics
    # X : () x (N,D)
    X = GaussianMarkovChain(
        np.zeros(D),  # mean of x0
        1e-3 * np.identity(D),  # prec of x0
        A,  # dynamics
        np.ones(D),  # innovation
        n=N,  # time instances
        name='X',
        initialize=False)
    X.initialize_from_value(np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D, ), name='gamma')
    # C : (M,1) x (D)
    C = Gaussian(np.zeros(D), diagonal(gamma), plates=(M, 1), name='C')
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5, 1e-5, name='tau')

    # Observations
    # Y : (M,N) x ()
    CX = Dot(C, X.as_gaussian())
    Y = Normal(CX, tau, name='Y')

    #
    # RUN INFERENCE
    #

    # Observe data
    Y.observe(y, mask=mask)
    # Construct inference machine
    Q = VB(Y, X, A, alpha, C, gamma, tau)
    # Iterate
    Q.update(X, A, alpha, C, gamma, tau, repeat=maxiter)

    #
    # SHOW RESULTS
    #

    # Mean and standard deviation of the posterior
    (f_mean, f_squared) = CX.get_moments()
    f_std = np.sqrt(f_squared - f_mean**2)

    # Plot observations space
    #plt.figure()
    for m in range(M):
        plt.subplot(M, 1, m + 1)
        plt.plot(y[m, :], 'r.')
        plt.plot(f[m, :], 'b-')
        bpplt.errorplot(y=f_mean[m, :], error=2 * f_std[m, :])
Example #22
def run(maxiter=100):

    seed = 496  #np.random.randint(1000)
    print("seed = ", seed)
    np.random.seed(seed)

    # Simulate some data
    D = 3
    M = 6
    N = 200
    c = np.random.randn(M, D)
    w = 0.3
    a = np.array([[np.cos(w), -np.sin(w), 0],
                  [np.sin(w),  np.cos(w), 0],
                  [0,          0,         1]])
    x = np.empty((N, D))
    f = np.empty((M, N))
    y = np.empty((M, N))
    x[0] = 10 * np.random.randn(D)
    f[:, 0] = np.dot(c, x[0])
    y[:, 0] = f[:, 0] + 3 * np.random.randn(M)
    for n in range(N - 1):
        x[n + 1] = np.dot(a, x[n]) + np.random.randn(D)
        f[:, n + 1] = np.dot(c, x[n + 1])
        y[:, n + 1] = f[:, n + 1] + 3 * np.random.randn(M)

    # Create the model
    (Y, CX, X, tau, C, gamma, A, alpha) = linear_state_space_model(D, N, M)

    # Add missing values randomly
    mask = random.mask(M, N, p=0.3)
    # Add missing values to a period of time
    mask[:, 30:80] = False
    y[~mask] = np.nan  # BayesPy doesn't require this. Just for plotting.
    # Observe the data
    Y.observe(y, mask=mask)

    # Initialize nodes (must use some randomness for C)
    C.initialize_from_random()

    # Run inference
    Q = VB(Y, X, C, gamma, A, alpha, tau)

    #
    # Run inference with rotations.
    #
    rotA = transformations.RotateGaussianARD(A, alpha)
    rotX = transformations.RotateGaussianMarkovChain(X, A, rotA)
    rotC = transformations.RotateGaussianARD(C, gamma)
    R = transformations.RotationOptimizer(rotX, rotC, D)

    #maxiter = 84
    for ind in range(maxiter):
        Q.update()
        #print('C term', C.lower_bound_contribution())
        R.rotate(
            maxiter=10,
            check_gradient=True,
            verbose=False,
            check_bound=Q.compute_lowerbound,
            #check_bound=None,
            check_bound_terms=Q.compute_lowerbound_terms)
        #check_bound_terms=None)

    X_vb = X.u[0]
    varX_vb = utils.diagonal(X.u[1] - X_vb[..., np.newaxis, :] *
                             X_vb[..., :, np.newaxis])

    u_CX = CX.get_moments()
    CX_vb = u_CX[0]
    varCX_vb = u_CX[1] - CX_vb**2

    # Show results
    plt.figure(3)
    plt.clf()
    for m in range(M):
        plt.subplot(M, 1, m + 1)
        plt.plot(y[m, :], 'r.')
        plt.plot(f[m, :], 'b-')
        bpplt.errorplot(y=CX_vb[m, :], error=2 * np.sqrt(varCX_vb[m, :]))

    plt.figure()
    Q.plot_iteration_by_nodes()
Example #23
def model(M, N, D, K):
    """
    Construct the linear state-space model with time-varying dynamics

    For reference, see the following publication:
    (TODO)
    """

    #
    # The model block for the latent mixing weight process
    #

    # Dynamics matrix with ARD
    # beta : (K) x ()
    beta = Gamma(1e-5, 1e-5, plates=(K, ), name='beta')
    # B : (K) x (K)
    B = GaussianARD(np.identity(K),
                    beta,
                    shape=(K, ),
                    plates=(K, ),
                    name='B',
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=1,
                                                        scale=0),
                    initialize=False)
    B.initialize_from_value(np.identity(K))

    # Mixing weight process, that is, the weights in the linear combination of
    # state dynamics matrices
    # S : () x (N,K)
    S = GaussianMarkovChain(np.ones(K),
                            1e-6 * np.identity(K),
                            B,
                            np.ones(K),
                            n=N,
                            name='S',
                            plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                            initialize=False)
    s = 10 * np.random.randn(N, K)
    s[:, 0] = 10
    S.initialize_from_value(s)

    #
    # The model block for the latent states
    #

    # Projection matrix of the dynamics matrix
    # alpha : (K) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(D, K), name='alpha')
    alpha.initialize_from_value(1 * np.ones((D, K)))
    # A : (D) x (D,K)
    A = GaussianARD(0,
                    alpha,
                    shape=(D, K),
                    plates=(D, ),
                    name='A',
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=1,
                                                        scale=0),
                    initialize=False)

    # Initialize S and A such that A*S is almost an identity matrix
    a = np.zeros((D, D, K))
    a[np.arange(D), np.arange(D), np.zeros(D, dtype=int)] = 1
    a[:, :, 0] = np.identity(D) / s[0, 0]
    a[:, :, 1:] = 0.1 / s[0, 0] * np.random.randn(D, D, K - 1)
    A.initialize_from_value(a)

    # Latent states with dynamics
    # X : () x (N,D)
    X = VaryingGaussianMarkovChain(
        np.zeros(D),  # mean of x0
        1e-3 * np.identity(D),  # prec of x0
        A,  # dynamics matrices
        S._convert(GaussianMoments)[1:],  # temporal weights
        np.ones(D),  # innovation
        n=N,  # time instances
        name='X',
        plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
        initialize=False)
    X.initialize_from_value(np.random.randn(N, D))

    #
    # The model block for observations
    #

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D, ), name='gamma')
    gamma.initialize_from_value(1e-2 * np.ones(D))
    # C : (M,1) x (D)
    C = GaussianARD(0,
                    gamma,
                    shape=(D, ),
                    plates=(M, 1),
                    name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=2,
                                                        scale=0))
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Noiseless process
    # F : (M,N) x ()
    F = SumMultiply('d,d', C, X, name='F')

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5, 1e-5, name='tau')
    tau.initialize_from_value(1e2)

    # Observations
    # Y: (M,N) x ()
    Y = GaussianARD(F, tau, name='Y')

    # Construct inference machine
    Q = VB(Y, F, C, gamma, X, A, alpha, tau, S, B, beta)

    return Q
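A hypothetical usage sketch follows (the data array y and the mask settings are assumptions; the masking pattern mirrors the other demos in this listing):

# Hypothetical usage (assumed, not from the source): observe partially missing
# data and run VB updates on the returned inference machine.
from bayespy.utils import random
Q = model(M=20, N=400, D=4, K=3)
mask = random.mask(20, 400, p=0.8)    # keep roughly 80% of the observations
Q['Y'].observe(y, mask=mask)          # y is an assumed array of shape (20, 400)
Q.update(repeat=200)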
Example #24
def pca():

    np.random.seed(41)

    M = 10
    N = 3000
    D = 5

    # Construct the PCA model
    alpha = Gamma(1e-3, 1e-3, plates=(D,), name='alpha')
    W = GaussianARD(0, alpha, plates=(M,1), shape=(D,), name='W')
    X = GaussianARD(0, 1, plates=(1,N), shape=(D,), name='X')
    tau = Gamma(1e-3, 1e-3, name='tau')
    W.initialize_from_random()
    F = SumMultiply('d,d->', W, X)
    Y = GaussianARD(F, tau, name='Y')

    # Observe data
    data = np.sum(np.random.randn(M,1,D-1) * np.random.randn(1,N,D-1), axis=-1) + 1e-1 * np.random.randn(M,N)
    Y.observe(data)

    # Initialize VB engine
    Q = VB(Y, X, W, alpha, tau)

    # Take one update step (so phi is ok)
    Q.update(repeat=1)
    Q.save()

    # Run VB-EM
    Q.update(repeat=200)
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore the state
    Q.load()

    # Run Riemannian conjugate gradient
    #Q.optimize(X, alpha, maxiter=100, collapsed=[W, tau])
    Q.optimize(W, tau, maxiter=100, collapsed=[X, alpha])
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

    bpplt.pyplot.show()
Example #25
def run(M=10, N=100, D_y=3, D=5):
    seed = 45
    print('seed =', seed)
    np.random.seed(seed)

    # Check HDF5 version.
    if h5py.version.hdf5_version_tuple < (1, 8, 7):
        print(
            "WARNING! Your HDF5 version is %s. HDF5 versions <1.8.7 are not "
            "able to save empty arrays, thus you may experience problems if "
            "you for instance try to save before running any iteration steps."
            % str(h5py.version.hdf5_version_tuple))

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.5, size=(M, N))

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.9)  # randomly missing
    mask[:, 20:40] = False  # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_iterations=5)

    # Initialize some nodes randomly
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    # Save the state into a HDF5 file
    filename = tempfile.NamedTemporaryFile(suffix='hdf5').name
    Q.update(X, W, alpha, tau, repeat=1)
    Q.save(filename=filename)

    # Inference loop.
    Q.update(X, W, alpha, tau, repeat=10)

    # Reload the state from the HDF5 file
    Q.load(filename=filename)

    # Inference loop again.
    Q.update(X, W, alpha, tau, repeat=10)

    # NOTE: Saving and loading requires that you have the model
    # constructed. "Save" does not store the model structure nor does "load"
    # read it. They are just used for reading and writing the contents of the
    # nodes. Thus, if you want to load, you first need to construct the same
    # model that was used for saving and then use load to set the states of the
    # nodes.

    plt.clf()
    WX_params = WX.get_parameters()
    fh = WX_params[0] * np.ones(y.shape)
    err_fh = 2 * np.sqrt(WX_params[1] + 1 / tau.get_moments()[0]) * np.ones(
        y.shape)
    for m in range(M):
        plt.subplot(M, 1, m + 1)
        #errorplot(y, error=None, x=None, lower=None, upper=None):
        bpplt.errorplot(fh[m], x=np.arange(N), error=err_fh[m])
        plt.plot(np.arange(N), f[m], 'g')
        plt.plot(np.arange(N), y[m], 'r+')

    plt.figure()
    Q.plot_iteration_by_nodes()

    plt.figure()
    plt.subplot(2, 2, 1)
    bpplt.binary_matrix(W.mask)
    plt.subplot(2, 2, 2)
    bpplt.binary_matrix(X.mask)
    plt.subplot(2, 2, 3)
    #bpplt.binary_matrix(WX.get_mask())
    plt.subplot(2, 2, 4)
    bpplt.binary_matrix(Y.mask)
Example #26
def run(M=50, N=200, D_y=10, D=20, maxiter=100):
    seed = 45
    print('seed =', seed)
    np.random.seed(seed)

    # Generate data (covariance eigenvalues: 1,1,...,1,2^2,3^2,...,(D_y+1)^2)
    (q, r) = scipy.linalg.qr(np.random.randn(M, M))
    C = np.diag(np.arange(2, 2 + D_y))
    C = np.ones(M)
    C[:D_y] += np.arange(1, 1 + D_y)
    y = C[:, np.newaxis] * np.random.randn(M, N)
    y = np.dot(q, y)

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = utils.random.mask(M, N, p=0.9)  # randomly missing
    mask[:, 20:40] = False  # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_filename=utils.utils.tempfile())

    # Initialize nodes (from prior and randomly)
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    Q.update(repeat=1)
    Q.save()

    #
    # Run inference with rotations.
    #
    rotX = transformations.RotateGaussian(X)
    rotW = transformations.RotateGaussianARD(W, alpha)
    R = transformations.RotationOptimizer(rotX, rotW, D)

    for ind in range(maxiter):
        Q.update()
        R.rotate(check_gradient=False,
                 maxiter=10,
                 verbose=False,
                 check_bound=Q.compute_lowerbound,
                 check_bound_terms=Q.compute_lowerbound_terms)

    L_rot = Q.L

    #
    # Re-run inference without rotations.
    #
    Q.load()
    Q.update(repeat=maxiter)
    L_norot = Q.L

    #
    # Plot comparison
    #
    plt.plot(L_rot)
    plt.plot(L_norot)
    plt.legend(['With rotations', 'Without rotations'], loc='lower right')
    plt.show()
Example #27
def model(M=20, N=100, D=10, K=3):
    """
    Construct the linear state-space model with switching dynamics.
    """

    #
    # Switching dynamics (HMM)
    #

    # Prior for initial state probabilities
    rho = Dirichlet(1e-3 * np.ones(K), name='rho')

    # Prior for state transition probabilities
    V = Dirichlet(1e-3 * np.ones(K), plates=(K, ), name='V')
    v = 10 * np.identity(K) + 1 * np.ones((K, K))
    v /= np.sum(v, axis=-1, keepdims=True)
    V.initialize_from_value(v)

    # Hidden states (with unknown initial state probabilities and state
    # transition probabilities)
    Z = CategoricalMarkovChain(rho,
                               V,
                               states=N - 1,
                               name='Z',
                               plotter=bpplt.CategoricalMarkovChainPlotter(),
                               initialize=False)
    Z.u[0] = np.random.dirichlet(np.ones(K))
    Z.u[1] = np.reshape(
        np.random.dirichlet(0.5 * np.ones(K * K), size=(N - 2)), (N - 2, K, K))

    #
    # Linear state-space models
    #

    # Dynamics matrix with ARD
    # (K,D) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(K, 1, D), name='alpha')
    # (K,1,1,D) x (D)
    A = GaussianARD(0,
                    alpha,
                    shape=(D, ),
                    plates=(K, D),
                    name='A',
                    plotter=bpplt.GaussianHintonPlotter())
    A.initialize_from_value(
        np.identity(D) * np.ones((K, D, D)) + 0.1 * np.random.randn(K, D, D))

    # Latent states with dynamics
    # (K,1) x (N,D)
    X = SwitchingGaussianMarkovChain(
        np.zeros(D),  # mean of x0
        1e-3 * np.identity(D),  # prec of x0
        A,  # dynamics
        Z,  # dynamics selection
        np.ones(D),  # innovation
        n=N,  # time instances
        name='X',
        plotter=bpplt.GaussianMarkovChainPlotter())
    X.initialize_from_value(10 * np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    # (K,1,1,D) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D, ), name='gamma')
    # (K,M,1) x (D)
    C = GaussianARD(0,
                    gamma,
                    shape=(D, ),
                    plates=(M, 1),
                    name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=-3, cols=-1))
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Underlying noiseless function
    # (K,M,N) x ()
    F = SumMultiply('i,i', C, X, name='F')

    #
    # Mixing the models
    #

    # Observation noise
    tau = Gamma(1e-5, 1e-5, name='tau')
    tau.initialize_from_value(1e2)

    # Emission/observation distribution
    Y = GaussianARD(F, tau, name='Y')

    Q = VB(Y, F, Z, rho, V, C, gamma, X, A, alpha, tau)

    return Q
Example #28
def run(N=500, seed=42, maxiter=100, plot=True):
    """
    Run deterministic annealing demo for 1-D Gaussian mixture.
    """

    if seed is not None:
        np.random.seed(seed)

    mu = GaussianARD(0, 1,
                     plates=(2,),
                     name='means')
    Z = Categorical([0.3, 0.7],
                    plates=(N,),
                    name='classes')
    Y = Mixture(Z, GaussianARD, mu, 1,
                name='observations')

    # Generate data
    z = Z.random()
    data = np.empty(N)
    for n in range(N):
        data[n] = [4, -4][z[n]]

    Y.observe(data)

    # Initialize means closer to the inferior local optimum in which the
    # cluster means are swapped
    mu.initialize_from_value([0, 6])

    Q = VB(Y, Z, mu)
    Q.save()

    #
    # Standard VB-EM algorithm
    #
    Q.update(repeat=maxiter)

    mu_vbem = mu.u[0].copy()
    L_vbem = Q.compute_lowerbound()

    #
    # VB-EM with deterministic annealing
    #
    Q.load()
    beta = 0.01
    while beta < 1.0:
        beta = min(beta*1.2, 1.0)
        print("Set annealing to %.2f" % beta)
        Q.set_annealing(beta)
        Q.update(repeat=maxiter, tol=1e-4)

    mu_anneal = mu.u[0].copy()
    L_anneal = Q.compute_lowerbound()

    print("==============================")
    print("RESULTS FOR VB-EM vs ANNEALING")
    print("Fixed component probabilities:", np.array([0.3, 0.7]))
    print("True component means:", np.array([4, -4]))
    print("VB-EM component means:", mu_vbem)
    print("VB-EM lower bound:", L_vbem)
    print("Annealed VB-EM component means:", mu_anneal)
    print("Annealed VB-EM lower bound:", L_anneal)
    
    return
Example #29
def run(M=10, N=100, D=5, seed=42, maxiter=100, plot=True):
    """
    Run deterministic annealing demo for 1-D Gaussian mixture.
    """

    raise NotImplementedError("Black box variational inference not yet implemented, sorry")

    if seed is not None:
        np.random.seed(seed)

    # Generate data
    data = np.dot(np.random.randn(M,D),
                  np.random.randn(D,N))

    # Construct model
    C = GaussianARD(0, 1, shape=(2,), plates=(M,1), name='C')
    X = GaussianARD(0, 1, shape=(2,), plates=(1,N), name='X')
    F = Dot(C, X)

    # Some arbitrary log likelihood
    def logpdf(y, f):
        """
        exp(f) / (1 + exp(f)) = 1/(1+exp(-f))

        -log(1+exp(-f)) = -log(exp(0)+exp(-f))

        also:
        1 - exp(f) / (1 + exp(f)) = (1 + exp(f) - exp(f)) / (1 + exp(f))
        = 1 / (1 + exp(f))
        = -log(1+exp(f)) = -log(exp(0)+exp(f))
        """
        return -np.logaddexp(0, -f * np.where(y, -1, +1))
        
    Y = LogPDF(logpdf, F, samples=10, shape=())
    #Y = GaussianARD(F, 1)

    Y.observe(data)

    Q = VB(Y, C, X)
    Q.ignore_bound_checks = True

    delay = 1
    forgetting_rate = 0.7
    for n in range(maxiter):

        # Observe a mini-batch
        #subset = np.random.choice(N, N_batch)
        #Y.observe(data[subset,:])

        # Learn intermediate variables
        #Q.update(Z)

        # Set step length
        step = (n + delay) ** (-forgetting_rate)

        # Stochastic gradient for the global variables
        Q.gradient_step(C, X, scale=step)
    
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')
        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')

    return
Example #30
def run_dlssm(y, f, mask, D, K, maxiter):
    """
    Run VB inference for linear state space model with drifting dynamics.
    """
        
    (M, N) = np.shape(y)

    # Dynamics matrix with ARD
    # alpha : (D) x ()
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(K,),
                  name='alpha')
    # A : (K) x (K)
    A = Gaussian(np.zeros(K),
                 #np.identity(K),
                 diagonal(alpha),
                 plates=(K,),
                 name='A_S')
    A.initialize_from_value(np.identity(K))

    # rho
    ## rho = Gamma(1e-5,
    ##             1e-5,
    ##             plates=(K,),
    ##             name="rho")

    # S : () x (N-1,K)
    S = GaussianMarkovChain(np.ones(K),
                            1e-6*np.identity(K),
                            A,
                            np.ones(K),
                            n=N-1,
                            name='S')
    S.initialize_from_value(1*np.ones((N-1,K)))

    # Projection matrix of the dynamics matrix
    # beta : (K) x ()
    beta = Gamma(1e-5,
                 1e-5,
                 plates=(K,),
                 name='beta')
    # B : (D) x (D*K)
    B = Gaussian(np.zeros(D*K),
                 diagonal(tile(beta, D)),
                 plates=(D,),
                 name='B')
    b = np.zeros((D,D,K))
    b[np.arange(D),np.arange(D),np.zeros(D,dtype=int)] = 1
    B.initialize_from_value(np.reshape(1*b, (D,D*K)))

    # A : (N-1,D) x (D)
    BS = MatrixDot(B, 
                   S.as_gaussian().add_plate_axis(-1), 
                   name='BS')

    # Latent states with dynamics
    # X : () x (N,D)
    X = GaussianMarkovChain(np.zeros(D),         # mean of x0
                            1e-3*np.identity(D), # prec of x0
                            BS,                   # dynamics
                            np.ones(D),          # innovation
                            n=N,                 # time instances
                            name='X',
                            initialize=False)
    X.initialize_from_value(np.random.randn(N,D))

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='gamma')
    # C : (M,1) x (D)
    C = Gaussian(np.zeros(D),
                 diagonal(gamma),
                 plates=(M,1),
                 name='C')
    C.initialize_from_value(np.random.randn(M,1,D))

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5,
                1e-5,
                name='tau')

    # Observations
    # Y : (M,N) x ()
    CX = Dot(C, X.as_gaussian())
    Y = Normal(CX,
               tau,
               name='Y')

    #
    # RUN INFERENCE
    #

    # Observe data
    Y.observe(y, mask=mask)
    # Construct inference machine
    Q = VB(Y, X, S, A, alpha, B, beta, C, gamma, tau)

    #
    # Run inference with rotations.
    #

    # Rotate the D-dimensional state space (C, X)
    rotB = transformations.RotateGaussianMatrixARD(B, beta, axis='rows')
    rotX = transformations.RotateDriftingMarkovChain(X, B, S, rotB)
    rotC = transformations.RotateGaussianARD(C, gamma)
    R_X = transformations.RotationOptimizer(rotX, rotC, D)

    # Rotate the K-dimensional latent dynamics space (B, S)
    rotA = transformations.RotateGaussianARD(A, alpha)
    rotS = transformations.RotateGaussianMarkovChain(S, A, rotA)
    rotB = transformations.RotateGaussianMatrixARD(B, beta, axis='cols')
    R_S = transformations.RotationOptimizer(rotS, rotB, K)

    # Iterate
    for ind in range(int(maxiter/5)):
        Q.update(repeat=5)
        #Q.update(X, S, A, alpha, rho, B, beta, C, gamma, tau, repeat=maxiter)
        R_X.rotate()
        R_S.rotate()
        ## R_X.rotate(
        ## check_bound=Q.compute_lowerbound,
        ## check_bound_terms=Q.compute_lowerbound_terms,
        ## check_gradient=True
        ##     )
        ## R_S.rotate(
        ## check_bound=Q.compute_lowerbound,
        ## check_bound_terms=Q.compute_lowerbound_terms,
        ## check_gradient=True
        ##     )

    #
    # SHOW RESULTS
    #

    # Mean and standard deviation of the posterior
    (f_mean, f_squared) = CX.get_moments()
    f_std = np.sqrt(f_squared - f_mean**2)

    # Plot observations space
    for m in range(M):
        plt.subplot(M,1,m+1)
        plt.plot(y[m,:], 'r.')
        plt.plot(f[m,:], 'b-')
        bpplt.errorplot(y=f_mean[m,:], error=2*f_std[m,:])
Example #31
def run_dlssm(y, f, mask, D, K, maxiter):
    """
    Run VB inference for linear state space model with drifting dynamics.
    """
        
    (M, N) = np.shape(y)

    # Dynamics matrix with ARD
    # alpha : (K) x ()
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(K,),
                  name='alpha')
    # A : (K) x (K)
    A = GaussianArrayARD(np.identity(K),
                         alpha,
                         shape=(K,),
                         plates=(K,),
                         name='A_S',
                         initialize=False)
    A.initialize_from_value(np.identity(K))

    # State of the drift
    # S : () x (N,K)
    S = GaussianMarkovChain(np.ones(K),
                            1e-6*np.identity(K),
                            A,
                            np.ones(K),
                            n=N,
                            name='S',
                            initialize=False)
    S.initialize_from_value(np.ones((N,K)))

    # Projection matrix of the dynamics matrix
    # Initialize S and B such that BS is identity matrix
    # beta : (K) x ()
    beta = Gamma(1e-5,
                 1e-5,
                 plates=(D,K),
                 name='beta')
    # B : (D) x (D,K)
    b = np.zeros((D,D,K))
    b[np.arange(D),np.arange(D),np.zeros(D,dtype=int)] = 1
    B = GaussianArrayARD(0,
                         beta,
                         plates=(D,),
                         name='B',
                         initialize=False)
    B.initialize_from_value(np.reshape(1*b, (D,D,K)))
    # BS : (N-1,D) x (D)
    # TODO/FIXME: Implement __getitem__ method
    BS = SumMultiply('dk,k->d',
                     B, 
                     S.as_gaussian()[...,np.newaxis],
    #                     iterator_axis=0,
                     name='BS')

    # Latent states with dynamics
    # X : () x (N,D)
    X = GaussianMarkovChain(np.zeros(D),         # mean of x0
                            1e-3*np.identity(D), # prec of x0
                            BS,                  # dynamics
                            np.ones(D),          # innovation
                            n=N+1,               # time instances
                            name='X',
                            initialize=False)
    X.initialize_from_value(np.random.randn(N+1,D))

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,K),
                  name='gamma')
    # C : (M,1) x (D,K)
    C = GaussianArrayARD(0,
                         gamma,
                         plates=(M,1),
                         name='C',
                         initialize=False)
    C.initialize_from_random()

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5,
                1e-5,
                name='tau')

    # Observations
    # Y : (M,N) x ()
    F = SumMultiply('dk,d,k',
                    C,
                    X.as_gaussian()[1:],
                    S.as_gaussian(),
                    name='F')
                  
    Y = GaussianArrayARD(F,
                         tau,
                         name='Y')

    #
    # RUN INFERENCE
    #

    # Observe data
    Y.observe(y, mask=mask)
    # Construct inference machine
    Q = VB(Y, X, S, A, alpha, B, beta, C, gamma, tau)

    #
    # Run inference with rotations.
    #

    rotate = False  # set to True to enable the rotation speed-up (disabled here)
    if rotate:
        # Rotate the D-dimensional state space (C, X)
        rotB = transformations.RotateGaussianMatrixARD(B, beta, D, K, axis='rows')
        rotX = transformations.RotateDriftingMarkovChain(X, B, S, rotB)
        rotC = transformations.RotateGaussianARD(C, gamma)
        R_X = transformations.RotationOptimizer(rotX, rotC, D)

        # Rotate the K-dimensional latent dynamics space (B, S)
        rotA = transformations.RotateGaussianARD(A, alpha)
        rotS = transformations.RotateGaussianMarkovChain(S, A, rotA)
        rotB = transformations.RotateGaussianMatrixARD(B, beta, D, K, axis='cols')
        R_S = transformations.RotationOptimizer(rotS, rotB, K)
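        # The rotations are parameter-expansion transformations: after each VB
        # sweep the (C, X) and (B, S) spaces are rotated so that the lower
        # bound increases, which typically speeds up convergence considerably.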

    # Iterate
    for ind in range(maxiter):
        print("X update")
        Q.update(X)
        print("S update")
        Q.update(S)
        print("A update")
        Q.update(A)
        print("alpha update")
        Q.update(alpha)
        print("B update")
        Q.update(B)
        print("beta update")
        Q.update(beta)
        print("C update")
        Q.update(C)
        print("gamma update")
        Q.update(gamma)
        print("tau update")
        Q.update(tau)
        if rotate:
            R_X.rotate()
            R_S.rotate()

    Q.plot_iteration_by_nodes()
    
    #
    # SHOW RESULTS
    #

    # Plot observation space
    plt.figure()
    bpplt.timeseries_normal(F)
    bpplt.timeseries(f, 'b-')
    bpplt.timeseries(y, 'r.')
    
    # Plot latent space
    plt.figure()
    bpplt.timeseries_gaussian_mc(X, scale=2)
    
    # Plot drift space
    plt.figure()
    bpplt.timeseries_gaussian_mc(S, scale=2)
Exemple #32
0
def run(maxiter=100):

    seed = 496  # np.random.randint(1000)
    print("seed = ", seed)
    np.random.seed(seed)

    # Simulate some data
    D = 3
    M = 6
    N = 200
    c = np.random.randn(M,D)
    w = 0.3
    a = np.array([[np.cos(w), -np.sin(w), 0], 
                  [np.sin(w), np.cos(w),  0], 
                  [0,         0,          1]])
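    # The dynamics matrix a rotates the first two state dimensions by w
    # radians per time step (an oscillating latent process) and leaves the
    # third dimension as a random walk.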
    x = np.empty((N,D))
    f = np.empty((M,N))
    y = np.empty((M,N))
    x[0] = 10*np.random.randn(D)
    f[:,0] = np.dot(c,x[0])
    y[:,0] = f[:,0] + 3*np.random.randn(M)
    for n in range(N-1):
        x[n+1] = np.dot(a,x[n]) + np.random.randn(D)
        f[:,n+1] = np.dot(c,x[n+1])
        y[:,n+1] = f[:,n+1] + 3*np.random.randn(M)

    # Create the model
    (Y, CX, X, tau, C, gamma, A, alpha) = linear_state_space_model(D, N, M)
    
    # Add missing values randomly
    mask = random.mask(M, N, p=0.3)
    # Add missing values to a period of time
    mask[:,30:80] = False
    y[~mask] = np.nan # BayesPy doesn't require this. Just for plotting.
    # Observe the data
    Y.observe(y, mask=mask)
    

    # Initialize nodes (must use some randomness for C)
    C.initialize_from_random()

    # Run inference
    Q = VB(Y, X, C, gamma, A, alpha, tau)

    #
    # Run inference with rotations.
    #
    rotA = transformations.RotateGaussianARD(A, alpha)
    rotX = transformations.RotateGaussianMarkovChain(X, A, rotA)
    rotC = transformations.RotateGaussianARD(C, gamma)
    R = transformations.RotationOptimizer(rotX, rotC, D)

    for ind in range(maxiter):
        Q.update()
        R.rotate(maxiter=10,
                 check_gradient=True,
                 verbose=False,
                 check_bound=Q.compute_lowerbound,
                 check_bound_terms=Q.compute_lowerbound_terms)

    X_vb = X.u[0]
    varX_vb = utils.diagonal(X.u[1] - X_vb[...,np.newaxis,:] * X_vb[...,:,np.newaxis])

    u_CX = CX.get_moments()
    CX_vb = u_CX[0]
    varCX_vb = u_CX[1] - CX_vb**2
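    # (In both cases u[0] is the posterior mean and u[1] the second moment, so
    # the variance is the second moment minus the squared mean; utils.diagonal
    # extracts the marginal variances of the latent states.)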

    # Show results
    plt.figure(3)
    plt.clf()
    for m in range(M):
        plt.subplot(M,1,m+1)
        plt.plot(y[m,:], 'r.')
        plt.plot(f[m,:], 'b-')
        bpplt.errorplot(y=CX_vb[m,:],
                        error=2*np.sqrt(varCX_vb[m,:]))

    plt.figure()
    Q.plot_iteration_by_nodes()
Exemple #33
0
def run(N=100000, N_batch=50, seed=42, maxiter=100, plot=True):
    """
    Run deterministic annealing demo for 1-D Gaussian mixture.
    """

    if seed is not None:
        np.random.seed(seed)

    # Number of clusters in the model
    K = 20

    # Dimensionality of the data
    D = 5

    # Generate data
    K_true = 10
    spread = 5
    means = spread * np.random.randn(K_true, D)
    z = random.categorical(np.ones(K_true), size=N)
    data = np.empty((N,D))
    for n in range(N):
        data[n] = means[z[n]] + np.random.randn(D)

    #
    # Standard VB-EM algorithm
    #

    # Full model
    mu = Gaussian(np.zeros(D), np.identity(D),
                  plates=(K,),
                  name='means')
    alpha = Dirichlet(np.ones(K),
                      name='class probabilities')
    Z = Categorical(alpha,
                    plates=(N,),
                    name='classes')
    Y = Mixture(Z, Gaussian, mu, np.identity(D),
                name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Put the data in
    Y.observe(data)

    # Run inference
    Q = VB(Y, Z, mu, alpha)
    Q.save(mu)
    Q.update(repeat=maxiter)
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')
    max_cputime = np.sum(Q.cputime[~np.isnan(Q.cputime)])


    #
    # Stochastic variational inference
    #

    # Construct smaller model (size of the mini-batch)
    mu = Gaussian(np.zeros(D), np.identity(D),
                  plates=(K,),
                  name='means')
    alpha = Dirichlet(np.ones(K),
                      name='class probabilities')
    Z = Categorical(alpha,
                    plates=(N_batch,),
                    plates_multiplier=(N/N_batch,),
                    name='classes')
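    # plates_multiplier tells the node that each mini-batch of N_batch samples
    # stands for N observations, so messages to the parent nodes are scaled by
    # N/N_batch (as if the full data set had been observed).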
    Y = Mixture(Z, Gaussian, mu, np.identity(D),
                name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Inference engine
    Q = VB(Y, Z, mu, alpha, autosave_filename=Q.autosave_filename)
    Q.load(mu)

    # Because mini-batches are used, the messages need to be scaled
    # appropriately (see plates_multiplier above).
    print("Stochastic variational inference...")
    # The lower bound computed on a mini-batch is stochastic, so the
    # per-iteration bound checks are disabled.
    Q.ignore_bound_checks = True

    maxiter *= int(N/N_batch)
    delay = 1
    forgetting_rate = 0.7
    for n in range(maxiter):

        # Observe a mini-batch
        subset = np.random.choice(N, N_batch)
        Y.observe(data[subset,:])

        # Learn intermediate variables
        Q.update(Z)

        # Set step length
        step = (n + delay) ** (-forgetting_rate)
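        # Robbins-Monro step size: decays as (n + delay)^(-forgetting_rate);
        # with forgetting_rate in (0.5, 1] the usual stochastic-approximation
        # convergence conditions hold.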

        # Stochastic gradient for the global variables
        Q.gradient_step(mu, alpha, scale=step)

        if np.sum(Q.cputime[:n]) > max_cputime:
            break
    
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Stochastic inference'], loc='lower right')
        bpplt.pyplot.title('VB for Gaussian mixture model')

    return
Exemple #34
0
def run(M=10, N=100, D_y=3, D=5):
    seed = 45
    print('seed =', seed)
    np.random.seed(seed)

    # Check HDF5 version.
    if h5py.version.hdf5_version_tuple < (1,8,7): 
        print("WARNING! Your HDF5 version is %s. HDF5 versions <1.8.7 are not "
              "able to save empty arrays, thus you may experience problems if "
              "you for instance try to save before running any iteration steps."
              % str(h5py.version.hdf5_version_tuple))
    
    # Generate data
    w = np.random.normal(0, 1, size=(M,1,D_y))
    x = np.random.normal(0, 1, size=(1,N,D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.5, size=(M,N))

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.9) # randomly missing
    mask[:,20:40] = False # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_iterations=5)

    # Initialize some nodes randomly
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    # Save the state into a HDF5 file
    filename = tempfile.NamedTemporaryFile(suffix='.hdf5').name
    Q.update(X, W, alpha, tau, repeat=1)
    Q.save(filename=filename)

    # Inference loop.
    Q.update(X, W, alpha, tau, repeat=10)

    # Reload the state from the HDF5 file
    Q.load(filename=filename)

    # Inference loop again.
    Q.update(X, W, alpha, tau, repeat=10)

    # NOTE: Saving and loading requires that you have the model
    # constructed. "Save" does not store the model structure nor does "load"
    # read it. They are just used for reading and writing the contents of the
    # nodes. Thus, if you want to load, you first need to construct the same
    # model that was used for saving and then use load to set the states of the
    # nodes.
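    #
    # A minimal sketch of reloading into a freshly constructed model (the
    # *_2 names below are hypothetical):
    #   (Y2, WX2, W2, X2, tau2, alpha2) = pca_model(M, N, D)
    #   Q2 = VB(Y2, W2, X2, tau2, alpha2)
    #   Q2.load(filename=filename)  # restores the node states saved above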

    plt.clf()
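    # Roughly two-standard-deviation error bars around the predictive mean:
    # the term under the square root adds the approximate observation-noise
    # variance 1/E[tau] to the variance term returned for WX.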
    WX_params = WX.get_parameters()
    fh = WX_params[0] * np.ones(y.shape)
    err_fh = 2*np.sqrt(WX_params[1] + 1/tau.get_moments()[0]) * np.ones(y.shape)
    for m in range(M):
        plt.subplot(M,1,m+1)
        #errorplot(y, error=None, x=None, lower=None, upper=None):
        bpplt.errorplot(fh[m], x=np.arange(N), error=err_fh[m])
        plt.plot(np.arange(N), f[m], 'g')
        plt.plot(np.arange(N), y[m], 'r+')

    plt.figure()
    Q.plot_iteration_by_nodes()

    plt.figure()
    plt.subplot(2,2,1)
    bpplt.binary_matrix(W.mask)
    plt.subplot(2,2,2)
    bpplt.binary_matrix(X.mask)
    plt.subplot(2,2,3)
    #bpplt.binary_matrix(WX.get_mask())
    plt.subplot(2,2,4)
    bpplt.binary_matrix(Y.mask)
Exemple #35
0
def run(M=6, N=200, D=3, maxiter=100, debug=False, seed=42, rotate=False, precompute=False):

    # Use deterministic random numbers
    if seed is not None:
        np.random.seed(seed)

    # Simulate some data
    K = 3
    c = np.random.randn(M,K)
    w = 0.3
    a = np.array([[np.cos(w), -np.sin(w), 0], 
                  [np.sin(w), np.cos(w),  0], 
                  [0,         0,          1]])
    x = np.empty((N,K))
    f = np.empty((M,N))
    y = np.empty((M,N))
    x[0] = 10*np.random.randn(K)
    f[:,0] = np.dot(c,x[0])
    y[:,0] = f[:,0] + 3*np.random.randn(M)
    for n in range(N-1):
        x[n+1] = np.dot(a,x[n]) + np.random.randn(K)
        f[:,n+1] = np.dot(c,x[n+1])
        y[:,n+1] = f[:,n+1] + 3*np.random.randn(M)

    # Create the model
    (Y, CX, X, tau, C, gamma, A, alpha) = linear_state_space_model(D=D, 
                                                                   N=N,
                                                                   M=M)
    
    # Add missing values randomly
    mask = random.mask(M, N, p=0.3)
    # Add missing values to a period of time
    mask[:,30:80] = False
    y[~mask] = np.nan # BayesPy doesn't require this. Just for plotting.
    # Observe the data
    Y.observe(y, mask=mask)

    # Initialize nodes (must use some randomness for C)
    C.initialize_from_random()

    # Run inference
    Q = VB(Y, X, C, gamma, A, alpha, tau)

    #
    # Run inference with rotations.
    #
    if rotate:
        rotA = transformations.RotateGaussianArrayARD(A, alpha, precompute=precompute)
        rotX = transformations.RotateGaussianMarkovChain(X, A, rotA)
        rotC = transformations.RotateGaussianArrayARD(C, gamma)
        R = transformations.RotationOptimizer(rotX, rotC, D)

        for ind in range(maxiter):
            Q.update()
            if debug:
                R.rotate(maxiter=10, 
                         check_gradient=True,
                         check_bound=True)
            else:
                R.rotate()

    else:
        Q.update(repeat=maxiter)
        
    # Show results
    plt.figure()
    bpplt.timeseries_normal(CX, scale=2)
    bpplt.timeseries(f, 'b-')
    bpplt.timeseries(y, 'r.')
    plt.show()
Exemple #36
0
def run_lssm(y, f, mask, D, maxiter):
    """
    Run VB inference for linear state space model.
    """

    (M, N) = np.shape(y)

    #
    # CONSTRUCT THE MODEL
    #

    # Dynamics matrix
    # alpha: (D) x ()
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='alpha')
    # A : (D) x (D)
    A = Gaussian(np.zeros(D),
                 diagonal(alpha),
                 plates=(D,),
                 name='A')
    A.initialize_from_value(np.identity(D))

    # Latent states with dynamics
    # X : () x (N,D)
    X = GaussianMarkovChain(np.zeros(D),         # mean of x0
                            1e-3*np.identity(D), # prec of x0
                            A,                   # dynamics
                            np.ones(D),          # innovation
                            n=N,                 # time instances
                            name='X',
                            initialize=False)
    X.initialize_from_value(np.random.randn(N,D))

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='gamma')
    # C : (M,1) x (D)
    C = Gaussian(np.zeros(D),
                 diagonal(gamma),
                 plates=(M,1),
                 name='C')
    C.initialize_from_value(np.random.randn(M,1,D))

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5,
                1e-5,
                name='tau')

    # Observations
    # Y : (M,N) x ()
    CX = Dot(C, X.as_gaussian())
    Y = Normal(CX,
               tau,
               name='Y')

    #
    # RUN INFERENCE
    #

    # Observe data
    Y.observe(y, mask=mask)
    # Construct inference machine
    Q = VB(Y, X, A, alpha, C, gamma, tau)
    # Iterate
    Q.update(X, A, alpha, C, gamma, tau, repeat=maxiter)

    #
    # SHOW RESULTS
    #

    # Mean and standard deviation of the posterior
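    # get_moments() returns (E[CX], E[CX**2]), so the variance is the second
    # moment minus the squared mean.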
    (f_mean, f_squared) = CX.get_moments()
    f_std = np.sqrt(f_squared - f_mean**2)

    # Plot observation space
    #plt.figure()
    for m in range(M):
        plt.subplot(M,1,m+1)
        plt.plot(y[m,:], 'r.')
        plt.plot(f[m,:], 'b-')
        bpplt.errorplot(y=f_mean[m,:], error=2*f_std[m,:])