Example #1
def run(M=10,
        N=100,
        D_y=3,
        D=5,
        seed=42,
        rotate=False,
        maxiter=100,
        debug=False,
        plot=True):

    if seed is not None:
        np.random.seed(seed)

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M, N))

    # Construct model
    (Y, F, W, X, tau, alpha) = model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.5)  # randomly missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)
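    # (In BayesPy the mask marks observed entries: True = observed, False =
    # missing. Setting masked entries to NaN is only needed for plotting.)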

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha)

    # Initialize some nodes randomly
    X.initialize_from_random()
    W.initialize_from_random()

    # Run inference algorithm
    if rotate:
        # Use rotations to speed up learning
        rotW = transformations.RotateGaussianARD(W, alpha)
        rotX = transformations.RotateGaussianARD(X)
        R = transformations.RotationOptimizer(rotW, rotX, D)
        for ind in range(maxiter):
            Q.update()
            if debug:
                R.rotate(check_bound=True, check_gradient=True)
            else:
                R.rotate()

    else:
        # Use standard VB-EM alone
        Q.update(repeat=maxiter)

    # Plot results
    if plot:
        plt.figure()
        bpplt.timeseries_normal(F, scale=2)
        bpplt.timeseries(f, color='g', linestyle='-')
        bpplt.timeseries(y, color='r', linestyle='None', marker='+')
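
Examples 1 and 2 call a model() factory that is defined elsewhere in their
module. A minimal sketch of what such a factory could look like for this
probabilistic-PCA setup (hypothetical; the original module defines its own
version, and Example 2's variant instead returns a VB object that is indexed
by node name):

# Hypothetical sketch of the model() assumed by Example 1; not the original.
from bayespy.nodes import GaussianARD, Gamma, SumMultiply

def model(M, N, D):
    # Latent states: N columns of D-dimensional factors
    X = GaussianARD(0, 1, shape=(D,), plates=(1, N), name='X')
    # ARD precisions prune unused factor dimensions
    alpha = Gamma(1e-5, 1e-5, plates=(D,), name='alpha')
    # Loadings: M rows of D-dimensional weights
    W = GaussianARD(0, alpha, shape=(D,), plates=(M, 1), name='W')
    # Noiseless reconstruction and observation noise
    F = SumMultiply('d,d->', W, X, name='F')
    tau = Gamma(1e-5, 1e-5, name='tau')
    Y = GaussianARD(F, tau, name='Y')
    return (Y, F, W, X, tau, alpha)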
Example #2
def run(M=10,
        N=100,
        D_y=3,
        D=5,
        seed=42,
        rotate=False,
        maxiter=1000,
        debug=False,
        plot=True):

    if seed is not None:
        np.random.seed(seed)

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.1, size=(M, N))

    # Construct model
    Q = model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.5)  # randomly missing
    y[~mask] = np.nan
    Q['Y'].observe(y, mask=mask)

    # Run inference algorithm
    if rotate:
        # Use rotations to speed up learning
        rotW = transformations.RotateGaussianARD(Q['W'], Q['alpha'])
        rotX = transformations.RotateGaussianARD(Q['X'])
        R = transformations.RotationOptimizer(rotW, rotX, D)
        if debug:
            Q.callback = lambda: R.rotate(check_bound=True,
                                          check_gradient=True)
        else:
            Q.callback = R.rotate
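        # VB invokes the callback after every update iteration, so the
        # rotations run on each VB-EM step (cf. the explicit loop in
        # Example 1).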

    # Run VB-EM (with rotations via the callback, if enabled)
    Q.update(repeat=maxiter)

    # Plot results
    if plot:
        plt.figure()
        bpplt.timeseries_normal(Q['F'], scale=2)
        bpplt.timeseries(f, color='g', linestyle='-')
        bpplt.timeseries(y, color='r', linestyle='None', marker='+')
Example #3
    def __init__(self, x1, K=None, n_iter=2000, rotate=False, **kwargs):
        import bayespy as bp
        import bayespy.inference.vmp.transformations as bpt
        super().__init__(rotated=rotate, logdir='none', K=K, **kwargs)
        self.D, self.N = x1.shape
        K = self.D if K is None else K
        self.x1 = x1

        z = bp.nodes.GaussianARD(0, 1, plates=(1, self.N), shape=(K, ))
        alpha = bp.nodes.Gamma(1e-5, 1e-5, plates=(K, ))
        w = bp.nodes.GaussianARD(0, alpha, plates=(self.D, 1), shape=(K, ))
        # not sure what form the hyper-mean should take;
        # there are definitely convergence issues if the mean is far from the real one
        hyper_m = bp.nodes.GaussianARD(x1.mean(), 0.001)
        m = bp.nodes.GaussianARD(hyper_m, 1, shape=(self.D, 1))
        tau = bp.nodes.Gamma(1e-5, 1e-5)
        x = bp.nodes.GaussianARD(bp.nodes.Add(bp.nodes.Dot(z, w), m), tau)
        x.observe(x1, mask=~x1.mask)
        self.inference = bp.inference.VB(x, z, w, alpha, tau, m, hyper_m)

        if rotate:
            rot_z = bpt.RotateGaussianARD(z)
            rot_w = bpt.RotateGaussianARD(w, alpha)
            R = bpt.RotationOptimizer(rot_z, rot_w, K)
            self.inference.set_callback(R.rotate)

        w.initialize_from_random()
        self.inference.update(repeat=n_iter)

        def make2D(x):
            return np.atleast_2d(x).T if x.ndim == 1 else x

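        # get_moments()[0] is the posterior mean of each node; squeeze() drops
        # the singleton plate axes introduced by plates=(1, N) and (D, 1) above.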
        self.W = make2D(w.get_moments()[0].squeeze())
        self.Z = make2D(z.get_moments()[0].squeeze())
        self.mu = m.get_moments()[0]
        self.tau = tau.get_moments()[0].item()
        self.x = self.W.dot(self.Z.T) + self.mu
        self.alpha = alpha.get_moments()[0]
        self.n_iter = self.inference.iter
        self.loss = self.inference.loglikelihood_lowerbound()
Example #4
def infer(y,
          D,
          K,
          rotate=True,
          debug=False,
          maxiter=100,
          mask=True,
          plot_C=True,
          monitor=False,
          update_hyper=0,
          autosave=None):
    """
    Apply LSSM with switching dynamics to the given data.
    """

    (M, N) = np.shape(y)

    # Construct model
    Q = model(M=M, K=K, N=N, D=D)
    if not plot_C:
        Q['C'].set_plotter(None)

    if autosave is not None:
        Q.set_autosave(autosave, iterations=10)

    Q['Y'].observe(y, mask=mask)

    # Set up rotation speed-up
    if rotate:
        # Initially rotate the D-dimensional state space (X, A, C)
        # without updating the hyperparameters
        rotA_init = transformations.RotateGaussianARD(Q['A'])
        rotX_init = transformations.RotateSwitchingMarkovChain(
            Q['X'], Q['A'], Q['Z'], rotA_init)
        rotC_init = transformations.RotateGaussianARD(Q['C'])
        R_init = transformations.RotationOptimizer(rotX_init, rotC_init, D)
        # Rotate the D-dimensional state space (X, A, C), updating the ARD
        # hyperparameters as well
        rotA = transformations.RotateGaussianARD(Q['A'], Q['alpha'])
        rotX = transformations.RotateSwitchingMarkovChain(
            Q['X'], Q['A'], Q['Z'], rotA)
        rotC = transformations.RotateGaussianARD(Q['C'], Q['gamma'])
        R = transformations.RotationOptimizer(rotX, rotC, D)
        if debug:
            rotate_kwargs = {
                'maxiter': 10,
                'check_bound': True,
                'check_gradient': True
            }
        else:
            rotate_kwargs = {'maxiter': 10}
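        # (check_bound/check_gradient make each rotation verify numerically
        # that the lower bound does not decrease and that the analytic
        # gradient matches finite differences; useful for debugging, but slow.)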

    # Run inference
    if monitor:
        Q.plot()
    for n in range(maxiter):
        if n < update_hyper:
            Q.update('X', 'C', 'A', 'tau', 'Z', plot=monitor)
            if rotate:
                R_init.rotate(**rotate_kwargs)
        else:
            Q.update(plot=monitor)
            if rotate:
                R.rotate(**rotate_kwargs)

    return Q
Example #5
def run(M=50, N=200, D_y=10, D=20, maxiter=100):
    seed = 45
    print('seed =', seed)
    np.random.seed(seed)

    # Generate data (covariance eigenvalues: 1,1,...,1,2^2,3^2,...,(D_y+1)^2)
    (q, r) = scipy.linalg.qr(np.random.randn(M, M))
    C = np.ones(M)
    C[:D_y] += np.arange(1, 1 + D_y)
    y = C[:, np.newaxis] * np.random.randn(M, N)
    y = np.dot(q, y)

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = utils.random.mask(M, N, p=0.9)  # randomly missing
    mask[:, 20:40] = False  # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_filename=utils.utils.tempfile())

    # Initialize nodes (from prior and randomly)
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    Q.update(repeat=1)
    Q.save()
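    # Snapshot the state after one update so that the no-rotation run below
    # can restart from exactly the same initialization via Q.load().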

    #
    # Run inference with rotations.
    #
    rotX = transformations.RotateGaussian(X)
    rotW = transformations.RotateGaussianARD(W, alpha)
    R = transformations.RotationOptimizer(rotX, rotW, D)

    for ind in range(maxiter):
        Q.update()
        R.rotate(check_gradient=False,
                 maxiter=10,
                 verbose=False,
                 check_bound=Q.compute_lowerbound,
                 check_bound_terms=Q.compute_lowerbound_terms)

    L_rot = Q.L

    #
    # Re-run inference without rotations.
    #
    Q.load()
    Q.update(repeat=maxiter)
    L_norot = Q.L

    #
    # Plot comparison
    #
    plt.plot(L_rot)
    plt.plot(L_norot)
    plt.legend(['With rotations', 'Without rotations'], loc='lower right')
    plt.show()
Example #6
def latent_factor_model(y, hyperparameters, A=None):
    num_obs, num_nodes = y.shape
    num_factors = hyperparameters['num_factors']
    # Define the latent factors
    _x = bp.nodes.GaussianARD(0,
                              1,
                              plates=(num_obs, 1),
                              shape=(num_factors, ),
                              name='x')
    ard_prior = hyperparameters['ard_prior']
    if ard_prior == 'independent':
        # Automatic relevance determination prior for the observation model
        _lambda = bp.nodes.Gamma(hyperparameters['lambda/shape'],
                                 hyperparameters['lambda/scale'],
                                 plates=(num_factors, ),
                                 name='lambda')
        # Compute the predictor and add noise to get the observation
        _A = bp.nodes.GaussianARD(0,
                                  _lambda,
                                  plates=(1, num_nodes),
                                  shape=(num_factors, ),
                                  name='A')
    elif ard_prior == 'shared':
        # Automatic relevance determination prior for the observation model
        # (but shared across the dimensions so we need to use model selection)
        _lambda = bp.nodes.Gamma(hyperparameters['lambda/shape'],
                                 hyperparameters['lambda/scale'],
                                 name='lambda')
        # Compute the predictor and add noise to get the observation
        _A = bp.nodes.GaussianARD(0,
                                  _lambda,
                                  plates=(1, num_nodes),
                                  shape=(num_factors, ),
                                  name='A')
    elif ard_prior is None:
        _A = bp.nodes.GaussianARD(0,
                                  hyperparameters['A/precision'],
                                  shape=(num_factors, ),
                                  plates=(1, num_nodes),
                                  name='A')
    else:
        raise KeyError(ard_prior)

    _predictor = bp.nodes.SumMultiply('d,d->', _x, _A, name='predictor')

    _tau = bp.nodes.Gamma(hyperparameters['tau/shape'],
                          hyperparameters['tau/scale'],
                          name='tau',
                          plates=(num_nodes, ))

    _y = bp.nodes.GaussianARD(_predictor, _tau, name='y')

    # Observe the model and initialise the observation model randomly to
    # ensure that we don't end up with a trivial "all-zero" model
    _y.observe(y, mask=np.isfinite(y))
    if A is None:
        _A.initialize_from_random()
    else:
        _A.initialize_from_value(A)

    # Add rotations to speed up the algorithm
    rotations = [
        transformations.RotateGaussianARD(_x),
        transformations.RotateGaussianARD(_A),
    ]
    optimizer = transformations.RotationOptimizer(*rotations, num_factors)

    # Construct an inference model
    variables = [_y, _x, _A, _tau]
    if ard_prior:
        variables.append(_lambda)
    Q = bp.inference.VB(*variables)
    Q.set_callback(optimizer.rotate)
    return Q
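
A possible invocation of the factory above, using the hyperparameter keys the
function actually reads (values are illustrative only; the module-level
imports of bayespy as bp and transformations that the snippet relies on are
assumed):

import numpy as np

hyperparameters = {
    'num_factors': 3,
    'ard_prior': 'independent',  # or 'shared'; None requires 'A/precision'
    'lambda/shape': 1e-3,
    'lambda/scale': 1e-3,
    'tau/shape': 1e-3,
    'tau/scale': 1e-3,
}
y = np.random.randn(100, 7)  # (num_obs, num_nodes); NaN entries are masked out
Q = latent_factor_model(y, hyperparameters)
Q.update(repeat=200)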
Example #7
import numpy as np
from bayespy.nodes import GaussianARD, Gamma, Dot

D = 10  # latent dimension, chosen larger than the true rank so ARD can prune
X = GaussianARD(0, 1,
                shape=(D,),
                plates=(1,100),
                name='X')
alpha = Gamma(1e-3, 1e-3,
              plates=(D,),
              name='alpha')
C = GaussianARD(0, alpha,
                shape=(D,),
                plates=(10,1),
                name='C')
F = Dot(C, X)
tau = Gamma(1e-3, 1e-3, name='tau')
Y = GaussianARD(F, tau)
c = np.random.randn(10, 2)
x = np.random.randn(2, 100)
data = np.dot(c, x) + 0.1*np.random.randn(10, 100)
Y.observe(data)
# ...or observe with a mask, marking some rows as entirely missing:
Y.observe(data, mask=[[True], [False], [False], [True], [True],
                      [False], [True], [True], [True], [False]])
from bayespy.inference import VB
Q = VB(Y, C, X, alpha, tau)
X.initialize_from_parameters(np.random.randn(1, 100, D), 10)
from bayespy.inference.vmp import transformations
rotX = transformations.RotateGaussianARD(X)
rotC = transformations.RotateGaussianARD(C, alpha)
R = transformations.RotationOptimizer(rotC, rotX, D)
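# The optimizer searches for an invertible D-by-D matrix R, applying it to one
# factor and its inverse transpose to the other, so that Dot(C, X) and hence
# the likelihood stay unchanged while the VB lower bound is optimized over R.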
Q = VB(Y, C, X, alpha, tau)
Q.callback = R.rotate
Q.update(repeat=1000, tol=1e-6)
Q.update(repeat=50, tol=np.nan)  # tol=np.nan disables the convergence check

import bayespy.plot as bpplt
bpplt.pdf(Q['tau'], np.linspace(60, 140, num=100))
Example #8
def run(maxiter=100):

    seed = 496  #np.random.randint(1000)
    print("seed = ", seed)
    np.random.seed(seed)

    # Simulate some data
    D = 3
    M = 6
    N = 200
    c = np.random.randn(M, D)
    w = 0.3
    a = np.array([[np.cos(w), -np.sin(w), 0], [np.sin(w),
                                               np.cos(w), 0], [0, 0, 1]])
    x = np.empty((N, D))
    f = np.empty((M, N))
    y = np.empty((M, N))
    x[0] = 10 * np.random.randn(D)
    f[:, 0] = np.dot(c, x[0])
    y[:, 0] = f[:, 0] + 3 * np.random.randn(M)
    for n in range(N - 1):
        x[n + 1] = np.dot(a, x[n]) + np.random.randn(D)
        f[:, n + 1] = np.dot(c, x[n + 1])
        y[:, n + 1] = f[:, n + 1] + 3 * np.random.randn(M)
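    # The simulated dynamics rotate the first two state dimensions by angle w
    # each step (a is a rotation matrix about the third axis), so the data are
    # noisy mixtures of slow oscillations plus a random-walk third component.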

    # Create the model
    (Y, CX, X, tau, C, gamma, A, alpha) = linear_state_space_model(D, N, M)

    # Add missing values randomly
    mask = random.mask(M, N, p=0.3)
    # Add missing values to a period of time
    mask[:, 30:80] = False
    y[~mask] = np.nan  # BayesPy doesn't require this. Just for plotting.
    # Observe the data
    Y.observe(y, mask=mask)

    # Initialize nodes (must use some randomness for C)
    C.initialize_from_random()

    # Run inference
    Q = VB(Y, X, C, gamma, A, alpha, tau)

    #
    # Run inference with rotations.
    #
    rotA = transformations.RotateGaussianARD(A, alpha)
    rotX = transformations.RotateGaussianMarkovChain(X, A, rotA)
    rotC = transformations.RotateGaussianARD(C, gamma)
    R = transformations.RotationOptimizer(rotX, rotC, D)

    for ind in range(maxiter):
        Q.update()
        R.rotate(maxiter=10,
                 check_gradient=True,
                 verbose=False,
                 check_bound=Q.compute_lowerbound,
                 check_bound_terms=Q.compute_lowerbound_terms)

    X_vb = X.u[0]
    varX_vb = utils.diagonal(X.u[1] - X_vb[..., np.newaxis, :] *
                             X_vb[..., :, np.newaxis])

    u_CX = CX.get_moments()
    CX_vb = u_CX[0]
    varCX_vb = u_CX[1] - CX_vb**2

    # Show results
    plt.figure(3)
    plt.clf()
    for m in range(M):
        plt.subplot(M, 1, m + 1)
        plt.plot(y[m, :], 'r.')
        plt.plot(f[m, :], 'b-')
        bpplt.errorplot(y=CX_vb[m, :], error=2 * np.sqrt(varCX_vb[m, :]))

    plt.figure()
    Q.plot_iteration_by_nodes()
Example #9
def infer(y,
          D,
          K,
          mask=True,
          maxiter=100,
          rotate=False,
          debug=False,
          precompute=False,
          update_hyper=0,
          start_rotating=0,
          start_rotating_weights=0,
          plot_C=True,
          monitor=True,
          autosave=None):
    """
    Run VB inference for linear state-space model with time-varying dynamics.
    """

    y = misc.atleast_nd(y, 2)
    (M, N) = np.shape(y)

    # Construct the model
    Q = model(M, N, D, K)
    if not plot_C:
        Q['C'].set_plotter(None)

    if autosave is not None:
        Q.set_autosave(autosave, iterations=10)

    # Observe data
    Q['Y'].observe(y, mask=mask)

    # Set up rotation speed-up
    if rotate:

        # Initially rotate the D-dimensional state space (X, A, C)
        # without updating the hyperparameters
        rotA_init = transformations.RotateGaussianARD(Q['A'],
                                                      axis=0,
                                                      precompute=precompute)
        rotX_init = transformations.RotateVaryingMarkovChain(
            Q['X'], Q['A'], Q['S']._convert(GaussianMoments)[..., 1:, None],
            rotA_init)
        rotC_init = transformations.RotateGaussianARD(Q['C'],
                                                      axis=0,
                                                      precompute=precompute)
        R_X_init = transformations.RotationOptimizer(rotX_init, rotC_init, D)

        # Rotate the D-dimensional state space (X, A, C)
        rotA = transformations.RotateGaussianARD(Q['A'],
                                                 Q['alpha'],
                                                 axis=0,
                                                 precompute=precompute)
        rotX = transformations.RotateVaryingMarkovChain(
            Q['X'], Q['A'], Q['S']._convert(GaussianMoments)[..., 1:, None],
            rotA)
        rotC = transformations.RotateGaussianARD(Q['C'],
                                                 Q['gamma'],
                                                 axis=0,
                                                 precompute=precompute)
        R_X = transformations.RotationOptimizer(rotX, rotC, D)

        # Rotate the K-dimensional latent dynamics space (S, A, C)
        rotB = transformations.RotateGaussianARD(Q['B'],
                                                 Q['beta'],
                                                 precompute=precompute)
        rotS = transformations.RotateGaussianMarkovChain(Q['S'], rotB)
        rotA = transformations.RotateGaussianARD(Q['A'],
                                                 Q['alpha'],
                                                 axis=-1,
                                                 precompute=precompute)
        R_S = transformations.RotationOptimizer(rotS, rotA, K)
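        # Note that A takes part in both optimizers, but along different axes:
        # its rows (axis=0) rotate with the D-dimensional state space, while
        # its weight axis (axis=-1) rotates with the K-dimensional dynamics
        # space.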

        if debug:
            rotate_kwargs = {
                'maxiter': 10,
                'check_bound': True,
                'check_gradient': True
            }
        else:
            rotate_kwargs = {'maxiter': 10}

    # Plot initial distributions
    if monitor:
        Q.plot()

    # Run inference using rotations
    for ind in range(maxiter):

        if ind < update_hyper:
            # It might be a good idea to learn the lower level nodes a bit
            # before starting to learn the upper level nodes.
            Q.update('X', 'C', 'A', 'tau', plot=monitor)
            if rotate and ind >= start_rotating:
                # Use the rotation which does not update alpha nor beta
                R_X_init.rotate(**rotate_kwargs)
        else:
            Q.update(plot=monitor)
            if rotate and ind >= start_rotating:
                # It might be a good idea to not rotate immediately because it
                # might lead to pruning out components too efficiently before
                # even estimating them roughly
                R_X.rotate(**rotate_kwargs)
                if ind >= start_rotating_weights:
                    R_S.rotate(**rotate_kwargs)

    # Return the posterior approximation
    return Q
Example #10
def run_dlssm(y, f, mask, D, K, maxiter):
    """
    Run VB inference for linear state space model with drifting dynamics.
    """

    (M, N) = np.shape(y)

    # Dynamics matrix with ARD
    # alpha : (K) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(K, ), name='alpha')
    # A : (K) x (K)
    A = Gaussian(np.zeros(K),
                 diagonal(alpha),
                 plates=(K, ),
                 name='A_S')
    A.initialize_from_value(np.identity(K))

    # S : () x (N-1,K)
    S = GaussianMarkovChain(np.ones(K),
                            1e-6 * np.identity(K),
                            A,
                            np.ones(K),
                            n=N - 1,
                            name='S')
    S.initialize_from_value(1 * np.ones((N - 1, K)))

    # Projection matrix of the dynamics matrix
    # beta : (K) x ()
    beta = Gamma(1e-5, 1e-5, plates=(K, ), name='beta')
    # B : (D) x (D*K)
    B = Gaussian(np.zeros(D * K),
                 diagonal(tile(beta, D)),
                 plates=(D, ),
                 name='B')
    b = np.zeros((D, D, K))
    b[np.arange(D), np.arange(D), np.zeros(D, dtype=int)] = 1
    B.initialize_from_value(np.reshape(1 * b, (D, D * K)))

    # BS : (N-1,D) x (D)
    BS = MatrixDot(B, S.as_gaussian().add_plate_axis(-1), name='BS')

    # Latent states with dynamics
    # X : () x (N,D)
    X = GaussianMarkovChain(
        np.zeros(D),  # mean of x0
        1e-3 * np.identity(D),  # prec of x0
        BS,  # dynamics
        np.ones(D),  # innovation
        n=N,  # time instances
        name='X',
        initialize=False)
    X.initialize_from_value(np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D, ), name='gamma')
    # C : (M,1) x (D)
    C = Gaussian(np.zeros(D), diagonal(gamma), plates=(M, 1), name='C')
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5, 1e-5, name='tau')

    # Observations
    # Y : (M,N) x ()
    CX = Dot(C, X.as_gaussian())
    Y = Normal(CX, tau, name='Y')

    #
    # RUN INFERENCE
    #

    # Observe data
    Y.observe(y, mask=mask)
    # Construct inference machine
    Q = VB(Y, X, S, A, alpha, B, beta, C, gamma, tau)

    #
    # Run inference with rotations.
    #

    # Rotate the D-dimensional state space (C, X)
    rotB = transformations.RotateGaussianMatrixARD(B, beta, axis='rows')
    rotX = transformations.RotateDriftingMarkovChain(X, B, S, rotB)
    rotC = transformations.RotateGaussianARD(C, gamma)
    R_X = transformations.RotationOptimizer(rotX, rotC, D)

    # Rotate the K-dimensional latent dynamics space (B, S)
    rotA = transformations.RotateGaussianARD(A, alpha)
    rotS = transformations.RotateGaussianMarkovChain(S, A, rotA)
    rotB = transformations.RotateGaussianMatrixARD(B, beta, axis='cols')
    R_S = transformations.RotationOptimizer(rotS, rotB, K)

    # Iterate
    for ind in range(int(maxiter / 5)):
        Q.update(repeat=5)
        R_X.rotate()
        R_S.rotate()

    #
    # SHOW RESULTS
    #

    # Mean and standard deviation of the posterior
    (f_mean, f_squared) = CX.get_moments()
    f_std = np.sqrt(f_squared - f_mean**2)
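    # E[f^2] - E[f]^2 is the posterior variance of f; the error band below
    # shows two posterior standard deviations around the mean.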

    # Plot observations space
    for m in range(M):
        plt.subplot(M, 1, m + 1)
        plt.plot(y[m, :], 'r.')
        plt.plot(f[m, :], 'b-')
        bpplt.errorplot(y=f_mean[m, :], error=2 * f_std[m, :])