Beispiel #1
0
def run(M=40, N=100, D_y=6, D=8, seed=42, rotate=False, maxiter=1000, debug=False, plot=True):
    """
    Run pattern search demo for PCA.
    """

    if seed is not None:
        np.random.seed(seed)
    
    # Generate data
    w = np.random.normal(0, 1, size=(M,1,D_y))
    x = np.random.normal(0, 1, size=(1,N,D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M,N))

    # Construct model
    Q = VB(*(pca.model(M, N, D)))

    # Data with missing values
    mask = random.mask(M, N, p=0.5) # randomly missing
    y[~mask] = np.nan
    Q['Y'].observe(y, mask=mask)

    # Initialize some nodes randomly
    Q['X'].initialize_from_random()
    Q['W'].initialize_from_random()

    # Use a few VB-EM updates at the beginning
    Q.update(repeat=10)
    Q.save()

    # Standard VB-EM as a baseline
    Q.update(repeat=maxiter)
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore initial state
    Q.load()

    # Pattern search method for comparison
    for n in range(maxiter):

        Q.pattern_search('W', 'tau', maxiter=3, collapsed=['X', 'alpha'])
        Q.update(repeat=20)

        if Q.has_converged():
            break

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Pattern search'], loc='lower right')
Beispiel #2
0
def pca():

    np.random.seed(41)

    M = 10
    N = 3000
    D = 5

    # Construct the PCA model
    alpha = Gamma(1e-3, 1e-3, plates=(D, ), name='alpha')
    W = GaussianARD(0, alpha, plates=(M, 1), shape=(D, ), name='W')
    X = GaussianARD(0, 1, plates=(1, N), shape=(D, ), name='X')
    tau = Gamma(1e-3, 1e-3, name='tau')
    W.initialize_from_random()
    F = SumMultiply('d,d->', W, X)
    Y = GaussianARD(F, tau, name='Y')

    # Observe data
    data = np.sum(np.random.randn(M, 1, D - 1) * np.random.randn(1, N, D - 1),
                  axis=-1) + 1e-1 * np.random.randn(M, N)
    Y.observe(data)

    # Initialize VB engine
    Q = VB(Y, X, W, alpha, tau)

    # Take one update step (so phi is ok)
    Q.update(repeat=1)
    Q.save()

    # Run VB-EM
    Q.update(repeat=200)
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore the state
    Q.load()

    # Run Riemannian conjugate gradient
    #Q.optimize(X, alpha, maxiter=100, collapsed=[W, tau])
    Q.optimize(W, tau, maxiter=100, collapsed=[X, alpha])
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

    bpplt.pyplot.show()
Beispiel #3
0
def pca():

    np.random.seed(41)

    M = 10
    N = 3000
    D = 5

    # Construct the PCA model
    alpha = Gamma(1e-3, 1e-3, plates=(D,), name='alpha')
    W = GaussianARD(0, alpha, plates=(M,1), shape=(D,), name='W')
    X = GaussianARD(0, 1, plates=(1,N), shape=(D,), name='X')
    tau = Gamma(1e-3, 1e-3, name='tau')
    W.initialize_from_random()
    F = SumMultiply('d,d->', W, X)
    Y = GaussianARD(F, tau, name='Y')

    # Observe data
    data = np.sum(np.random.randn(M,1,D-1) * np.random.randn(1,N,D-1), axis=-1) + 1e-1 * np.random.randn(M,N)
    Y.observe(data)

    # Initialize VB engine
    Q = VB(Y, X, W, alpha, tau)

    # Take one update step (so phi is ok)
    Q.update(repeat=1)
    Q.save()

    # Run VB-EM
    Q.update(repeat=200)
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore the state
    Q.load()

    # Run Riemannian conjugate gradient
    #Q.optimize(X, alpha, maxiter=100, collapsed=[W, tau])
    Q.optimize(W, tau, maxiter=100, collapsed=[X, alpha])
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

    bpplt.pyplot.show()
Beispiel #4
0
def run(M=50, N=200, D_y=10, D=20, maxiter=100):
    seed = 45
    print('seed =', seed)
    np.random.seed(seed)

    # Generate data (covariance eigenvalues: 1,1,...,1,2^2,3^2,...,(D_y+1)^2
    (q, r) = scipy.linalg.qr(np.random.randn(M, M))
    C = np.diag(np.arange(2, 2 + D_y))
    C = np.ones(M)
    C[:D_y] += np.arange(1, 1 + D_y)
    y = C[:, np.newaxis] * np.random.randn(M, N)
    y = np.dot(q, y)

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = utils.random.mask(M, N, p=0.9)  # randomly missing
    mask[:, 20:40] = False  # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_filename=utils.utils.tempfile())

    # Initialize nodes (from prior and randomly)
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    Q.update(repeat=1)
    Q.save()

    #
    # Run inference with rotations.
    #
    rotX = transformations.RotateGaussian(X)
    rotW = transformations.RotateGaussianARD(W, alpha)
    R = transformations.RotationOptimizer(rotX, rotW, D)

    for ind in range(maxiter):
        Q.update()
        R.rotate(check_gradient=False,
                 maxiter=10,
                 verbose=False,
                 check_bound=Q.compute_lowerbound,
                 check_bound_terms=Q.compute_lowerbound_terms)

    L_rot = Q.L

    #
    # Re-run inference without rotations.
    #
    Q.load()
    Q.update(repeat=maxiter)
    L_norot = Q.L

    #
    # Plot comparison
    #
    plt.plot(L_rot)
    plt.plot(L_norot)
    plt.legend(['With rotations', 'Without rotations'], loc='lower right')
    plt.show()
Beispiel #5
0
def run(M=10, N=100, D_y=3, D=5):
    seed = 45
    print('seed =', seed)
    np.random.seed(seed)

    # Check HDF5 version.
    if h5py.version.hdf5_version_tuple < (1, 8, 7):
        print(
            "WARNING! Your HDF5 version is %s. HDF5 versions <1.8.7 are not "
            "able to save empty arrays, thus you may experience problems if "
            "you for instance try to save before running any iteration steps."
            % str(h5py.version.hdf5_version_tuple))

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.5, size=(M, N))

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.9)  # randomly missing
    mask[:, 20:40] = False  # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_iterations=5)

    # Initialize some nodes randomly
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    # Save the state into a HDF5 file
    filename = tempfile.NamedTemporaryFile(suffix='hdf5').name
    Q.update(X, W, alpha, tau, repeat=1)
    Q.save(filename=filename)

    # Inference loop.
    Q.update(X, W, alpha, tau, repeat=10)

    # Reload the state from the HDF5 file
    Q.load(filename=filename)

    # Inference loop again.
    Q.update(X, W, alpha, tau, repeat=10)

    # NOTE: Saving and loading requires that you have the model
    # constructed. "Save" does not store the model structure nor does "load"
    # read it. They are just used for reading and writing the contents of the
    # nodes. Thus, if you want to load, you first need to construct the same
    # model that was used for saving and then use load to set the states of the
    # nodes.

    plt.clf()
    WX_params = WX.get_parameters()
    fh = WX_params[0] * np.ones(y.shape)
    err_fh = 2 * np.sqrt(WX_params[1] + 1 / tau.get_moments()[0]) * np.ones(
        y.shape)
    for m in range(M):
        plt.subplot(M, 1, m + 1)
        #errorplot(y, error=None, x=None, lower=None, upper=None):
        bpplt.errorplot(fh[m], x=np.arange(N), error=err_fh[m])
        plt.plot(np.arange(N), f[m], 'g')
        plt.plot(np.arange(N), y[m], 'r+')

    plt.figure()
    Q.plot_iteration_by_nodes()

    plt.figure()
    plt.subplot(2, 2, 1)
    bpplt.binary_matrix(W.mask)
    plt.subplot(2, 2, 2)
    bpplt.binary_matrix(X.mask)
    plt.subplot(2, 2, 3)
    #bpplt.binary_matrix(WX.get_mask())
    plt.subplot(2, 2, 4)
    bpplt.binary_matrix(Y.mask)
def run(N=100000, N_batch=50, seed=42, maxiter=100, plot=True):
    """
    Run deterministic annealing demo for 1-D Gaussian mixture.
    """

    if seed is not None:
        np.random.seed(seed)

    # Number of clusters in the model
    K = 20

    # Dimensionality of the data
    D = 5

    # Generate data
    K_true = 10
    spread = 5
    means = spread * np.random.randn(K_true, D)
    z = random.categorical(np.ones(K_true), size=N)
    data = np.empty((N, D))
    for n in range(N):
        data[n] = means[z[n]] + np.random.randn(D)

    #
    # Standard VB-EM algorithm
    #

    # Full model
    mu = Gaussian(np.zeros(D), np.identity(D), plates=(K, ), name='means')
    alpha = Dirichlet(np.ones(K), name='class probabilities')
    Z = Categorical(alpha, plates=(N, ), name='classes')
    Y = Mixture(Z, Gaussian, mu, np.identity(D), name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Put the data in
    Y.observe(data)

    # Run inference
    Q = VB(Y, Z, mu, alpha)
    Q.save(mu)
    Q.update(repeat=maxiter)
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')
    max_cputime = np.sum(Q.cputime[~np.isnan(Q.cputime)])

    #
    # Stochastic variational inference
    #

    # Construct smaller model (size of the mini-batch)
    mu = Gaussian(np.zeros(D), np.identity(D), plates=(K, ), name='means')
    alpha = Dirichlet(np.ones(K), name='class probabilities')
    Z = Categorical(alpha,
                    plates=(N_batch, ),
                    plates_multiplier=(N / N_batch, ),
                    name='classes')
    Y = Mixture(Z, Gaussian, mu, np.identity(D), name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Inference engine
    Q = VB(Y, Z, mu, alpha, autosave_filename=Q.autosave_filename)
    Q.load(mu)

    # Because using mini-batches, messages need to be multiplied appropriately
    print("Stochastic variational inference...")
    Q.ignore_bound_checks = True

    maxiter *= int(N / N_batch)
    delay = 1
    forgetting_rate = 0.7
    for n in range(maxiter):

        # Observe a mini-batch
        subset = np.random.choice(N, N_batch)
        Y.observe(data[subset, :])

        # Learn intermediate variables
        Q.update(Z)

        # Set step length
        step = (n + delay)**(-forgetting_rate)

        # Stochastic gradient for the global variables
        Q.gradient_step(mu, alpha, scale=step)

        if np.sum(Q.cputime[:n]) > max_cputime:
            break

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Stochastic inference'],
                            loc='lower right')
        bpplt.pyplot.title('VB for Gaussian mixture model')

    return
Beispiel #7
0
def run(M=50, N=200, D_y=10, D=20, maxiter=100):
    seed = 45
    print("seed =", seed)
    np.random.seed(seed)

    # Generate data (covariance eigenvalues: 1,1,...,1,2^2,3^2,...,(D_y+1)^2
    (q, r) = scipy.linalg.qr(np.random.randn(M, M))
    C = np.diag(np.arange(2, 2 + D_y))
    C = np.ones(M)
    C[:D_y] += np.arange(1, 1 + D_y)
    y = C[:, np.newaxis] * np.random.randn(M, N)
    y = np.dot(q, y)

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = utils.random.mask(M, N, p=0.9)  # randomly missing
    mask[:, 20:40] = False  # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_filename=utils.utils.tempfile())

    # Initialize nodes (from prior and randomly)
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    Q.update(repeat=1)
    Q.save()

    #
    # Run inference with rotations.
    #
    rotX = transformations.RotateGaussian(X)
    rotW = transformations.RotateGaussianARD(W, alpha)
    R = transformations.RotationOptimizer(rotX, rotW, D)

    for ind in range(maxiter):
        Q.update()
        R.rotate(
            check_gradient=False,
            maxiter=10,
            verbose=False,
            check_bound=Q.compute_lowerbound,
            check_bound_terms=Q.compute_lowerbound_terms,
        )

    L_rot = Q.L

    #
    # Re-run inference without rotations.
    #
    Q.load()
    Q.update(repeat=maxiter)
    L_norot = Q.L

    #
    # Plot comparison
    #
    plt.plot(L_rot)
    plt.plot(L_norot)
    plt.legend(["With rotations", "Without rotations"], loc="lower right")
    plt.show()
Beispiel #8
0
def run(N=100000, N_batch=50, seed=42, maxiter=100, plot=True):
    """
    Run deterministic annealing demo for 1-D Gaussian mixture.
    """

    if seed is not None:
        np.random.seed(seed)

    # Number of clusters in the model
    K = 20

    # Dimensionality of the data
    D = 5

    # Generate data
    K_true = 10
    spread = 5
    means = spread * np.random.randn(K_true, D)
    z = random.categorical(np.ones(K_true), size=N)
    data = np.empty((N,D))
    for n in range(N):
        data[n] = means[z[n]] + np.random.randn(D)

    #
    # Standard VB-EM algorithm
    #

    # Full model
    mu = Gaussian(np.zeros(D), np.identity(D),
                  plates=(K,),
                  name='means')
    alpha = Dirichlet(np.ones(K),
                      name='class probabilities')
    Z = Categorical(alpha,
                    plates=(N,),
                    name='classes')
    Y = Mixture(Z, Gaussian, mu, np.identity(D),
                name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Put the data in
    Y.observe(data)

    # Run inference
    Q = VB(Y, Z, mu, alpha)
    Q.save(mu)
    Q.update(repeat=maxiter)
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')
    max_cputime = np.sum(Q.cputime[~np.isnan(Q.cputime)])


    #
    # Stochastic variational inference
    #

    # Construct smaller model (size of the mini-batch)
    mu = Gaussian(np.zeros(D), np.identity(D),
                  plates=(K,),
                  name='means')
    alpha = Dirichlet(np.ones(K),
                      name='class probabilities')
    Z = Categorical(alpha,
                    plates=(N_batch,),
                    plates_multiplier=(N/N_batch,),
                    name='classes')
    Y = Mixture(Z, Gaussian, mu, np.identity(D),
                name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Inference engine
    Q = VB(Y, Z, mu, alpha, autosave_filename=Q.autosave_filename)
    Q.load(mu)

    # Because using mini-batches, messages need to be multiplied appropriately
    print("Stochastic variational inference...")
    Q.ignore_bound_checks = True

    maxiter *= int(N/N_batch)
    delay = 1
    forgetting_rate = 0.7
    for n in range(maxiter):

        # Observe a mini-batch
        subset = np.random.choice(N, N_batch)
        Y.observe(data[subset,:])

        # Learn intermediate variables
        Q.update(Z)

        # Set step length
        step = (n + delay) ** (-forgetting_rate)

        # Stochastic gradient for the global variables
        Q.gradient_step(mu, alpha, scale=step)

        if np.sum(Q.cputime[:n]) > max_cputime:
            break
    
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Stochastic inference'], loc='lower right')
        bpplt.pyplot.title('VB for Gaussian mixture model')

    return
Beispiel #9
0
def run(N=500, seed=42, maxiter=100, plot=True):
    """
    Run deterministic annealing demo for 1-D Gaussian mixture.
    """

    if seed is not None:
        np.random.seed(seed)

    mu = GaussianARD(0, 1,
                     plates=(2,),
                     name='means')
    Z = Categorical([0.3, 0.7],
                    plates=(N,),
                    name='classes')
    Y = Mixture(Z, GaussianARD, mu, 1,
                name='observations')

    # Generate data
    z = Z.random()
    data = np.empty(N)
    for n in range(N):
        data[n] = [4, -4][z[n]]

    Y.observe(data)

    # Initialize means closer to the inferior local optimum in which the
    # cluster means are swapped
    mu.initialize_from_value([0, 6])

    Q = VB(Y, Z, mu)
    Q.save()

    #
    # Standard VB-EM algorithm
    #
    Q.update(repeat=maxiter)

    mu_vbem = mu.u[0].copy()
    L_vbem = Q.compute_lowerbound()

    #
    # VB-EM with deterministic annealing
    #
    Q.load()
    beta = 0.01
    while beta < 1.0:
        beta = min(beta*1.2, 1.0)
        print("Set annealing to %.2f" % beta)
        Q.set_annealing(beta)
        Q.update(repeat=maxiter, tol=1e-4)

    mu_anneal = mu.u[0].copy()
    L_anneal = Q.compute_lowerbound()

    print("==============================")
    print("RESULTS FOR VB-EM vs ANNEALING")
    print("Fixed component probabilities:", np.array([0.3, 0.7]))
    print("True component means:", np.array([4, -4]))
    print("VB-EM component means:", mu_vbem)
    print("VB-EM lower bound:", L_vbem)
    print("Annealed VB-EM component means:", mu_anneal)
    print("Annealed VB-EM lower bound:", L_anneal)
    
    return
Beispiel #10
0
def run(M=40,
        N=100,
        D_y=6,
        D=8,
        seed=42,
        rotate=False,
        maxiter=1000,
        debug=False,
        plot=True):
    """
    Run pattern search demo for PCA.
    """

    if seed is not None:
        np.random.seed(seed)

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M, N))

    # Construct model
    Q = VB(*(pca.model(M, N, D)))

    # Data with missing values
    mask = random.mask(M, N, p=0.5)  # randomly missing
    y[~mask] = np.nan
    Q['Y'].observe(y, mask=mask)

    # Initialize some nodes randomly
    Q['X'].initialize_from_random()
    Q['W'].initialize_from_random()

    # Use a few VB-EM updates at the beginning
    Q.update(repeat=10)
    Q.save()

    # Standard VB-EM as a baseline
    Q.update(repeat=maxiter)
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore initial state
    Q.load()

    # Pattern search method for comparison
    for n in range(maxiter):

        Q.pattern_search('W', 'tau', maxiter=3, collapsed=['X', 'alpha'])
        Q.update(repeat=20)

        if Q.has_converged():
            break

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Pattern search'], loc='lower right')
Beispiel #11
0
def run(M=10, N=100, D_y=3, D=5):
    seed = 45
    print('seed =', seed)
    np.random.seed(seed)

    # Check HDF5 version.
    if h5py.version.hdf5_version_tuple < (1,8,7): 
        print("WARNING! Your HDF5 version is %s. HDF5 versions <1.8.7 are not "
              "able to save empty arrays, thus you may experience problems if "
              "you for instance try to save before running any iteration steps."
              % str(h5py.version.hdf5_version_tuple))
    
    # Generate data
    w = np.random.normal(0, 1, size=(M,1,D_y))
    x = np.random.normal(0, 1, size=(1,N,D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.5, size=(M,N))

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.9) # randomly missing
    mask[:,20:40] = False # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_iterations=5)

    # Initialize some nodes randomly
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    # Save the state into a HDF5 file
    filename = tempfile.NamedTemporaryFile(suffix='hdf5').name
    Q.update(X, W, alpha, tau, repeat=1)
    Q.save(filename=filename)

    # Inference loop.
    Q.update(X, W, alpha, tau, repeat=10)

    # Reload the state from the HDF5 file
    Q.load(filename=filename)

    # Inference loop again.
    Q.update(X, W, alpha, tau, repeat=10)

    # NOTE: Saving and loading requires that you have the model
    # constructed. "Save" does not store the model structure nor does "load"
    # read it. They are just used for reading and writing the contents of the
    # nodes. Thus, if you want to load, you first need to construct the same
    # model that was used for saving and then use load to set the states of the
    # nodes.

    plt.clf()
    WX_params = WX.get_parameters()
    fh = WX_params[0] * np.ones(y.shape)
    err_fh = 2*np.sqrt(WX_params[1] + 1/tau.get_moments()[0]) * np.ones(y.shape)
    for m in range(M):
        plt.subplot(M,1,m+1)
        #errorplot(y, error=None, x=None, lower=None, upper=None):
        bpplt.errorplot(fh[m], x=np.arange(N), error=err_fh[m])
        plt.plot(np.arange(N), f[m], 'g')
        plt.plot(np.arange(N), y[m], 'r+')

    plt.figure()
    Q.plot_iteration_by_nodes()

    plt.figure()
    plt.subplot(2,2,1)
    bpplt.binary_matrix(W.mask)
    plt.subplot(2,2,2)
    bpplt.binary_matrix(X.mask)
    plt.subplot(2,2,3)
    #bpplt.binary_matrix(WX.get_mask())
    plt.subplot(2,2,4)
    bpplt.binary_matrix(Y.mask)