def run(M=40, N=100, D_y=6, D=8, seed=42, rotate=False, maxiter=1000,
        debug=False, plot=True):
    """
    Run pattern search demo for PCA.
    """

    if seed is not None:
        np.random.seed(seed)

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M, N))

    # Construct model
    Q = VB(*(pca.model(M, N, D)))

    # Data with missing values
    mask = random.mask(M, N, p=0.5)  # randomly missing
    y[~mask] = np.nan
    Q['Y'].observe(y, mask=mask)

    # Initialize some nodes randomly
    Q['X'].initialize_from_random()
    Q['W'].initialize_from_random()

    # Use a few VB-EM updates at the beginning
    Q.update(repeat=10)
    Q.save()

    # Standard VB-EM as a baseline
    Q.update(repeat=maxiter)
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore initial state
    Q.load()

    # Pattern search method for comparison
    for n in range(maxiter):
        Q.pattern_search('W', 'tau', maxiter=3, collapsed=['X', 'alpha'])
        Q.update(repeat=20)
        if Q.has_converged():
            break

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')
        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Pattern search'], loc='lower right')
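# ---------------------------------------------------------------------------
# Module-level imports assumed by the demo functions in this section (a
# sketch, not copied from the original modules; exact module paths may differ
# between BayesPy versions).  The rotation demo additionally references the
# older utils.random / utils.utils paths and a pca_model helper defined
# elsewhere in the repository, which are not reproduced here.
# ---------------------------------------------------------------------------
import tempfile

import numpy as np
import scipy.linalg
import h5py
import matplotlib.pyplot as plt

from bayespy.inference import VB
from bayespy.nodes import (GaussianARD, Gaussian, Gamma, Dirichlet,
                           Categorical, Mixture, SumMultiply)
import bayespy.plot as bpplt
from bayespy.utils import misc, random
from bayespy.inference.vmp import transformations
from bayespy.demos import pca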
def pca():

    np.random.seed(41)

    M = 10
    N = 3000
    D = 5

    # Construct the PCA model
    alpha = Gamma(1e-3, 1e-3, plates=(D,), name='alpha')
    W = GaussianARD(0, alpha, plates=(M, 1), shape=(D,), name='W')
    X = GaussianARD(0, 1, plates=(1, N), shape=(D,), name='X')
    tau = Gamma(1e-3, 1e-3, name='tau')
    W.initialize_from_random()
    F = SumMultiply('d,d->', W, X)
    Y = GaussianARD(F, tau, name='Y')

    # Observe data
    data = (np.sum(np.random.randn(M, 1, D - 1) * np.random.randn(1, N, D - 1),
                   axis=-1)
            + 1e-1 * np.random.randn(M, N))
    Y.observe(data)

    # Initialize VB engine
    Q = VB(Y, X, W, alpha, tau)

    # Take one update step (so phi is ok)
    Q.update(repeat=1)
    Q.save()

    # Run VB-EM
    Q.update(repeat=200)
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore the state
    Q.load()

    # Run Riemannian conjugate gradient
    #Q.optimize(X, alpha, maxiter=100, collapsed=[W, tau])
    Q.optimize(W, tau, maxiter=100, collapsed=[X, alpha])
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

    bpplt.pyplot.show()
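# A note on the optimize() call above (hedged, based on how the call is used
# here): the listed nodes (W, tau) are updated with Riemannian conjugate
# gradient steps, while the nodes in collapsed=[X, alpha] are updated to their
# optimal values within each objective evaluation.  The demo plots both
# lower-bound curves but does not label them; the calls below, mirroring the
# pattern-search demo, could be added just before bpplt.pyplot.show().
bpplt.pyplot.xlabel('CPU time (in seconds)')
bpplt.pyplot.ylabel('VB lower bound')
bpplt.pyplot.legend(['VB-EM', 'Riemannian conjugate gradient'],
                    loc='lower right')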
def run(M=50, N=200, D_y=10, D=20, maxiter=100):

    seed = 45
    print('seed =', seed)
    np.random.seed(seed)

    # Generate data (covariance eigenvalues: 1,1,...,1,2^2,3^2,...,(D_y+1)^2)
    (q, r) = scipy.linalg.qr(np.random.randn(M, M))
    C = np.diag(np.arange(2, 2 + D_y))
    C = np.ones(M)
    C[:D_y] += np.arange(1, 1 + D_y)
    y = C[:, np.newaxis] * np.random.randn(M, N)
    y = np.dot(q, y)

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = utils.random.mask(M, N, p=0.9)  # randomly missing
    mask[:, 20:40] = False  # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_filename=utils.utils.tempfile())

    # Initialize nodes (from prior and randomly)
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    Q.update(repeat=1)
    Q.save()

    #
    # Run inference with rotations.
    #
    rotX = transformations.RotateGaussian(X)
    rotW = transformations.RotateGaussianARD(W, alpha)
    R = transformations.RotationOptimizer(rotX, rotW, D)

    for ind in range(maxiter):
        Q.update()
        R.rotate(check_gradient=False,
                 maxiter=10,
                 verbose=False,
                 check_bound=Q.compute_lowerbound,
                 check_bound_terms=Q.compute_lowerbound_terms)

    L_rot = Q.L

    #
    # Re-run inference without rotations.
    #
    Q.load()
    Q.update(repeat=maxiter)
    L_norot = Q.L

    #
    # Plot comparison
    #
    plt.plot(L_rot)
    plt.plot(L_norot)
    plt.legend(['With rotations', 'Without rotations'], loc='lower right')
    plt.show()
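# The rotation demo above and the HDF5 demo below call pca_model(M, N, D),
# which is defined elsewhere in the repository.  A hypothetical minimal
# sketch built from the same nodes as pca() above; the real helper may differ
# in priors, node types, or names (for example, the node returned as WX):
def pca_model(M, N, D):
    alpha = Gamma(1e-3, 1e-3, plates=(D,), name='alpha')
    W = GaussianARD(0, alpha, plates=(M, 1), shape=(D,), name='W')
    X = GaussianARD(0, 1, plates=(1, N), shape=(D,), name='X')
    WX = SumMultiply('d,d->', W, X)
    tau = Gamma(1e-3, 1e-3, name='tau')
    Y = GaussianARD(WX, tau, name='Y')
    return (Y, WX, W, X, tau, alpha)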
def run(M=10, N=100, D_y=3, D=5):

    seed = 45
    print('seed =', seed)
    np.random.seed(seed)

    # Check HDF5 version.
    if h5py.version.hdf5_version_tuple < (1, 8, 7):
        print("WARNING! Your HDF5 version is %s. HDF5 versions <1.8.7 are not "
              "able to save empty arrays, thus you may experience problems if "
              "you for instance try to save before running any iteration steps."
              % str(h5py.version.hdf5_version_tuple))

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.5, size=(M, N))

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.9)  # randomly missing
    mask[:, 20:40] = False  # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_iterations=5)

    # Initialize some nodes randomly
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    # Save the state into an HDF5 file
    filename = tempfile.NamedTemporaryFile(suffix='hdf5').name
    Q.update(X, W, alpha, tau, repeat=1)
    Q.save(filename=filename)

    # Inference loop.
    Q.update(X, W, alpha, tau, repeat=10)

    # Reload the state from the HDF5 file
    Q.load(filename=filename)

    # Inference loop again.
    Q.update(X, W, alpha, tau, repeat=10)

    # NOTE: Saving and loading requires that you have the model
    # constructed. "Save" does not store the model structure nor does "load"
    # read it. They are just used for reading and writing the contents of the
    # nodes. Thus, if you want to load, you first need to construct the same
    # model that was used for saving and then use load to set the states of
    # the nodes.

    plt.clf()

    WX_params = WX.get_parameters()
    fh = WX_params[0] * np.ones(y.shape)
    err_fh = 2 * np.sqrt(WX_params[1] + 1 / tau.get_moments()[0]) * np.ones(y.shape)
    for m in range(M):
        plt.subplot(M, 1, m + 1)
        #errorplot(y, error=None, x=None, lower=None, upper=None):
        bpplt.errorplot(fh[m], x=np.arange(N), error=err_fh[m])
        plt.plot(np.arange(N), f[m], 'g')
        plt.plot(np.arange(N), y[m], 'r+')

    plt.figure()
    Q.plot_iteration_by_nodes()

    plt.figure()
    plt.subplot(2, 2, 1)
    bpplt.binary_matrix(W.mask)
    plt.subplot(2, 2, 2)
    bpplt.binary_matrix(X.mask)
    plt.subplot(2, 2, 3)
    #bpplt.binary_matrix(WX.get_mask())
    plt.subplot(2, 2, 4)
    bpplt.binary_matrix(Y.mask)
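# Sketch of the rebuild-and-load pattern described in the NOTE above: load()
# restores only node states, so the same model must be constructed again
# before loading.  This is a hypothetical standalone fragment; pca_model is
# the helper sketched earlier and 'filename' is assumed to be a file written
# previously by Q.save(filename=...).
M, N, D = 10, 100, 5
(Y, WX, W, X, tau, alpha) = pca_model(M, N, D)
Q = VB(Y, W, X, tau, alpha)
Q.load(filename=filename)
Q.update(X, W, alpha, tau, repeat=10)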
def run(N=100000, N_batch=50, seed=42, maxiter=100, plot=True):
    """
    Run stochastic variational inference demo for a Gaussian mixture model.
    """

    if seed is not None:
        np.random.seed(seed)

    # Number of clusters in the model
    K = 20

    # Dimensionality of the data
    D = 5

    # Generate data
    K_true = 10
    spread = 5
    means = spread * np.random.randn(K_true, D)
    z = random.categorical(np.ones(K_true), size=N)
    data = np.empty((N, D))
    for n in range(N):
        data[n] = means[z[n]] + np.random.randn(D)

    #
    # Standard VB-EM algorithm
    #

    # Full model
    mu = Gaussian(np.zeros(D), np.identity(D), plates=(K,), name='means')
    alpha = Dirichlet(np.ones(K), name='class probabilities')
    Z = Categorical(alpha, plates=(N,), name='classes')
    Y = Mixture(Z, Gaussian, mu, np.identity(D), name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Put the data in
    Y.observe(data)

    # Run inference
    Q = VB(Y, Z, mu, alpha)
    Q.save(mu)
    Q.update(repeat=maxiter)
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    max_cputime = np.sum(Q.cputime[~np.isnan(Q.cputime)])

    #
    # Stochastic variational inference
    #

    # Construct smaller model (size of the mini-batch)
    mu = Gaussian(np.zeros(D), np.identity(D), plates=(K,), name='means')
    alpha = Dirichlet(np.ones(K), name='class probabilities')
    Z = Categorical(alpha,
                    plates=(N_batch,),
                    plates_multiplier=(N/N_batch,),
                    name='classes')
    Y = Mixture(Z, Gaussian, mu, np.identity(D), name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Inference engine
    Q = VB(Y, Z, mu, alpha, autosave_filename=Q.autosave_filename)
    Q.load(mu)

    # Because using mini-batches, messages need to be multiplied appropriately
    print("Stochastic variational inference...")
    Q.ignore_bound_checks = True

    maxiter *= int(N/N_batch)
    delay = 1
    forgetting_rate = 0.7
    for n in range(maxiter):

        # Observe a mini-batch
        subset = np.random.choice(N, N_batch)
        Y.observe(data[subset, :])

        # Learn intermediate variables
        Q.update(Z)

        # Set step length
        step = (n + delay) ** (-forgetting_rate)

        # Stochastic gradient for the global variables
        Q.gradient_step(mu, alpha, scale=step)

        if np.sum(Q.cputime[:n]) > max_cputime:
            break

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')
        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Stochastic inference'], loc='lower right')
        bpplt.pyplot.title('VB for Gaussian mixture model')

    return
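# Notes on the stochastic updates above (a sketch).  plates_multiplier=(N/N_batch,)
# scales the mini-batch node's messages up to the size of the full data set,
# and the step lengths follow a Robbins-Monro schedule,
# step_n = (n + delay) ** (-kappa); with kappa = forgetting_rate in (0.5, 1]
# the steps decay slowly enough to keep learning yet fast enough for the
# stochastic gradient updates to converge.
delay = 1
forgetting_rate = 0.7
print([round((n + delay) ** (-forgetting_rate), 3) for n in range(4)])
# -> [1.0, 0.616, 0.463, 0.379]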
def run(N=500, seed=42, maxiter=100, plot=True):
    """
    Run deterministic annealing demo for 1-D Gaussian mixture.
    """

    if seed is not None:
        np.random.seed(seed)

    mu = GaussianARD(0, 1, plates=(2,), name='means')
    Z = Categorical([0.3, 0.7], plates=(N,), name='classes')
    Y = Mixture(Z, GaussianARD, mu, 1, name='observations')

    # Generate data
    z = Z.random()
    data = np.empty(N)
    for n in range(N):
        data[n] = [4, -4][z[n]]
    Y.observe(data)

    # Initialize means closer to the inferior local optimum in which the
    # cluster means are swapped
    mu.initialize_from_value([0, 6])

    Q = VB(Y, Z, mu)
    Q.save()

    #
    # Standard VB-EM algorithm
    #
    Q.update(repeat=maxiter)

    mu_vbem = mu.u[0].copy()
    L_vbem = Q.compute_lowerbound()

    #
    # VB-EM with deterministic annealing
    #
    Q.load()
    beta = 0.01
    while beta < 1.0:
        beta = min(beta*1.2, 1.0)
        print("Set annealing to %.2f" % beta)
        Q.set_annealing(beta)
        Q.update(repeat=maxiter, tol=1e-4)

    mu_anneal = mu.u[0].copy()
    L_anneal = Q.compute_lowerbound()

    print("==============================")
    print("RESULTS FOR VB-EM vs ANNEALING")
    print("Fixed component probabilities:", np.array([0.3, 0.7]))
    print("True component means:", np.array([4, -4]))
    print("VB-EM component means:", mu_vbem)
    print("VB-EM lower bound:", L_vbem)
    print("Annealed VB-EM component means:", mu_anneal)
    print("Annealed VB-EM lower bound:", L_anneal)

    return
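# The annealing loop above increases the inverse temperature geometrically
# from 0.01 up to 1.0.  A hypothetical helper factoring out that schedule
# (not part of the demo):
def annealing_schedule(beta=0.01, rate=1.2):
    """Yield an increasing sequence of annealing values ending at 1.0."""
    while beta < 1.0:
        beta = min(beta * rate, 1.0)
        yield beta

# Usage sketch, equivalent to the while-loop in the demo above:
#     for beta in annealing_schedule():
#         Q.set_annealing(beta)
#         Q.update(repeat=maxiter, tol=1e-4)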