def run(M=10, N=100, D_y=3, D=5, seed=42, rotate=False, maxiter=100,
        debug=False):

    if seed is not None:
        np.random.seed(seed)

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M, N))

    # Construct model
    (Y, F, W, X, tau, alpha) = model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.5)  # randomly missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha)

    # Initialize some nodes randomly
    X.initialize_from_random()
    W.initialize_from_random()

    # Run inference algorithm
    if rotate:
        # Use rotations to speed up learning
        rotW = transformations.RotateGaussianARD(W, alpha)
        rotX = transformations.RotateGaussianARD(X)
        R = transformations.RotationOptimizer(rotW, rotX, D)
        for ind in range(maxiter):
            Q.update()
            if debug:
                R.rotate(check_bound=True, check_gradient=True)
            else:
                R.rotate()
    else:
        # Use standard VB-EM alone
        Q.update(repeat=maxiter)

    # Plot results
    plt.figure()
    bpplt.timeseries_normal(F, scale=2)
    bpplt.timeseries(f, color='g', linestyle='-')
    bpplt.timeseries(y, color='r', linestyle='None', marker='+')
    plt.show()
def gaussianmix_model(N, K, D):
    # N = number of data vectors
    # K = number of clusters
    # D = dimensionality

    # Construct the Gaussian mixture model

    # K prior weights (for components)
    alpha = nodes.Dirichlet(1e-3*np.ones(K), name='alpha')
    # N K-dimensional cluster assignments (for data)
    z = nodes.Categorical(alpha, plates=(N,), name='z')
    # K D-dimensional component means
    X = nodes.Gaussian(np.zeros(D), 0.01*np.identity(D), plates=(K,), name='X')
    # K D-dimensional component covariances
    Lambda = nodes.Wishart(D, 0.01*np.identity(D), plates=(K,), name='Lambda')
    # N D-dimensional observation vectors
    Y = nodes.Mixture(z, nodes.Gaussian, X, Lambda, plates=(N,), name='Y')
    # TODO: Plates should be learned automatically if not given (it
    # would be the smallest shape broadcasted from the shapes of the
    # parents)

    z.initialize_from_random()

    return VB(Y, X, Lambda, z, alpha)
def model(M, N, D):
    # Construct the PCA model with ARD

    # ARD
    alpha = nodes.Gamma(1e-2, 1e-2, plates=(D,), name='alpha')
    # Loadings
    W = nodes.GaussianARD(0, alpha, shape=(D,), plates=(M, 1), name='W')
    # States
    X = nodes.GaussianARD(0, 1, shape=(D,), plates=(1, N), name='X')
    # PCA
    F = nodes.SumMultiply('i,i', W, X, name='F')
    # Noise
    tau = nodes.Gamma(1e-2, 1e-2, name='tau')
    # Noisy observations
    Y = nodes.GaussianARD(F, tau, name='Y')

    # Initialize some nodes randomly
    X.initialize_from_random()
    W.initialize_from_random()

    return VB(Y, F, W, X, tau, alpha)
def model(M=10, N=100, D=3):
    """
    Construct linear state-space model.

    See, for instance, the following publication:
    "Fast variational Bayesian linear state-space model"
    Luttinen (ECML 2013)
    """

    # Dynamics matrix with ARD
    alpha = Gamma(1e-5, 1e-5, plates=(D,), name='alpha')
    A = GaussianARD(0, alpha,
                    shape=(D,),
                    plates=(D,),
                    plotter=bpplt.GaussianHintonPlotter(rows=0, cols=1, scale=0),
                    name='A')
    A.initialize_from_value(np.identity(D))

    # Latent states with dynamics
    X = GaussianMarkovChain(np.zeros(D),         # mean of x0
                            1e-3*np.identity(D), # prec of x0
                            A,                   # dynamics
                            np.ones(D),          # innovation
                            n=N,                 # time instances
                            plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                            name='X')
    X.initialize_from_value(np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    gamma = Gamma(1e-5, 1e-5, plates=(D,), name='gamma')
    gamma.initialize_from_value(1e-2*np.ones(D))
    C = GaussianARD(0, gamma,
                    shape=(D,),
                    plates=(M, 1),
                    plotter=bpplt.GaussianHintonPlotter(rows=0, cols=2, scale=0),
                    name='C')
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Observation noise
    tau = Gamma(1e-5, 1e-5, name='tau')
    tau.initialize_from_value(1e2)

    # Underlying noiseless function
    F = SumMultiply('i,i', C, X, name='F')

    # Noisy observations
    Y = GaussianARD(F, tau, name='Y')

    # Note: the original listed C twice in the VB call; the duplicate is dropped.
    Q = VB(Y, F, C, gamma, X, A, alpha, tau)

    return Q
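# A minimal usage sketch (an assumption, not part of the original demo): build
# the state-space model above, feed it hypothetical observations through the
# node named 'Y', and run VB-EM. The data and iteration count are illustrative.
import numpy as np
Q = model(M=10, N=100, D=3)
y = np.random.randn(10, 100)  # hypothetical (M, N) observations
Q['Y'].observe(y)
Q.update(repeat=50)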
def run(M=10, N=100, D_y=3, D=5):
    seed = 45
    print('seed =', seed)
    np.random.seed(seed)

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = utils.utils.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.5, size=(M, N))

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = utils.random.mask(M, N, p=0.9)  # randomly missing
    mask[:, 20:40] = False                 # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha)

    # Initialize some nodes randomly
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    # Inference loop.
    Q.update(repeat=100)

    # Plot results
    plt.clf()
    WX_params = WX.get_parameters()
    fh = WX_params[0] * np.ones(y.shape)
    err_fh = 2*np.sqrt(WX_params[1] + 1/tau.get_moments()[0]) * np.ones(y.shape)
    for m in range(M):
        plt.subplot(M, 1, m+1)
        myplt.errorplot(fh[m], x=np.arange(N), error=err_fh[m])
        plt.plot(np.arange(N), f[m], 'g')
        plt.plot(np.arange(N), y[m], 'r+')
    plt.show()
def run(M=30, D=5):
    # Generate data
    y = np.random.randint(D, size=(M,))

    # Construct model
    p = nodes.Dirichlet(1*np.ones(D), name='p')
    z = nodes.Categorical(p, plates=(M,), name='z')

    # Observe the data with randomly missing values
    mask = random.mask(M, p=0.5)
    z.observe(y, mask=mask)

    # Run VB-EM
    Q = VB(p, z)
    Q.update()

    # Show results
    z.show()
    p.show()
def mixture_model(distribution, *args, K=3, N=100):
    # Prior for state probabilities
    alpha = Dirichlet(1e-3*np.ones(K), name='alpha')

    # Cluster assignments
    Z = Categorical(alpha, plates=(N,), name='Z')

    # Observation distribution
    Y = Mixture(Z, distribution, *args, name='Y')

    Q = VB(Y, Z, alpha)

    return Q
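# A minimal usage sketch (an assumption, not part of the original demo): a
# mixture of Bernoulli observations with fixed component probabilities, so only
# the assignments Z and the weights alpha are inferred. The import path and the
# toy data are illustrative assumptions.
import numpy as np
from bayespy.nodes import Bernoulli
data = np.random.randint(2, size=100)  # hypothetical binary observations
Q = mixture_model(Bernoulli, [0.1, 0.5, 0.9], K=3, N=100)
Q['Y'].observe(data)
Q.update(repeat=50)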
def run(M=10, N=100, D_y=3, D=5, seed=42, rotate=False, maxiter=100,
        debug=False, plot=True):

    if seed is not None:
        np.random.seed(seed)

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M, N))

    # Construct model
    (Y, F, W, X, tau, alpha) = model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.5)  # randomly missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha)

    # Initialize some nodes randomly
    X.initialize_from_random()
    W.initialize_from_random()

    # Run inference algorithm
    if rotate:
        # Use rotations to speed up learning
        rotW = transformations.RotateGaussianARD(W, alpha)
        rotX = transformations.RotateGaussianARD(X)
        R = transformations.RotationOptimizer(rotW, rotX, D)
        for ind in range(maxiter):
            Q.update()
            if debug:
                R.rotate(check_bound=True, check_gradient=True)
            else:
                R.rotate()
    else:
        # Use standard VB-EM alone
        Q.update(repeat=maxiter)

    # Plot results
    if plot:
        plt.figure()
        bpplt.timeseries_normal(F, scale=2)
        bpplt.timeseries(f, color='g', linestyle='-')
        bpplt.timeseries(y, color='r', linestyle='None', marker='+')
def hidden_markov_model(distribution, *args, K=3, N=100):
    # Prior for initial state probabilities
    alpha = Dirichlet(1e-3*np.ones(K), name='alpha')

    # Prior for state transition probabilities
    A = Dirichlet(1e-3*np.ones(K), plates=(K,), name='A')

    # Hidden states (with unknown initial state probabilities and state
    # transition probabilities)
    Z = CategoricalMarkovChain(alpha, A, states=N, name='Z')

    # Emission/observation distribution
    Y = Mixture(Z, distribution, *args, name='Y')

    Q = VB(Y, Z, alpha, A)

    return Q
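# A minimal usage sketch (an assumption, not part of the original demo): a
# discrete HMM with K=3 hidden states and Categorical emissions whose emission
# probabilities are fixed, so the chain and the transition probabilities are
# learned. The emission table and the data are illustrative assumptions.
import numpy as np
P = [[0.9, 0.1],   # hypothetical per-state emission probabilities
     [0.5, 0.5],
     [0.1, 0.9]]
data = np.random.randint(2, size=100)  # hypothetical observation sequence
Q = hidden_markov_model(Categorical, P, K=3, N=100)
Q['Y'].observe(data)
Q.update(repeat=50)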
def gaussianmix_model(N, K, D, covariance='full'):
    # N = number of data vectors
    # K = number of clusters
    # D = dimensionality

    # Construct the Gaussian mixture model

    # K prior weights (for components)
    alpha = nodes.Dirichlet(1e-3*np.ones(K), name='alpha')
    # N K-dimensional cluster assignments (for data)
    z = nodes.Categorical(alpha, plates=(N,), name='z')
    # K D-dimensional component means
    X = nodes.GaussianARD(0, 1e-3, shape=(D,), plates=(K,), name='X')

    if covariance.lower() == 'full':
        # K D-dimensional component covariances
        Lambda = nodes.Wishart(D, 0.01*np.identity(D), plates=(K,), name='Lambda')
        # N D-dimensional observation vectors
        Y = nodes.Mixture(z, nodes.Gaussian, X, Lambda, plates=(N,), name='Y')
    elif covariance.lower() == 'diagonal':
        # Inverse variances
        Lambda = nodes.Gamma(1e-3, 1e-3, plates=(K, D), name='Lambda')
        # N D-dimensional observation vectors
        Y = nodes.Mixture(z, nodes.GaussianARD, X, Lambda, plates=(N,), name='Y')
    elif covariance.lower() == 'isotropic':
        # Inverse variances
        Lambda = nodes.Gamma(1e-3, 1e-3, plates=(K, 1), name='Lambda')
        # N D-dimensional observation vectors
        Y = nodes.Mixture(z, nodes.GaussianARD, X, Lambda, plates=(N,), name='Y')

    z.initialize_from_random()

    return VB(Y, X, Lambda, z, alpha)
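# A minimal usage sketch (an assumption, not part of the original demo): fit
# the full-covariance variant to toy 2-D data; the data and iteration count
# are illustrative only.
import numpy as np
data = np.vstack([np.random.randn(50, 2) + 3, np.random.randn(50, 2) - 3])
Q = gaussianmix_model(100, 2, 2, covariance='full')
Q['Y'].observe(data)
Q.update(repeat=100)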
def run(M=10, N=100, D=5, seed=42, maxiter=100, plot=True):
    """
    Run black-box variational inference demo (not yet implemented).
    """

    raise NotImplementedError("Black box variational inference not yet implemented, sorry")

    if seed is not None:
        np.random.seed(seed)

    # Generate data
    data = np.dot(np.random.randn(M, D), np.random.randn(D, N))

    # Construct model
    C = GaussianARD(0, 1, shape=(2,), plates=(M, 1), name='C')
    X = GaussianARD(0, 1, shape=(2,), plates=(1, N), name='X')
    F = Dot(C, X)

    # Some arbitrary log likelihood
    def logpdf(y, f):
        """
        exp(f) / (1 + exp(f)) = 1/(1+exp(-f))

        -log(1+exp(-f)) = -log(exp(0)+exp(-f))

        also:

        1 - exp(f) / (1 + exp(f)) = (1 + exp(f) - exp(f)) / (1 + exp(f))
                                  = 1 / (1 + exp(f))

        -log(1+exp(f)) = -log(exp(0)+exp(f))
        """
        return -np.logaddexp(0, -f * np.where(y, -1, +1))

    Y = LogPDF(logpdf, F, samples=10, shape=())
    #Y = GaussianARD(F, 1)

    Y.observe(data)

    Q = VB(Y, C, X)
    Q.ignore_bound_checks = True

    delay = 1
    forgetting_rate = 0.7

    for n in range(maxiter):
        # Observe a mini-batch
        #subset = np.random.choice(N, N_batch)
        #Y.observe(data[subset,:])

        # Learn intermediate variables
        #Q.update(Z)

        # Set step length
        step = (n + delay) ** (-forgetting_rate)

        # Stochastic gradient for the global variables
        Q.gradient_step(C, X, scale=step)

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')
        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')

    return
def run(N=100000, N_batch=50, seed=42, maxiter=100, plot=True):
    """
    Run stochastic variational inference demo for a Gaussian mixture model.
    """

    if seed is not None:
        np.random.seed(seed)

    # Number of clusters in the model
    K = 20
    # Dimensionality of the data
    D = 5

    # Generate data
    K_true = 10
    spread = 5
    means = spread * np.random.randn(K_true, D)
    z = random.categorical(np.ones(K_true), size=N)
    data = np.empty((N, D))
    for n in range(N):
        data[n] = means[z[n]] + np.random.randn(D)

    #
    # Standard VB-EM algorithm
    #

    # Full model
    mu = Gaussian(np.zeros(D), np.identity(D), plates=(K,), name='means')
    alpha = Dirichlet(np.ones(K), name='class probabilities')
    Z = Categorical(alpha, plates=(N,), name='classes')
    Y = Mixture(Z, Gaussian, mu, np.identity(D), name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Put the data in
    Y.observe(data)

    # Run inference
    Q = VB(Y, Z, mu, alpha)
    Q.save(mu)
    Q.update(repeat=maxiter)

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    max_cputime = np.sum(Q.cputime[~np.isnan(Q.cputime)])

    #
    # Stochastic variational inference
    #

    # Construct smaller model (size of the mini-batch)
    mu = Gaussian(np.zeros(D), np.identity(D), plates=(K,), name='means')
    alpha = Dirichlet(np.ones(K), name='class probabilities')
    Z = Categorical(alpha,
                    plates=(N_batch,),
                    plates_multiplier=(N/N_batch,),
                    name='classes')
    Y = Mixture(Z, Gaussian, mu, np.identity(D), name='observations')

    # Break symmetry with random initialization of the means
    mu.initialize_from_random()

    # Inference engine
    Q = VB(Y, Z, mu, alpha, autosave_filename=Q.autosave_filename)
    Q.load(mu)

    # Because using mini-batches, messages need to be multiplied appropriately
    print("Stochastic variational inference...")
    Q.ignore_bound_checks = True

    maxiter *= int(N/N_batch)
    delay = 1
    forgetting_rate = 0.7
    for n in range(maxiter):
        # Observe a mini-batch
        subset = np.random.choice(N, N_batch)
        Y.observe(data[subset, :])

        # Learn intermediate variables
        Q.update(Z)

        # Set step length
        step = (n + delay) ** (-forgetting_rate)

        # Stochastic gradient for the global variables
        Q.gradient_step(mu, alpha, scale=step)

        if np.sum(Q.cputime[:n]) > max_cputime:
            break

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')
        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Stochastic inference'], loc='lower right')
        bpplt.pyplot.title('VB for Gaussian mixture model')

    return
def run(M=40, N=100, D_y=6, D=8, seed=42, rotate=False, maxiter=1000,
        debug=False, plot=True):
    """
    Run pattern search demo for PCA.
    """

    if seed is not None:
        np.random.seed(seed)

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.2, size=(M, N))

    # Construct model
    Q = VB(*(pca.model(M, N, D)))

    # Data with missing values
    mask = random.mask(M, N, p=0.5)  # randomly missing
    y[~mask] = np.nan
    Q['Y'].observe(y, mask=mask)

    # Initialize some nodes randomly
    Q['X'].initialize_from_random()
    Q['W'].initialize_from_random()

    # Use a few VB-EM updates at the beginning
    Q.update(repeat=10)
    Q.save()

    # Standard VB-EM as a baseline
    Q.update(repeat=maxiter)
    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore initial state
    Q.load()

    # Pattern search method for comparison
    for n in range(maxiter):
        Q.pattern_search('W', 'tau', maxiter=3, collapsed=['X', 'alpha'])
        Q.update(repeat=20)
        if Q.has_converged():
            break

    if plot:
        bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')
        bpplt.pyplot.xlabel('CPU time (in seconds)')
        bpplt.pyplot.ylabel('VB lower bound')
        bpplt.pyplot.legend(['VB-EM', 'Pattern search'], loc='lower right')
def run_dlssm(y, f, mask, D, K, maxiter):
    """
    Run VB inference for linear state space model with drifting dynamics.
    """

    (M, N) = np.shape(y)

    # Dynamics matrix with ARD
    # alpha : (K) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(K,), name='alpha')
    # A : (K) x (K)
    A = Gaussian(np.zeros(K),
                 #np.identity(K),
                 diagonal(alpha),
                 plates=(K,),
                 name='A_S')
    A.initialize_from_value(np.identity(K))

    # rho
    ## rho = Gamma(1e-5,
    ##             1e-5,
    ##             plates=(K,),
    ##             name="rho")

    # S : () x (N-1,K)
    S = GaussianMarkovChain(np.ones(K),
                            1e-6*np.identity(K),
                            A,
                            np.ones(K),
                            n=N-1,
                            name='S')
    S.initialize_from_value(1*np.ones((N-1, K)))

    # Projection matrix of the dynamics matrix
    # beta : (K) x ()
    beta = Gamma(1e-5, 1e-5, plates=(K,), name='beta')
    # B : (D) x (D*K)
    B = Gaussian(np.zeros(D*K), diagonal(tile(beta, D)), plates=(D,), name='B')
    b = np.zeros((D, D, K))
    b[np.arange(D), np.arange(D), np.zeros(D, dtype=int)] = 1
    B.initialize_from_value(np.reshape(1*b, (D, D*K)))

    # A : (N-1,D) x (D)
    BS = MatrixDot(B, S.as_gaussian().add_plate_axis(-1), name='BS')

    # Latent states with dynamics
    # X : () x (N,D)
    X = GaussianMarkovChain(np.zeros(D),         # mean of x0
                            1e-3*np.identity(D), # prec of x0
                            BS,                  # dynamics
                            np.ones(D),          # innovation
                            n=N,                 # time instances
                            name='X',
                            initialize=False)
    X.initialize_from_value(np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D,), name='gamma')
    # C : (M,1) x (D)
    C = Gaussian(np.zeros(D), diagonal(gamma), plates=(M, 1), name='C')
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5, 1e-5, name='tau')

    # Observations
    # Y : (M,N) x ()
    CX = Dot(C, X.as_gaussian())
    Y = Normal(CX, tau, name='Y')

    #
    # RUN INFERENCE
    #

    # Observe data
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, X, S, A, alpha, B, beta, C, gamma, tau)

    #
    # Run inference with rotations.
    #

    # Rotate the D-dimensional state space (C, X)
    rotB = transformations.RotateGaussianMatrixARD(B, beta, axis='rows')
    rotX = transformations.RotateDriftingMarkovChain(X, B, S, rotB)
    rotC = transformations.RotateGaussianARD(C, gamma)
    R_X = transformations.RotationOptimizer(rotX, rotC, D)

    # Rotate the K-dimensional latent dynamics space (B, S)
    rotA = transformations.RotateGaussianARD(A, alpha)
    rotS = transformations.RotateGaussianMarkovChain(S, A, rotA)
    rotB = transformations.RotateGaussianMatrixARD(B, beta, axis='cols')
    R_S = transformations.RotationOptimizer(rotS, rotB, K)

    # Iterate
    for ind in range(int(maxiter/5)):
        Q.update(repeat=5)
        #Q.update(X, S, A, alpha, rho, B, beta, C, gamma, tau, repeat=maxiter)
        R_X.rotate()
        R_S.rotate()
        ## R_X.rotate(
        ##     check_bound=Q.compute_lowerbound,
        ##     check_bound_terms=Q.compute_lowerbound_terms,
        ##     check_gradient=True
        ## )
        ## R_S.rotate(
        ##     check_bound=Q.compute_lowerbound,
        ##     check_bound_terms=Q.compute_lowerbound_terms,
        ##     check_gradient=True
        ## )

    #
    # SHOW RESULTS
    #

    # Mean and standard deviation of the posterior
    (f_mean, f_squared) = CX.get_moments()
    f_std = np.sqrt(f_squared - f_mean**2)

    # Plot observations space
    for m in range(M):
        plt.subplot(M, 1, m+1)
        plt.plot(y[m, :], 'r.')
        plt.plot(f[m, :], 'b-')
        bpplt.errorplot(y=f_mean[m, :], error=2*f_std[m, :])
def run(M=50, N=200, D_y=10, D=20, maxiter=100):
    seed = 45
    print("seed =", seed)
    np.random.seed(seed)

    # Generate data (covariance eigenvalues: 1,1,...,1,2^2,3^2,...,(D_y+1)^2)
    (q, r) = scipy.linalg.qr(np.random.randn(M, M))
    C = np.diag(np.arange(2, 2+D_y))
    C = np.ones(M)
    C[:D_y] += np.arange(1, 1+D_y)
    y = C[:, np.newaxis] * np.random.randn(M, N)
    y = np.dot(q, y)

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = utils.random.mask(M, N, p=0.9)  # randomly missing
    mask[:, 20:40] = False                 # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_filename=utils.utils.tempfile())

    # Initialize nodes (from prior and randomly)
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    Q.update(repeat=1)
    Q.save()

    #
    # Run inference with rotations.
    #
    rotX = transformations.RotateGaussian(X)
    rotW = transformations.RotateGaussianARD(W, alpha)
    R = transformations.RotationOptimizer(rotX, rotW, D)

    for ind in range(maxiter):
        Q.update()
        R.rotate(check_gradient=False,
                 maxiter=10,
                 verbose=False,
                 check_bound=Q.compute_lowerbound,
                 check_bound_terms=Q.compute_lowerbound_terms)

    L_rot = Q.L

    #
    # Re-run inference without rotations.
    #
    Q.load()
    Q.update(repeat=maxiter)

    L_norot = Q.L

    #
    # Plot comparison
    #
    plt.plot(L_rot)
    plt.plot(L_norot)
    plt.legend(["With rotations", "Without rotations"], loc="lower right")
    plt.show()
def run_lssm(y, f, mask, D, maxiter):
    """
    Run VB inference for linear state space model.
    """

    (M, N) = np.shape(y)

    #
    # CONSTRUCT THE MODEL
    #

    # Dynamic matrix
    # alpha: (D) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(D,), name='alpha')
    # A : (D) x (D)
    A = Gaussian(np.zeros(D), diagonal(alpha), plates=(D,), name='A')
    A.initialize_from_value(np.identity(D))

    # Latent states with dynamics
    # X : () x (N,D)
    X = GaussianMarkovChain(np.zeros(D),         # mean of x0
                            1e-3*np.identity(D), # prec of x0
                            A,                   # dynamics
                            np.ones(D),          # innovation
                            n=N,                 # time instances
                            name='X',
                            initialize=False)
    X.initialize_from_value(np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D,), name='gamma')
    # C : (M,1) x (D)
    C = Gaussian(np.zeros(D), diagonal(gamma), plates=(M, 1), name='C')
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5, 1e-5, name='tau')

    # Observations
    # Y : (M,N) x ()
    CX = Dot(C, X.as_gaussian())
    Y = Normal(CX, tau, name='Y')

    # Rotate the D-dimensional latent space
    rotA = transformations.RotateGaussianARD(A, alpha)
    rotX = transformations.RotateGaussianMarkovChain(X, A, rotA)
    rotC = transformations.RotateGaussianARD(C, gamma)
    R = transformations.RotationOptimizer(rotX, rotC, D)

    #
    # RUN INFERENCE
    #

    # Observe data
    Y.observe(y, mask=mask)
    # Construct inference machine
    Q = VB(Y, X, A, alpha, C, gamma, tau)
    # Iterate
    for ind in range(maxiter):
        Q.update(X, A, alpha, C, gamma, tau)
        R.rotate()

    #
    # SHOW RESULTS
    #

    plt.figure()
    bpplt.timeseries_normal(CX)
    bpplt.timeseries(f, 'b-')
    bpplt.timeseries(y, 'r.')
def pca():
    np.random.seed(41)

    M = 10
    N = 3000
    D = 5

    # Construct the PCA model
    alpha = Gamma(1e-3, 1e-3, plates=(D,), name='alpha')
    W = GaussianARD(0, alpha, plates=(M, 1), shape=(D,), name='W')
    X = GaussianARD(0, 1, plates=(1, N), shape=(D,), name='X')
    tau = Gamma(1e-3, 1e-3, name='tau')
    W.initialize_from_random()
    F = SumMultiply('d,d->', W, X)
    Y = GaussianARD(F, tau, name='Y')

    # Observe data
    data = np.sum(np.random.randn(M, 1, D-1) * np.random.randn(1, N, D-1),
                  axis=-1) + 1e-1 * np.random.randn(M, N)
    Y.observe(data)

    # Initialize VB engine
    Q = VB(Y, X, W, alpha, tau)

    # Take one update step (so phi is ok)
    Q.update(repeat=1)
    Q.save()

    # Run VB-EM
    Q.update(repeat=200)
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'k-')

    # Restore the state
    Q.load()

    # Run Riemannian conjugate gradient
    #Q.optimize(X, alpha, maxiter=100, collapsed=[W, tau])
    Q.optimize(W, tau, maxiter=100, collapsed=[X, alpha])
    bpplt.pyplot.plot(np.cumsum(Q.cputime), Q.L, 'r:')

    bpplt.pyplot.show()
                X, name='F')
Y = GaussianARD(F, tau, name='Y')

#
# An inference machine using variational Bayesian inference with variational
# message passing is then constructed as
#

# In[3]:

from bayespy.inference.vmp.vmp import VB
Q = VB(X, C, gamma, A, alpha, tau, Y)

#
# Observe the data partially (80% is marked missing):
#

# In[4]:

from bayespy.utils import random

# Add missing values randomly (keep only 20%)
mask = random.mask(M, N, p=0.2)
Y.observe(y, mask=mask)
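#
# A plausible next step (an assumption, not part of the original notebook
# fragment): run the VB message-passing iteration now that the data has been
# observed.
#

# In[5]:

Q.update(repeat=10)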
def run_lssm(y, f, mask, D, maxiter):
    """
    Run VB inference for linear state space model.
    """

    (M, N) = np.shape(y)

    #
    # CONSTRUCT THE MODEL
    #

    # Dynamic matrix
    # alpha: (D) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(D,), name='alpha')
    # A : (D) x (D)
    A = Gaussian(np.zeros(D), diagonal(alpha), plates=(D,), name='A')
    A.initialize_from_value(np.identity(D))

    # Latent states with dynamics
    # X : () x (N,D)
    X = GaussianMarkovChain(np.zeros(D),         # mean of x0
                            1e-3*np.identity(D), # prec of x0
                            A,                   # dynamics
                            np.ones(D),          # innovation
                            n=N,                 # time instances
                            name='X',
                            initialize=False)
    X.initialize_from_value(np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D,), name='gamma')
    # C : (M,1) x (D)
    C = Gaussian(np.zeros(D), diagonal(gamma), plates=(M, 1), name='C')
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5, 1e-5, name='tau')

    # Observations
    # Y : (M,N) x ()
    CX = Dot(C, X.as_gaussian())
    Y = Normal(CX, tau, name='Y')

    #
    # RUN INFERENCE
    #

    # Observe data
    Y.observe(y, mask=mask)
    # Construct inference machine
    Q = VB(Y, X, A, alpha, C, gamma, tau)
    # Iterate
    Q.update(X, A, alpha, C, gamma, tau, repeat=maxiter)

    #
    # SHOW RESULTS
    #

    # Mean and standard deviation of the posterior
    (f_mean, f_squared) = CX.get_moments()
    f_std = np.sqrt(f_squared - f_mean**2)

    # Plot observations space
    #plt.figure()
    for m in range(M):
        plt.subplot(M, 1, m+1)
        plt.plot(y[m, :], 'r.')
        plt.plot(f[m, :], 'b-')
        bpplt.errorplot(y=f_mean[m, :], error=2*f_std[m, :])
def run(maxiter=100):
    seed = 496  #np.random.randint(1000)
    print("seed = ", seed)
    np.random.seed(seed)

    # Simulate some data
    D = 3
    M = 6
    N = 200
    c = np.random.randn(M, D)
    w = 0.3
    a = np.array([[np.cos(w), -np.sin(w), 0],
                  [np.sin(w), np.cos(w),  0],
                  [0,         0,          1]])
    x = np.empty((N, D))
    f = np.empty((M, N))
    y = np.empty((M, N))
    x[0] = 10 * np.random.randn(D)
    f[:, 0] = np.dot(c, x[0])
    y[:, 0] = f[:, 0] + 3 * np.random.randn(M)
    for n in range(N-1):
        x[n+1] = np.dot(a, x[n]) + np.random.randn(D)
        f[:, n+1] = np.dot(c, x[n+1])
        y[:, n+1] = f[:, n+1] + 3 * np.random.randn(M)

    # Create the model
    (Y, CX, X, tau, C, gamma, A, alpha) = linear_state_space_model(D, N, M)

    # Add missing values randomly
    mask = random.mask(M, N, p=0.3)
    # Add missing values to a period of time
    mask[:, 30:80] = False
    y[~mask] = np.nan  # BayesPy doesn't require this. Just for plotting.

    # Observe the data
    Y.observe(y, mask=mask)

    # Initialize nodes (must use some randomness for C)
    C.initialize_from_random()

    # Run inference
    Q = VB(Y, X, C, gamma, A, alpha, tau)

    #
    # Run inference with rotations.
    #
    rotA = transformations.RotateGaussianARD(A, alpha)
    rotX = transformations.RotateGaussianMarkovChain(X, A, rotA)
    rotC = transformations.RotateGaussianARD(C, gamma)
    R = transformations.RotationOptimizer(rotX, rotC, D)

    #maxiter = 84
    for ind in range(maxiter):
        Q.update()
        #print('C term', C.lower_bound_contribution())
        R.rotate(maxiter=10,
                 check_gradient=True,
                 verbose=False,
                 check_bound=Q.compute_lowerbound,
                 #check_bound=None,
                 check_bound_terms=Q.compute_lowerbound_terms)
                 #check_bound_terms=None)

    X_vb = X.u[0]
    varX_vb = utils.diagonal(X.u[1]
                             - X_vb[..., np.newaxis, :] * X_vb[..., :, np.newaxis])

    u_CX = CX.get_moments()
    CX_vb = u_CX[0]
    varCX_vb = u_CX[1] - CX_vb**2

    # Show results
    plt.figure(3)
    plt.clf()
    for m in range(M):
        plt.subplot(M, 1, m+1)
        plt.plot(y[m, :], 'r.')
        plt.plot(f[m, :], 'b-')
        bpplt.errorplot(y=CX_vb[m, :], error=2*np.sqrt(varCX_vb[m, :]))

    plt.figure()
    Q.plot_iteration_by_nodes()
def model(M, N, D, K):
    """
    Construct the linear state-space model with time-varying dynamics

    For reference, see the following publication:
    (TODO)
    """

    #
    # The model block for the latent mixing weight process
    #

    # Dynamics matrix with ARD
    # beta : (K) x ()
    beta = Gamma(1e-5, 1e-5, plates=(K,), name='beta')
    # B : (K) x (K)
    B = GaussianARD(np.identity(K),
                    beta,
                    shape=(K,),
                    plates=(K,),
                    name='B',
                    plotter=bpplt.GaussianHintonPlotter(rows=0, cols=1, scale=0),
                    initialize=False)
    B.initialize_from_value(np.identity(K))

    # Mixing weight process, that is, the weights in the linear combination of
    # state dynamics matrices
    # S : () x (N,K)
    S = GaussianMarkovChain(np.ones(K),
                            1e-6*np.identity(K),
                            B,
                            np.ones(K),
                            n=N,
                            name='S',
                            plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                            initialize=False)
    s = 10*np.random.randn(N, K)
    s[:, 0] = 10
    S.initialize_from_value(s)

    #
    # The model block for the latent states
    #

    # Projection matrix of the dynamics matrix
    # alpha : (D,K) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(D, K), name='alpha')
    alpha.initialize_from_value(1*np.ones((D, K)))
    # A : (D) x (D,K)
    A = GaussianARD(0,
                    alpha,
                    shape=(D, K),
                    plates=(D,),
                    name='A',
                    plotter=bpplt.GaussianHintonPlotter(rows=0, cols=1, scale=0),
                    initialize=False)

    # Initialize S and A such that A*S is almost an identity matrix
    a = np.zeros((D, D, K))
    a[np.arange(D), np.arange(D), np.zeros(D, dtype=int)] = 1
    a[:, :, 0] = np.identity(D) / s[0, 0]
    a[:, :, 1:] = 0.1/s[0, 0] * np.random.randn(D, D, K-1)
    A.initialize_from_value(a)

    # Latent states with dynamics
    # X : () x (N,D)
    X = VaryingGaussianMarkovChain(np.zeros(D),         # mean of x0
                                   1e-3*np.identity(D), # prec of x0
                                   A,                   # dynamics matrices
                                   S._convert(GaussianMoments)[1:], # temporal weights
                                   np.ones(D),          # innovation
                                   n=N,                 # time instances
                                   name='X',
                                   plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                                   initialize=False)
    X.initialize_from_value(np.random.randn(N, D))

    #
    # The model block for observations
    #

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D,), name='gamma')
    gamma.initialize_from_value(1e-2*np.ones(D))
    # C : (M,1) x (D)
    C = GaussianARD(0,
                    gamma,
                    shape=(D,),
                    plates=(M, 1),
                    name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=0, cols=2, scale=0))
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Noiseless process
    # F : (M,N) x ()
    F = SumMultiply('d,d', C, X, name='F')

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5, 1e-5, name='tau')
    tau.initialize_from_value(1e2)

    # Observations
    # Y: (M,N) x ()
    Y = GaussianARD(F, tau, name='Y')

    # Construct inference machine
    Q = VB(Y, F, C, gamma, X, A, alpha, tau, S, B, beta)

    return Q
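# A minimal usage sketch (an assumption, not part of the original demo): build
# the time-varying model, observe partially missing data through the node
# named 'Y', and run VB-EM. Data, mask and iteration count are illustrative.
import numpy as np
from bayespy.utils import random
M, N, D, K = 10, 200, 3, 2
Q = model(M, N, D, K)
y = np.random.randn(M, N)        # hypothetical observations
mask = random.mask(M, N, p=0.8)  # keep roughly 80% of the values
Q['Y'].observe(y, mask=mask)
Q.update(repeat=50)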
def run(M=10, N=100, D_y=3, D=5):
    seed = 45
    print('seed =', seed)
    np.random.seed(seed)

    # Check HDF5 version.
    if h5py.version.hdf5_version_tuple < (1, 8, 7):
        print("WARNING! Your HDF5 version is %s. HDF5 versions <1.8.7 are not "
              "able to save empty arrays, thus you may experience problems if "
              "you for instance try to save before running any iteration steps."
              % str(h5py.version.hdf5_version_tuple))

    # Generate data
    w = np.random.normal(0, 1, size=(M, 1, D_y))
    x = np.random.normal(0, 1, size=(1, N, D_y))
    f = misc.sum_product(w, x, axes_to_sum=[-1])
    y = f + np.random.normal(0, 0.5, size=(M, N))

    # Construct model
    (Y, WX, W, X, tau, alpha) = pca_model(M, N, D)

    # Data with missing values
    mask = random.mask(M, N, p=0.9)  # randomly missing
    mask[:, 20:40] = False           # gap missing
    y[~mask] = np.nan
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, W, X, tau, alpha, autosave_iterations=5)

    # Initialize some nodes randomly
    X.initialize_from_value(X.random())
    W.initialize_from_value(W.random())

    # Save the state into a HDF5 file
    filename = tempfile.NamedTemporaryFile(suffix='hdf5').name
    Q.update(X, W, alpha, tau, repeat=1)
    Q.save(filename=filename)

    # Inference loop.
    Q.update(X, W, alpha, tau, repeat=10)

    # Reload the state from the HDF5 file
    Q.load(filename=filename)

    # Inference loop again.
    Q.update(X, W, alpha, tau, repeat=10)

    # NOTE: Saving and loading requires that you have the model
    # constructed. "Save" does not store the model structure nor does "load"
    # read it. They are just used for reading and writing the contents of the
    # nodes. Thus, if you want to load, you first need to construct the same
    # model that was used for saving and then use load to set the states of the
    # nodes.

    plt.clf()

    WX_params = WX.get_parameters()
    fh = WX_params[0] * np.ones(y.shape)
    err_fh = 2*np.sqrt(WX_params[1] + 1/tau.get_moments()[0]) * np.ones(y.shape)
    for m in range(M):
        plt.subplot(M, 1, m+1)
        #errorplot(y, error=None, x=None, lower=None, upper=None):
        bpplt.errorplot(fh[m], x=np.arange(N), error=err_fh[m])
        plt.plot(np.arange(N), f[m], 'g')
        plt.plot(np.arange(N), y[m], 'r+')

    plt.figure()
    Q.plot_iteration_by_nodes()

    plt.figure()
    plt.subplot(2, 2, 1)
    bpplt.binary_matrix(W.mask)
    plt.subplot(2, 2, 2)
    bpplt.binary_matrix(X.mask)
    plt.subplot(2, 2, 3)
    #bpplt.binary_matrix(WX.get_mask())
    plt.subplot(2, 2, 4)
    bpplt.binary_matrix(Y.mask)
def model(M=20, N=100, D=10, K=3):
    """
    Construct the linear state-space model with switching dynamics.
    """

    #
    # Switching dynamics (HMM)
    #

    # Prior for initial state probabilities
    rho = Dirichlet(1e-3*np.ones(K), name='rho')

    # Prior for state transition probabilities
    V = Dirichlet(1e-3*np.ones(K), plates=(K,), name='V')
    v = 10*np.identity(K) + 1*np.ones((K, K))
    v /= np.sum(v, axis=-1, keepdims=True)
    V.initialize_from_value(v)

    # Hidden states (with unknown initial state probabilities and state
    # transition probabilities)
    Z = CategoricalMarkovChain(rho, V,
                               states=N-1,
                               name='Z',
                               plotter=bpplt.CategoricalMarkovChainPlotter(),
                               initialize=False)
    Z.u[0] = np.random.dirichlet(np.ones(K))
    Z.u[1] = np.reshape(np.random.dirichlet(0.5*np.ones(K*K), size=(N-2)),
                        (N-2, K, K))

    #
    # Linear state-space models
    #

    # Dynamics matrix with ARD
    # (K,D) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(K, 1, D), name='alpha')
    # (K,1,1,D) x (D)
    A = GaussianARD(0, alpha,
                    shape=(D,),
                    plates=(K, D),
                    name='A',
                    plotter=bpplt.GaussianHintonPlotter())
    A.initialize_from_value(np.identity(D)*np.ones((K, D, D))
                            + 0.1*np.random.randn(K, D, D))

    # Latent states with dynamics
    # (K,1) x (N,D)
    X = SwitchingGaussianMarkovChain(np.zeros(D),         # mean of x0
                                     1e-3*np.identity(D), # prec of x0
                                     A,                   # dynamics
                                     Z,                   # dynamics selection
                                     np.ones(D),          # innovation
                                     n=N,                 # time instances
                                     name='X',
                                     plotter=bpplt.GaussianMarkovChainPlotter())
    X.initialize_from_value(10*np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    # (K,1,1,D) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D,), name='gamma')
    # (K,M,1) x (D)
    C = GaussianARD(0, gamma,
                    shape=(D,),
                    plates=(M, 1),
                    name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=-3, cols=-1))
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Underlying noiseless function
    # (K,M,N) x ()
    F = SumMultiply('i,i', C, X, name='F')

    #
    # Mixing the models
    #

    # Observation noise
    tau = Gamma(1e-5, 1e-5, name='tau')
    tau.initialize_from_value(1e2)

    # Emission/observation distribution
    Y = GaussianARD(F, tau, name='Y')

    Q = VB(Y, F, Z, rho, V, C, gamma, X, A, alpha, tau)

    return Q
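# A minimal usage sketch (an assumption, not part of the original demo):
# construct the switching model with its default sizes, observe hypothetical
# data through the node named 'Y', and run VB-EM.
import numpy as np
Q = model()                   # M=20, N=100, D=10, K=3
y = np.random.randn(20, 100)  # hypothetical (M, N) observations
Q['Y'].observe(y)
Q.update(repeat=100)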
def run(N=500, seed=42, maxiter=100, plot=True):
    """
    Run deterministic annealing demo for 1-D Gaussian mixture.
    """

    if seed is not None:
        np.random.seed(seed)

    mu = GaussianARD(0, 1, plates=(2,), name='means')
    Z = Categorical([0.3, 0.7], plates=(N,), name='classes')
    Y = Mixture(Z, GaussianARD, mu, 1, name='observations')

    # Generate data
    z = Z.random()
    data = np.empty(N)
    for n in range(N):
        data[n] = [4, -4][z[n]]

    Y.observe(data)

    # Initialize means closer to the inferior local optimum in which the
    # cluster means are swapped
    mu.initialize_from_value([0, 6])

    Q = VB(Y, Z, mu)
    Q.save()

    #
    # Standard VB-EM algorithm
    #
    Q.update(repeat=maxiter)

    mu_vbem = mu.u[0].copy()
    L_vbem = Q.compute_lowerbound()

    #
    # VB-EM with deterministic annealing
    #
    Q.load()
    beta = 0.01
    while beta < 1.0:
        beta = min(beta*1.2, 1.0)
        print("Set annealing to %.2f" % beta)
        Q.set_annealing(beta)
        Q.update(repeat=maxiter, tol=1e-4)

    mu_anneal = mu.u[0].copy()
    L_anneal = Q.compute_lowerbound()

    print("==============================")
    print("RESULTS FOR VB-EM vs ANNEALING")
    print("Fixed component probabilities:", np.array([0.3, 0.7]))
    print("True component means:", np.array([4, -4]))
    print("VB-EM component means:", mu_vbem)
    print("VB-EM lower bound:", L_vbem)
    print("Annealed VB-EM component means:", mu_anneal)
    print("Annealed VB-EM lower bound:", L_anneal)

    return
def run_dlssm(y, f, mask, D, K, maxiter):
    """
    Run VB inference for linear state space model with drifting dynamics.
    """

    (M, N) = np.shape(y)

    # Dynamics matrix with ARD
    # alpha : (K) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(K,), name='alpha')
    # A : (K) x (K)
    A = GaussianArrayARD(np.identity(K),
                         alpha,
                         shape=(K,),
                         plates=(K,),
                         name='A_S',
                         initialize=False)
    A.initialize_from_value(np.identity(K))

    # State of the drift
    # S : () x (N,K)
    S = GaussianMarkovChain(np.ones(K),
                            1e-6*np.identity(K),
                            A,
                            np.ones(K),
                            n=N,
                            name='S',
                            initialize=False)
    S.initialize_from_value(np.ones((N, K)))

    # Projection matrix of the dynamics matrix
    # Initialize S and B such that BS is identity matrix
    # beta : (D,K) x ()
    beta = Gamma(1e-5, 1e-5, plates=(D, K), name='beta')
    # B : (D) x (D,K)
    b = np.zeros((D, D, K))
    b[np.arange(D), np.arange(D), np.zeros(D, dtype=int)] = 1
    B = GaussianArrayARD(0, beta, plates=(D,), name='B', initialize=False)
    B.initialize_from_value(np.reshape(1*b, (D, D, K)))

    # BS : (N-1,D) x (D)
    # TODO/FIXME: Implement __getitem__ method
    BS = SumMultiply('dk,k->d',
                     B,
                     S.as_gaussian()[..., np.newaxis],
                     #iterator_axis=0,
                     name='BS')

    # Latent states with dynamics
    # X : () x (N,D)
    X = GaussianMarkovChain(np.zeros(D),         # mean of x0
                            1e-3*np.identity(D), # prec of x0
                            BS,                  # dynamics
                            np.ones(D),          # innovation
                            n=N+1,               # time instances
                            name='X',
                            initialize=False)
    X.initialize_from_value(np.random.randn(N+1, D))

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D,K) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D, K), name='gamma')
    # C : (M,1) x (D,K)
    C = GaussianArrayARD(0, gamma, plates=(M, 1), name='C', initialize=False)
    C.initialize_from_random()

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5, 1e-5, name='tau')

    # Observations
    # Y : (M,N) x ()
    F = SumMultiply('dk,d,k',
                    C,
                    X.as_gaussian()[1:],
                    S.as_gaussian(),
                    name='F')
    Y = GaussianArrayARD(F, tau, name='Y')

    #
    # RUN INFERENCE
    #

    # Observe data
    Y.observe(y, mask=mask)

    # Construct inference machine
    Q = VB(Y, X, S, A, alpha, B, beta, C, gamma, tau)

    #
    # Run inference with rotations.
    #
    rotate = False
    if rotate:
        # Rotate the D-dimensional state space (C, X)
        rotB = transformations.RotateGaussianMatrixARD(B, beta, D, K, axis='rows')
        rotX = transformations.RotateDriftingMarkovChain(X, B, S, rotB)
        rotC = transformations.RotateGaussianARD(C, gamma)
        R_X = transformations.RotationOptimizer(rotX, rotC, D)

        # Rotate the K-dimensional latent dynamics space (B, S)
        rotA = transformations.RotateGaussianARD(A, alpha)
        rotS = transformations.RotateGaussianMarkovChain(S, A, rotA)
        rotB = transformations.RotateGaussianMatrixARD(B, beta, D, K, axis='cols')
        R_S = transformations.RotationOptimizer(rotS, rotB, K)

    # Iterate
    for ind in range(maxiter):
        print("X update")
        Q.update(X)
        print("S update")
        Q.update(S)
        print("A update")
        Q.update(A)
        print("alpha update")
        Q.update(alpha)
        print("B update")
        Q.update(B)
        print("beta update")
        Q.update(beta)
        print("C update")
        Q.update(C)
        print("gamma update")
        Q.update(gamma)
        print("tau update")
        Q.update(tau)
        if rotate:
            if ind >= 0:
                R_X.rotate()
            if ind >= 0:
                R_S.rotate()

    Q.plot_iteration_by_nodes()

    #
    # SHOW RESULTS
    #

    # Plot observations space
    plt.figure()
    bpplt.timeseries_normal(F)
    bpplt.timeseries(f, 'b-')
    bpplt.timeseries(y, 'r.')

    # Plot latent space
    plt.figure()
    bpplt.timeseries_gaussian_mc(X, scale=2)

    # Plot drift space
    plt.figure()
    bpplt.timeseries_gaussian_mc(S, scale=2)
def run(M=6, N=200, D=3, maxiter=100, debug=False, seed=42, rotate=False,
        precompute=False):
    # Use deterministic random numbers
    if seed is not None:
        np.random.seed(seed)

    # Simulate some data
    K = 3
    c = np.random.randn(M, K)
    w = 0.3
    a = np.array([[np.cos(w), -np.sin(w), 0],
                  [np.sin(w), np.cos(w),  0],
                  [0,         0,          1]])
    x = np.empty((N, K))
    f = np.empty((M, N))
    y = np.empty((M, N))
    x[0] = 10 * np.random.randn(K)
    f[:, 0] = np.dot(c, x[0])
    y[:, 0] = f[:, 0] + 3 * np.random.randn(M)
    for n in range(N-1):
        x[n+1] = np.dot(a, x[n]) + np.random.randn(K)
        f[:, n+1] = np.dot(c, x[n+1])
        y[:, n+1] = f[:, n+1] + 3 * np.random.randn(M)

    # Create the model
    (Y, CX, X, tau, C, gamma, A, alpha) = linear_state_space_model(D=D, N=N, M=M)

    # Add missing values randomly
    mask = random.mask(M, N, p=0.3)
    # Add missing values to a period of time
    mask[:, 30:80] = False
    y[~mask] = np.nan  # BayesPy doesn't require this. Just for plotting.

    # Observe the data
    Y.observe(y, mask=mask)

    # Initialize nodes (must use some randomness for C)
    C.initialize_from_random()

    # Run inference
    Q = VB(Y, X, C, gamma, A, alpha, tau)

    #
    # Run inference with rotations.
    #
    if rotate:
        rotA = transformations.RotateGaussianArrayARD(A, alpha, precompute=precompute)
        rotX = transformations.RotateGaussianMarkovChain(X, A, rotA)
        rotC = transformations.RotateGaussianArrayARD(C, gamma)
        R = transformations.RotationOptimizer(rotX, rotC, D)

        for ind in range(maxiter):
            Q.update()
            if debug:
                R.rotate(maxiter=10,
                         check_gradient=True,
                         check_bound=True)
            else:
                R.rotate()
    else:
        Q.update(repeat=maxiter)

    # Show results
    plt.figure()
    bpplt.timeseries_normal(CX, scale=2)
    bpplt.timeseries(f, 'b-')
    bpplt.timeseries(y, 'r.')
    plt.show()