def model(M=10, N=100, D=3): """ Construct linear state-space model. See, for instance, the following publication: "Fast variational Bayesian linear state-space model" Luttinen (ECML 2013) """ # Dynamics matrix with ARD alpha = Gamma(1e-5, 1e-5, plates=(D, ), name='alpha') A = GaussianARD(0, alpha, shape=(D, ), plates=(D, ), plotter=bpplt.GaussianHintonPlotter(rows=0, cols=1, scale=0), name='A') A.initialize_from_value(np.identity(D)) # Latent states with dynamics X = GaussianMarkovChain( np.zeros(D), # mean of x0 1e-3 * np.identity(D), # prec of x0 A, # dynamics np.ones(D), # innovation n=N, # time instances plotter=bpplt.GaussianMarkovChainPlotter(scale=2), name='X') X.initialize_from_value(np.random.randn(N, D)) # Mixing matrix from latent space to observation space using ARD gamma = Gamma(1e-5, 1e-5, plates=(D, ), name='gamma') gamma.initialize_from_value(1e-2 * np.ones(D)) C = GaussianARD(0, gamma, shape=(D, ), plates=(M, 1), plotter=bpplt.GaussianHintonPlotter(rows=0, cols=2, scale=0), name='C') C.initialize_from_value(np.random.randn(M, 1, D)) # Observation noise tau = Gamma(1e-5, 1e-5, name='tau') tau.initialize_from_value(1e2) # Underlying noiseless function F = SumMultiply('i,i', C, X, name='F') # Noisy observations Y = GaussianARD(F, tau, name='Y') Q = VB(Y, F, C, gamma, X, A, alpha, tau, C) return Q
def test_initialization(self): """ Test initialization methods of GaussianARD """ X = GaussianARD(1, 2, shape=(2, ), plates=(3, )) # Prior initialization mu = 1 * np.ones((3, 2)) alpha = 2 * np.ones((3, 2)) X.initialize_from_prior() u = X._message_to_child() self.assertAllClose(u[0] * np.ones((3, 2)), mu) self.assertAllClose( u[1] * np.ones((3, 2, 2)), linalg.outer(mu, mu, ndim=1) + misc.diag(1 / alpha, ndim=1)) # Parameter initialization mu = np.random.randn(3, 2) alpha = np.random.rand(3, 2) X.initialize_from_parameters(mu, alpha) u = X._message_to_child() self.assertAllClose(u[0], mu) self.assertAllClose( u[1], linalg.outer(mu, mu, ndim=1) + misc.diag(1 / alpha, ndim=1)) # Value initialization x = np.random.randn(3, 2) X.initialize_from_value(x) u = X._message_to_child() self.assertAllClose(u[0], x) self.assertAllClose(u[1], linalg.outer(x, x, ndim=1)) # Random initialization X.initialize_from_random() pass
def test_initialization(self): """ Test initialization methods of GaussianARD """ X = GaussianARD(1, 2, shape=(2,), plates=(3,)) # Prior initialization mu = 1 * np.ones((3, 2)) alpha = 2 * np.ones((3, 2)) X.initialize_from_prior() u = X._message_to_child() self.assertAllClose(u[0]*np.ones((3,2)), mu) self.assertAllClose(u[1]*np.ones((3,2,2)), linalg.outer(mu, mu, ndim=1) + misc.diag(1/alpha, ndim=1)) # Parameter initialization mu = np.random.randn(3, 2) alpha = np.random.rand(3, 2) X.initialize_from_parameters(mu, alpha) u = X._message_to_child() self.assertAllClose(u[0], mu) self.assertAllClose(u[1], linalg.outer(mu, mu, ndim=1) + misc.diag(1/alpha, ndim=1)) # Value initialization x = np.random.randn(3, 2) X.initialize_from_value(x) u = X._message_to_child() self.assertAllClose(u[0], x) self.assertAllClose(u[1], linalg.outer(x, x, ndim=1)) # Random initialization X.initialize_from_random() pass
def model(M=10, N=100, D=3): """ Construct linear state-space model. See, for instance, the following publication: "Fast variational Bayesian linear state-space model" Luttinen (ECML 2013) """ # Dynamics matrix with ARD alpha = Gamma(1e-5, 1e-5, plates=(D,), name='alpha') A = GaussianARD(0, alpha, shape=(D,), plates=(D,), plotter=bpplt.GaussianHintonPlotter(rows=0, cols=1, scale=0), name='A') A.initialize_from_value(np.identity(D)) # Latent states with dynamics X = GaussianMarkovChain(np.zeros(D), # mean of x0 1e-3*np.identity(D), # prec of x0 A, # dynamics np.ones(D), # innovation n=N, # time instances plotter=bpplt.GaussianMarkovChainPlotter(scale=2), name='X') X.initialize_from_value(np.random.randn(N,D)) # Mixing matrix from latent space to observation space using ARD gamma = Gamma(1e-5, 1e-5, plates=(D,), name='gamma') gamma.initialize_from_value(1e-2*np.ones(D)) C = GaussianARD(0, gamma, shape=(D,), plates=(M,1), plotter=bpplt.GaussianHintonPlotter(rows=0, cols=2, scale=0), name='C') C.initialize_from_value(np.random.randn(M,1,D)) # Observation noise tau = Gamma(1e-5, 1e-5, name='tau') tau.initialize_from_value(1e2) # Underlying noiseless function F = SumMultiply('i,i', C, X, name='F') # Noisy observations Y = GaussianARD(F, tau, name='Y') Q = VB(Y, F, C, gamma, X, A, alpha, tau, C) return Q
def model(M=20, N=100, D=10, K=3): """ Construct the linear state-space model with switching dynamics. """ # # Switching dynamics (HMM) # # Prior for initial state probabilities rho = Dirichlet(1e-3 * np.ones(K), name='rho') # Prior for state transition probabilities V = Dirichlet(1e-3 * np.ones(K), plates=(K, ), name='V') v = 10 * np.identity(K) + 1 * np.ones((K, K)) v /= np.sum(v, axis=-1, keepdims=True) V.initialize_from_value(v) # Hidden states (with unknown initial state probabilities and state # transition probabilities) Z = CategoricalMarkovChain(rho, V, states=N - 1, name='Z', plotter=bpplt.CategoricalMarkovChainPlotter(), initialize=False) Z.u[0] = np.random.dirichlet(np.ones(K)) Z.u[1] = np.reshape( np.random.dirichlet(0.5 * np.ones(K * K), size=(N - 2)), (N - 2, K, K)) # # Linear state-space models # # Dynamics matrix with ARD # (K,D) x () alpha = Gamma(1e-5, 1e-5, plates=(K, 1, D), name='alpha') # (K,1,1,D) x (D) A = GaussianARD(0, alpha, shape=(D, ), plates=(K, D), name='A', plotter=bpplt.GaussianHintonPlotter()) A.initialize_from_value( np.identity(D) * np.ones((K, D, D)) + 0.1 * np.random.randn(K, D, D)) # Latent states with dynamics # (K,1) x (N,D) X = SwitchingGaussianMarkovChain( np.zeros(D), # mean of x0 1e-3 * np.identity(D), # prec of x0 A, # dynamics Z, # dynamics selection np.ones(D), # innovation n=N, # time instances name='X', plotter=bpplt.GaussianMarkovChainPlotter()) X.initialize_from_value(10 * np.random.randn(N, D)) # Mixing matrix from latent space to observation space using ARD # (K,1,1,D) x () gamma = Gamma(1e-5, 1e-5, plates=(D, ), name='gamma') # (K,M,1) x (D) C = GaussianARD(0, gamma, shape=(D, ), plates=(M, 1), name='C', plotter=bpplt.GaussianHintonPlotter(rows=-3, cols=-1)) C.initialize_from_value(np.random.randn(M, 1, D)) # Underlying noiseless function # (K,M,N) x () F = SumMultiply('i,i', C, X, name='F') # # Mixing the models # # Observation noise tau = Gamma(1e-5, 1e-5, name='tau') tau.initialize_from_value(1e2) # Emission/observation distribution Y = GaussianARD(F, tau, name='Y') Q = VB(Y, F, Z, rho, V, C, gamma, X, A, alpha, tau) return Q
def model(M=20, N=100, D=10, K=3): """ Construct the linear state-space model with switching dynamics. """ # # Switching dynamics (HMM) # # Prior for initial state probabilities rho = Dirichlet(1e-3*np.ones(K), name='rho') # Prior for state transition probabilities V = Dirichlet(1e-3*np.ones(K), plates=(K,), name='V') v = 10*np.identity(K) + 1*np.ones((K,K)) v /= np.sum(v, axis=-1, keepdims=True) V.initialize_from_value(v) # Hidden states (with unknown initial state probabilities and state # transition probabilities) Z = CategoricalMarkovChain(rho, V, states=N-1, name='Z', plotter=bpplt.CategoricalMarkovChainPlotter(), initialize=False) Z.u[0] = np.random.dirichlet(np.ones(K)) Z.u[1] = np.reshape(np.random.dirichlet(0.5*np.ones(K*K), size=(N-2)), (N-2, K, K)) # # Linear state-space models # # Dynamics matrix with ARD # (K,D) x () alpha = Gamma(1e-5, 1e-5, plates=(K,1,D), name='alpha') # (K,1,1,D) x (D) A = GaussianARD(0, alpha, shape=(D,), plates=(K,D), name='A', plotter=bpplt.GaussianHintonPlotter()) A.initialize_from_value(np.identity(D)*np.ones((K,D,D)) + 0.1*np.random.randn(K,D,D)) # Latent states with dynamics # (K,1) x (N,D) X = SwitchingGaussianMarkovChain(np.zeros(D), # mean of x0 1e-3*np.identity(D), # prec of x0 A, # dynamics Z, # dynamics selection np.ones(D), # innovation n=N, # time instances name='X', plotter=bpplt.GaussianMarkovChainPlotter()) X.initialize_from_value(10*np.random.randn(N,D)) # Mixing matrix from latent space to observation space using ARD # (K,1,1,D) x () gamma = Gamma(1e-5, 1e-5, plates=(D,), name='gamma') # (K,M,1) x (D) C = GaussianARD(0, gamma, shape=(D,), plates=(M,1), name='C', plotter=bpplt.GaussianHintonPlotter(rows=-3,cols=-1)) C.initialize_from_value(np.random.randn(M,1,D)) # Underlying noiseless function # (K,M,N) x () F = SumMultiply('i,i', C, X, name='F') # # Mixing the models # # Observation noise tau = Gamma(1e-5, 1e-5, name='tau') tau.initialize_from_value(1e2) # Emission/observation distribution Y = GaussianARD(F, tau, name='Y') Q = VB(Y, F, Z, rho, V, C, gamma, X, A, alpha, tau) return Q
def model(M, N, D, K): """ Construct the linear state-space model with time-varying dynamics For reference, see the following publication: (TODO) """ # # The model block for the latent mixing weight process # # Dynamics matrix with ARD # beta : (K) x () beta = Gamma(1e-5, 1e-5, plates=(K,), name='beta') # B : (K) x (K) B = GaussianARD(np.identity(K), beta, shape=(K,), plates=(K,), name='B', plotter=bpplt.GaussianHintonPlotter(rows=0, cols=1, scale=0), initialize=False) B.initialize_from_value(np.identity(K)) # Mixing weight process, that is, the weights in the linear combination of # state dynamics matrices # S : () x (N,K) S = GaussianMarkovChain(np.ones(K), 1e-6*np.identity(K), B, np.ones(K), n=N, name='S', plotter=bpplt.GaussianMarkovChainPlotter(scale=2), initialize=False) s = 10*np.random.randn(N,K) s[:,0] = 10 S.initialize_from_value(s) # # The model block for the latent states # # Projection matrix of the dynamics matrix # alpha : (K) x () alpha = Gamma(1e-5, 1e-5, plates=(D,K), name='alpha') alpha.initialize_from_value(1*np.ones((D,K))) # A : (D) x (D,K) A = GaussianARD(0, alpha, shape=(D,K), plates=(D,), name='A', plotter=bpplt.GaussianHintonPlotter(rows=0, cols=1, scale=0), initialize=False) # Initialize S and A such that A*S is almost an identity matrix a = np.zeros((D,D,K)) a[np.arange(D),np.arange(D),np.zeros(D,dtype=int)] = 1 a[:,:,0] = np.identity(D) / s[0,0] a[:,:,1:] = 0.1/s[0,0]*np.random.randn(D,D,K-1) A.initialize_from_value(a) # Latent states with dynamics # X : () x (N,D) X = VaryingGaussianMarkovChain(np.zeros(D), # mean of x0 1e-3*np.identity(D), # prec of x0 A, # dynamics matrices S._convert(GaussianMoments)[1:], # temporal weights np.ones(D), # innovation n=N, # time instances name='X', plotter=bpplt.GaussianMarkovChainPlotter(scale=2), initialize=False) X.initialize_from_value(np.random.randn(N,D)) # # The model block for observations # # Mixing matrix from latent space to observation space using ARD # gamma : (D) x () gamma = Gamma(1e-5, 1e-5, plates=(D,), name='gamma') gamma.initialize_from_value(1e-2*np.ones(D)) # C : (M,1) x (D) C = GaussianARD(0, gamma, shape=(D,), plates=(M,1), name='C', plotter=bpplt.GaussianHintonPlotter(rows=0, cols=2, scale=0)) C.initialize_from_value(np.random.randn(M,1,D)) # Noiseless process # F : (M,N) x () F = SumMultiply('d,d', C, X, name='F') # Observation noise # tau : () x () tau = Gamma(1e-5, 1e-5, name='tau') tau.initialize_from_value(1e2) # Observations # Y: (M,N) x () Y = GaussianARD(F, tau, name='Y') # Construct inference machine Q = VB(Y, F, C, gamma, X, A, alpha, tau, S, B, beta) return Q
def model(M, N, D, K): """ Construct the linear state-space model with time-varying dynamics For reference, see the following publication: (TODO) """ # # The model block for the latent mixing weight process # # Dynamics matrix with ARD # beta : (K) x () beta = Gamma(1e-5, 1e-5, plates=(K, ), name='beta') # B : (K) x (K) B = GaussianARD(np.identity(K), beta, shape=(K, ), plates=(K, ), name='B', plotter=bpplt.GaussianHintonPlotter(rows=0, cols=1, scale=0), initialize=False) B.initialize_from_value(np.identity(K)) # Mixing weight process, that is, the weights in the linear combination of # state dynamics matrices # S : () x (N,K) S = GaussianMarkovChain(np.ones(K), 1e-6 * np.identity(K), B, np.ones(K), n=N, name='S', plotter=bpplt.GaussianMarkovChainPlotter(scale=2), initialize=False) s = 10 * np.random.randn(N, K) s[:, 0] = 10 S.initialize_from_value(s) # # The model block for the latent states # # Projection matrix of the dynamics matrix # alpha : (K) x () alpha = Gamma(1e-5, 1e-5, plates=(D, K), name='alpha') alpha.initialize_from_value(1 * np.ones((D, K))) # A : (D) x (D,K) A = GaussianARD(0, alpha, shape=(D, K), plates=(D, ), name='A', plotter=bpplt.GaussianHintonPlotter(rows=0, cols=1, scale=0), initialize=False) # Initialize S and A such that A*S is almost an identity matrix a = np.zeros((D, D, K)) a[np.arange(D), np.arange(D), np.zeros(D, dtype=int)] = 1 a[:, :, 0] = np.identity(D) / s[0, 0] a[:, :, 1:] = 0.1 / s[0, 0] * np.random.randn(D, D, K - 1) A.initialize_from_value(a) # Latent states with dynamics # X : () x (N,D) X = VaryingGaussianMarkovChain( np.zeros(D), # mean of x0 1e-3 * np.identity(D), # prec of x0 A, # dynamics matrices S._convert(GaussianMoments)[1:], # temporal weights np.ones(D), # innovation n=N, # time instances name='X', plotter=bpplt.GaussianMarkovChainPlotter(scale=2), initialize=False) X.initialize_from_value(np.random.randn(N, D)) # # The model block for observations # # Mixing matrix from latent space to observation space using ARD # gamma : (D) x () gamma = Gamma(1e-5, 1e-5, plates=(D, ), name='gamma') gamma.initialize_from_value(1e-2 * np.ones(D)) # C : (M,1) x (D) C = GaussianARD(0, gamma, shape=(D, ), plates=(M, 1), name='C', plotter=bpplt.GaussianHintonPlotter(rows=0, cols=2, scale=0)) C.initialize_from_value(np.random.randn(M, 1, D)) # Noiseless process # F : (M,N) x () F = SumMultiply('d,d', C, X, name='F') # Observation noise # tau : () x () tau = Gamma(1e-5, 1e-5, name='tau') tau.initialize_from_value(1e2) # Observations # Y: (M,N) x () Y = GaussianARD(F, tau, name='Y') # Construct inference machine Q = VB(Y, F, C, gamma, X, A, alpha, tau, S, B, beta) return Q
def run(N=500, seed=42, maxiter=100, plot=True): """ Run deterministic annealing demo for 1-D Gaussian mixture. """ if seed is not None: np.random.seed(seed) mu = GaussianARD(0, 1, plates=(2,), name='means') Z = Categorical([0.3, 0.7], plates=(N,), name='classes') Y = Mixture(Z, GaussianARD, mu, 1, name='observations') # Generate data z = Z.random() data = np.empty(N) for n in range(N): data[n] = [4, -4][z[n]] Y.observe(data) # Initialize means closer to the inferior local optimum in which the # cluster means are swapped mu.initialize_from_value([0, 6]) Q = VB(Y, Z, mu) Q.save() # # Standard VB-EM algorithm # Q.update(repeat=maxiter) mu_vbem = mu.u[0].copy() L_vbem = Q.compute_lowerbound() # # VB-EM with deterministic annealing # Q.load() beta = 0.01 while beta < 1.0: beta = min(beta*1.2, 1.0) print("Set annealing to %.2f" % beta) Q.set_annealing(beta) Q.update(repeat=maxiter, tol=1e-4) mu_anneal = mu.u[0].copy() L_anneal = Q.compute_lowerbound() print("==============================") print("RESULTS FOR VB-EM vs ANNEALING") print("Fixed component probabilities:", np.array([0.3, 0.7])) print("True component means:", np.array([4, -4])) print("VB-EM component means:", mu_vbem) print("VB-EM lower bound:", L_vbem) print("Annealed VB-EM component means:", mu_anneal) print("Annealed VB-EM lower bound:", L_anneal) return