def model(M=10, N=100, D=3):
    """
    Construct linear state-space model.

    See, for instance, the following publication:
    "Fast variational Bayesian linear state-space model"
    Luttinen (ECML 2013)

    Parameters
    ----------
    M : int
        Size of the leading plate axis of the mixing matrix ``C``
        (presumably the number of observed dimensions -- confirm with
        the caller).
    N : int
        Number of time instances of the latent Markov chain ``X``.
    D : int
        Dimensionality of the latent state.

    Returns
    -------
    Q : VB
        Inference engine constructed from the nodes of the model.
    """

    # Dynamics matrix with ARD
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='alpha')
    A = GaussianARD(0,
                    alpha,
                    shape=(D,),
                    plates=(D,),
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=1,
                                                        scale=0),
                    name='A')
    A.initialize_from_value(np.identity(D))

    # Latent states with dynamics
    X = GaussianMarkovChain(np.zeros(D),            # mean of x0
                            1e-3 * np.identity(D),  # prec of x0
                            A,                      # dynamics
                            np.ones(D),             # innovation
                            n=N,                    # time instances
                            plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                            name='X')
    # The latent states start from a random draw
    X.initialize_from_value(np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='gamma')
    gamma.initialize_from_value(1e-2 * np.ones(D))
    C = GaussianARD(0,
                    gamma,
                    shape=(D,),
                    plates=(M, 1),
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=2,
                                                        scale=0),
                    name='C')
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Observation noise
    tau = Gamma(1e-5,
                1e-5,
                name='tau')
    tau.initialize_from_value(1e2)

    # Underlying noiseless function
    F = SumMultiply('i,i',
                    C,
                    X,
                    name='F')

    # Noisy observations
    Y = GaussianARD(F,
                    tau,
                    name='Y')

    # Inference engine; every node of the model is listed exactly once
    Q = VB(Y, F, C, gamma, X, A, alpha, tau)

    return Q
def model(M=10, N=100, D=3):
    """
    Construct linear state-space model.

    See, for instance, the following publication:
    "Fast variational Bayesian linear state-space model"
    Luttinen (ECML 2013)

    Parameters
    ----------
    M : int
        Size of the leading plate axis of the mixing matrix ``C``
        (presumably the number of observed dimensions -- confirm with
        the caller).
    N : int
        Number of time instances of the latent Markov chain ``X``.
    D : int
        Dimensionality of the latent state.

    Returns
    -------
    Q : VB
        Inference engine constructed from the nodes of the model.
    """

    # Dynamics matrix with ARD
    alpha = Gamma(1e-5, 1e-5, plates=(D,), name='alpha')
    A = GaussianARD(
        0,
        alpha,
        shape=(D,),
        plates=(D,),
        plotter=bpplt.GaussianHintonPlotter(rows=0, cols=1, scale=0),
        name='A',
    )
    A.initialize_from_value(np.identity(D))

    # Latent states with dynamics
    X = GaussianMarkovChain(
        np.zeros(D),            # mean of x0
        1e-3 * np.identity(D),  # prec of x0
        A,                      # dynamics
        np.ones(D),             # innovation
        n=N,                    # time instances
        plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
        name='X',
    )
    # The latent states start from a random draw
    X.initialize_from_value(np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    gamma = Gamma(1e-5, 1e-5, plates=(D,), name='gamma')
    gamma.initialize_from_value(1e-2 * np.ones(D))
    C = GaussianARD(
        0,
        gamma,
        shape=(D,),
        plates=(M, 1),
        plotter=bpplt.GaussianHintonPlotter(rows=0, cols=2, scale=0),
        name='C',
    )
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Observation noise
    tau = Gamma(1e-5, 1e-5, name='tau')
    tau.initialize_from_value(1e2)

    # Underlying noiseless function
    F = SumMultiply('i,i', C, X, name='F')

    # Noisy observations
    Y = GaussianARD(F, tau, name='Y')

    # Each node is handed to the inference engine once; C must not be
    # repeated at the end of the argument list.
    Q = VB(Y, F, C, gamma, X, A, alpha, tau)

    return Q
def model(M=20, N=100, D=10, K=3):
    """
    Construct the linear state-space model with switching dynamics.

    ``K`` dynamics matrices are defined and a categorical Markov chain ``Z``
    is passed to the latent state chain as the dynamics selector.  ``M`` is
    the leading plate of the mixing matrix, ``N`` the number of time
    instances and ``D`` the latent state dimensionality.  Returns the VB
    inference engine built from all the nodes.
    """

    #
    # Switching dynamics (HMM)
    #

    # Prior for initial state probabilities
    rho = Dirichlet(1e-3 * np.ones(K), name='rho')

    # Prior for state transition probabilities, one plate per source state
    V = Dirichlet(1e-3 * np.ones(K), plates=(K,), name='V')
    # Initial value: rows that favour staying in the current state,
    # normalised so that each row sums to one
    stay_biased = 10 * np.identity(K) + 1 * np.ones((K, K))
    row_totals = stay_biased.sum(axis=-1, keepdims=True)
    V.initialize_from_value(stay_biased / row_totals)

    # Hidden states (with unknown initial state probabilities and state
    # transition probabilities); moments are filled in by hand below
    Z = CategoricalMarkovChain(
        rho,
        V,
        states=N - 1,
        name='Z',
        plotter=bpplt.CategoricalMarkovChainPlotter(),
        initialize=False,
    )
    Z.u[0] = np.random.dirichlet(np.ones(K))
    flat_pairs = np.random.dirichlet(0.5 * np.ones(K * K), size=(N - 2))
    Z.u[1] = flat_pairs.reshape((N - 2, K, K))

    #
    # Linear state-space models
    #

    # ARD precisions of the dynamics matrices, plates (K,1,D)
    alpha = Gamma(1e-5, 1e-5, plates=(K, 1, D), name='alpha')
    # Dynamics matrices, plates (K,D) with vectors of length D
    A = GaussianARD(
        0,
        alpha,
        shape=(D,),
        plates=(K, D),
        name='A',
        plotter=bpplt.GaussianHintonPlotter(),
    )
    # Start every dynamics matrix from a noisy identity
    identity_stack = np.identity(D) * np.ones((K, D, D))
    A.initialize_from_value(identity_stack + 0.1 * np.random.randn(K, D, D))

    # Latent states with dynamics
    X = SwitchingGaussianMarkovChain(
        np.zeros(D),            # mean of x0
        1e-3 * np.identity(D),  # prec of x0
        A,                      # dynamics
        Z,                      # dynamics selection
        np.ones(D),             # innovation
        n=N,                    # time instances
        name='X',
        plotter=bpplt.GaussianMarkovChainPlotter(),
    )
    X.initialize_from_value(10 * np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD
    # ARD precisions, plates (D,)
    gamma = Gamma(1e-5, 1e-5, plates=(D,), name='gamma')
    # Mixing matrix, plates (M,1) with vectors of length D
    C = GaussianARD(
        0,
        gamma,
        shape=(D,),
        plates=(M, 1),
        name='C',
        plotter=bpplt.GaussianHintonPlotter(rows=-3, cols=-1),
    )
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Underlying noiseless function: inner product of C and X
    F = SumMultiply('i,i', C, X, name='F')

    #
    # Mixing the models
    #

    # Observation noise
    tau = Gamma(1e-5, 1e-5, name='tau')
    tau.initialize_from_value(1e2)

    # Emission/observation distribution
    Y = GaussianARD(F, tau, name='Y')

    return VB(Y, F, Z, rho, V, C, gamma, X, A, alpha, tau)
def model(M=20, N=100, D=10, K=3):
    """
    Construct the linear state-space model with switching dynamics.

    The model has ``K`` candidate dynamics matrices; the categorical Markov
    chain ``Z`` is given to the latent chain ``X`` as the dynamics selector.
    ``M`` sizes the leading plate of the mixing matrix ``C``, ``N`` is the
    number of time instances and ``D`` the latent dimensionality.  The
    return value is the VB inference engine holding all nodes.
    """

    #
    # Switching dynamics (HMM)
    #

    # Prior for initial state probabilities
    initial_concentration = 1e-3 * np.ones(K)
    rho = Dirichlet(initial_concentration, name='rho')

    # Prior for state transition probabilities
    transition_concentration = 1e-3 * np.ones(K)
    V = Dirichlet(transition_concentration, plates=(K,), name='V')

    # Initialise the transition matrix with a heavy diagonal and rows
    # normalised to sum to one
    v_init = 10 * np.identity(K) + 1 * np.ones((K, K))
    v_init = v_init / np.sum(v_init, axis=-1, keepdims=True)
    V.initialize_from_value(v_init)

    # Hidden states (with unknown initial state probabilities and state
    # transition probabilities).  The node is created uninitialised and its
    # two moments are then set from random Dirichlet draws.
    Z = CategoricalMarkovChain(rho,
                               V,
                               states=N - 1,
                               name='Z',
                               plotter=bpplt.CategoricalMarkovChainPlotter(),
                               initialize=False)
    Z.u[0] = np.random.dirichlet(np.ones(K))
    joint_draws = np.random.dirichlet(0.5 * np.ones(K * K), size=(N - 2))
    Z.u[1] = np.reshape(joint_draws, (N - 2, K, K))

    #
    # Linear state-space models
    #

    # Dynamics matrix with ARD: precisions on plates (K,1,D)
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(K, 1, D),
                  name='alpha')
    # Dynamics matrices on plates (K,D), each row a length-D vector
    A = GaussianARD(0,
                    alpha,
                    shape=(D,),
                    plates=(K, D),
                    name='A',
                    plotter=bpplt.GaussianHintonPlotter())
    # Identity replicated over the K plates, perturbed by small noise
    a_init = np.identity(D) * np.ones((K, D, D))
    a_init = a_init + 0.1 * np.random.randn(K, D, D)
    A.initialize_from_value(a_init)

    # Latent states with dynamics
    X = SwitchingGaussianMarkovChain(np.zeros(D),            # mean of x0
                                     1e-3 * np.identity(D),  # prec of x0
                                     A,                      # dynamics
                                     Z,                      # dynamics selection
                                     np.ones(D),             # innovation
                                     n=N,                    # time instances
                                     name='X',
                                     plotter=bpplt.GaussianMarkovChainPlotter())
    X.initialize_from_value(10 * np.random.randn(N, D))

    # Mixing matrix from latent space to observation space using ARD:
    # precisions on plates (D,), matrix on plates (M,1) with shape (D,)
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='gamma')
    C = GaussianARD(0,
                    gamma,
                    shape=(D,),
                    plates=(M, 1),
                    name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=-3, cols=-1))
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Underlying noiseless function
    F = SumMultiply('i,i', C, X, name='F')

    #
    # Mixing the models
    #

    # Observation noise
    tau = Gamma(1e-5, 1e-5, name='tau')
    tau.initialize_from_value(1e2)

    # Emission/observation distribution
    Y = GaussianARD(F, tau, name='Y')

    engine = VB(Y, F, Z, rho, V, C, gamma, X, A, alpha, tau)
    return engine
def model(M, N, D, K):
    """
    Construct the linear state-space model with time-varying dynamics

    For reference, see the following publication:
    (TODO)

    Parameters
    ----------
    M : int
        Size of the leading plate axis of the mixing matrix ``C``.
    N : int
        Number of time instances of the chains ``S`` and ``X``.
    D : int
        Dimensionality of the latent state ``X``.
    K : int
        Dimensionality of the mixing weight process ``S``, i.e. the number
        of dynamics matrices that are linearly combined.  Must be positive.

    Returns
    -------
    Q : VB
        Inference engine constructed from the nodes of the model.

    Raises
    ------
    ValueError
        If ``K`` is smaller than one.
    """

    # Fail early with a clear message: with K < 1 the initialisation below
    # would otherwise index into an empty axis.
    if K < 1:
        raise ValueError("K must be a positive integer")

    #
    # The model block for the latent mixing weight process
    #

    # Dynamics matrix with ARD
    # beta : (K) x ()
    beta = Gamma(1e-5,
                 1e-5,
                 plates=(K,),
                 name='beta')
    # B : (K) x (K)
    B = GaussianARD(np.identity(K),
                    beta,
                    shape=(K,),
                    plates=(K,),
                    name='B',
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=1,
                                                        scale=0),
                    initialize=False)
    B.initialize_from_value(np.identity(K))

    # Mixing weight process, that is, the weights in the linear combination of
    # state dynamics matrices
    # S : () x (N,K)
    S = GaussianMarkovChain(np.ones(K),
                            1e-6 * np.identity(K),
                            B,
                            np.ones(K),
                            n=N,
                            name='S',
                            plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                            initialize=False)
    # Random initial weights, except the first weight which is held at the
    # constant 10 for every time instance
    s = 10 * np.random.randn(N, K)
    s[:, 0] = 10
    S.initialize_from_value(s)

    #
    # The model block for the latent states
    #

    # Projection matrix of the dynamics matrix
    # alpha : (D,K) x ()
    alpha = Gamma(1e-5,
                  1e-5,
                  plates=(D, K),
                  name='alpha')
    alpha.initialize_from_value(1 * np.ones((D, K)))
    # A : (D) x (D,K)
    A = GaussianARD(0,
                    alpha,
                    shape=(D, K),
                    plates=(D,),
                    name='A',
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=1,
                                                        scale=0),
                    initialize=False)

    # Initialize S and A such that A*S is almost an identity matrix:
    # the first slice is the identity divided by the constant first weight,
    # the remaining slices are small random values.
    a = np.zeros((D, D, K))
    a[:, :, 0] = np.identity(D) / s[0, 0]
    a[:, :, 1:] = 0.1 / s[0, 0] * np.random.randn(D, D, K - 1)
    A.initialize_from_value(a)

    # Latent states with dynamics
    # X : () x (N,D)
    X = VaryingGaussianMarkovChain(np.zeros(D),            # mean of x0
                                   1e-3 * np.identity(D),  # prec of x0
                                   A,                      # dynamics matrices
                                   S._convert(GaussianMoments)[1:],  # temporal weights
                                   np.ones(D),             # innovation
                                   n=N,                    # time instances
                                   name='X',
                                   plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                                   initialize=False)
    X.initialize_from_value(np.random.randn(N, D))

    #
    # The model block for observations
    #

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5,
                  1e-5,
                  plates=(D,),
                  name='gamma')
    gamma.initialize_from_value(1e-2 * np.ones(D))
    # C : (M,1) x (D)
    C = GaussianARD(0,
                    gamma,
                    shape=(D,),
                    plates=(M, 1),
                    name='C',
                    plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                        cols=2,
                                                        scale=0))
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Noiseless process
    # F : (M,N) x ()
    F = SumMultiply('d,d',
                    C,
                    X,
                    name='F')

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5,
                1e-5,
                name='tau')
    tau.initialize_from_value(1e2)

    # Observations
    # Y: (M,N) x ()
    Y = GaussianARD(F,
                    tau,
                    name='Y')

    # Construct inference machine
    Q = VB(Y, F, C, gamma, X, A, alpha, tau, S, B, beta)

    return Q
def lssm(M, N, D, K=1, drift_C=False, drift_A=False):
    """
    Construct a linear state-space model, optionally with drift.

    Parameters
    ----------
    M : int
        Size of the leading plate axis of the mixing matrix ``C``.
    N : int
        Number of time instances of the Markov chains.
    D : int
        Dimensionality of the latent state ``X``.
    K : int, optional
        Dimensionality of the drift weight process ``S``.  Only used when
        ``drift_C`` or ``drift_A`` is set.
    drift_C : bool, optional
        If true, ``C`` gets shape ``(D, K)`` and the noiseless process is
        the ``'dk,d,k'`` product of ``C``, ``X`` and ``S``.
    drift_A : bool, optional
        If true, ``A`` gets shape ``(D, K)`` and ``X`` is a
        ``DriftingGaussianMarkovChain`` that uses ``S`` as temporal weights.

    Returns
    -------
    Q : VB
        Inference engine constructed from the nodes of the model.  The drift
        nodes ``S``, ``B`` and ``beta`` are included only when drift is used.

    Raises
    ------
    ValueError
        If drift is requested and ``K`` is not positive.
    """

    use_drift = drift_A or drift_C

    if use_drift and not K > 0:
        raise ValueError("K must be positive integer when using drift")

    # Nodes that exist only when some drift is modelled
    drift_nodes = ()

    # Drift weights
    if use_drift:
        # Dynamics matrix with ARD
        # beta : (K) x ()
        beta = Gamma(1e-5,
                     1e-5,
                     plates=(K,),
                     name='beta')
        # B : (K) x (K)
        B = GaussianArrayARD(np.identity(K),
                             beta,
                             shape=(K,),
                             plates=(K,),
                             name='B',
                             plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                                 cols=1,
                                                                 scale=0),
                             initialize=False)
        B.initialize_from_value(np.identity(K))

        # State of the drift, that is, temporal weights for dynamics matrices
        # S : () x (N,K)
        S = GaussianMarkovChain(np.ones(K),
                                1e-6 * np.identity(K),
                                B,
                                np.ones(K),
                                n=N,
                                name='S',
                                plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                                initialize=False)
        # Random initial weights, except the first weight which is held at
        # the constant 10 for every time instance
        s = np.random.randn(N, K)
        s[:, 0] = 10
        S.initialize_from_value(s)

        drift_nodes = (S, B, beta)

    if drift_A:
        # Projection matrix of the dynamics matrix
        # alpha : (D,K) x ()
        alpha = Gamma(1e-5,
                      1e-5,
                      plates=(D, K),
                      name='alpha')
        # A : (D) x (D,K)
        A = GaussianArrayARD(0,
                             alpha,
                             shape=(D, K),
                             plates=(D,),
                             name='A',
                             plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                                 cols=1,
                                                                 scale=0),
                             initialize=False)

        # Initialize S and A such that A*S is almost an identity matrix:
        # the first slice is the identity divided by the constant first
        # weight, the remaining slices are small random values.
        a = np.zeros((D, D, K))
        a[:, :, 0] = np.identity(D) / s[0, 0]
        a[:, :, 1:] = 0.1 / s[0, 0] * np.random.randn(D, D, K - 1)
        A.initialize_from_value(a)

        # Latent states with dynamics
        # X : () x (N,D)
        X = DriftingGaussianMarkovChain(np.zeros(D),            # mean of x0
                                        1e-3 * np.identity(D),  # prec of x0
                                        A,                      # dynamics matrices
                                        S.as_gaussian()[1:],    # temporal weights
                                        np.ones(D),             # innovation
                                        n=N,                    # time instances
                                        name='X',
                                        plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
                                        initialize=False)
        X.initialize_from_value(np.random.randn(N, D))
    else:
        # Dynamic matrix
        # alpha: (D) x ()
        alpha = Gamma(1e-5,
                      1e-5,
                      plates=(D,),
                      name='alpha')
        # A : (D) x (D)
        A = GaussianArrayARD(0,
                             alpha,
                             shape=(D,),
                             plates=(D,),
                             name='A',
                             plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                                 cols=1,
                                                                 scale=0),
                             initialize=False)
        A.initialize_from_value(np.identity(D))

        # Latent states with dynamics
        # X : () x (N,D)
        X = GaussianMarkovChain(np.zeros(D),            # mean of x0
                                1e-3 * np.identity(D),  # prec of x0
                                A,                      # dynamics
                                np.ones(D),             # innovation
                                n=N,                    # time instances
                                name='X',
                                plotter=bpplt.GaussianMarkovChainPlotter(),
                                initialize=False)
        X.initialize_from_value(np.random.randn(N, D))

    if drift_C:
        # Mixing matrix from latent space to observation space using ARD
        # gamma : (D,K) x ()
        gamma = Gamma(1e-5,
                      1e-5,
                      plates=(D, K),
                      name='gamma')
        # C : (M,1) x (D,K)
        C = GaussianArrayARD(0,
                             gamma,
                             shape=(D, K),
                             plates=(M, 1),
                             name='C',
                             plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                                 cols=2,
                                                                 scale=0))
        C.initialize_from_random()

        # Noiseless process
        # F : (M,N) x ()
        F = SumMultiply('dk,d,k',
                        C,
                        X.as_gaussian(),
                        S.as_gaussian(),
                        name='F')
    else:
        # Mixing matrix from latent space to observation space using ARD
        # gamma : (D) x ()
        gamma = Gamma(1e-5,
                      1e-5,
                      plates=(D,),
                      name='gamma')
        # C : (M,1) x (D)
        C = GaussianArrayARD(0,
                             gamma,
                             shape=(D,),
                             plates=(M, 1),
                             name='C',
                             plotter=bpplt.GaussianHintonPlotter(rows=0,
                                                                 cols=2,
                                                                 scale=0))
        C.initialize_from_value(np.random.randn(M, 1, D))

        # Noiseless process
        # F : (M,N) x ()
        F = SumMultiply('d,d',
                        C,
                        X.as_gaussian(),
                        name='F')

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5,
                1e-5,
                name='tau')
    tau.initialize_from_value(1e2)

    # Observations
    # Y: (M,N) x ()
    Y = GaussianArrayARD(F,
                         tau,
                         name='Y')

    # Construct inference machine; drift nodes are appended only if present
    Q = VB(Y, F, C, gamma, X, A, alpha, tau, *drift_nodes)

    return Q
def model(M, N, D, K):
    """
    Construct the linear state-space model with time-varying dynamics

    For reference, see the following publication:
    (TODO)

    Parameters
    ----------
    M : int
        Size of the leading plate axis of the mixing matrix ``C``.
    N : int
        Number of time instances of the chains ``S`` and ``X``.
    D : int
        Dimensionality of the latent state ``X``.
    K : int
        Dimensionality of the mixing weight process ``S``, i.e. the number
        of dynamics matrices that are linearly combined.  Must be positive.

    Returns
    -------
    Q : VB
        Inference engine constructed from the nodes of the model.

    Raises
    ------
    ValueError
        If ``K`` is smaller than one.
    """

    # Reject a non-positive K up front; the initial values below index the
    # first mixing weight and would fail on an empty axis.
    if K < 1:
        raise ValueError("K must be a positive integer")

    #
    # The model block for the latent mixing weight process
    #

    # Dynamics matrix with ARD
    # beta : (K) x ()
    beta = Gamma(1e-5, 1e-5, plates=(K,), name='beta')
    # B : (K) x (K)
    B = GaussianARD(
        np.identity(K),
        beta,
        shape=(K,),
        plates=(K,),
        name='B',
        plotter=bpplt.GaussianHintonPlotter(rows=0, cols=1, scale=0),
        initialize=False,
    )
    B.initialize_from_value(np.identity(K))

    # Mixing weight process, that is, the weights in the linear combination of
    # state dynamics matrices
    # S : () x (N,K)
    S = GaussianMarkovChain(
        np.ones(K),
        1e-6 * np.identity(K),
        B,
        np.ones(K),
        n=N,
        name='S',
        plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
        initialize=False,
    )
    # Random initial weights; the first weight is fixed to 10 at all times
    s = 10 * np.random.randn(N, K)
    s[:, 0] = 10
    S.initialize_from_value(s)

    #
    # The model block for the latent states
    #

    # Projection matrix of the dynamics matrix
    # alpha : (D,K) x ()
    alpha = Gamma(1e-5, 1e-5, plates=(D, K), name='alpha')
    alpha.initialize_from_value(1 * np.ones((D, K)))
    # A : (D) x (D,K)
    A = GaussianARD(
        0,
        alpha,
        shape=(D, K),
        plates=(D,),
        name='A',
        plotter=bpplt.GaussianHintonPlotter(rows=0, cols=1, scale=0),
        initialize=False,
    )

    # Initialize S and A such that A*S is almost an identity matrix.
    # Slice 0 is the identity scaled by the inverse of the fixed first
    # weight; the other slices hold small random values.  Together the two
    # assignments fill every element of ``a``.
    a = np.zeros((D, D, K))
    a[:, :, 0] = np.identity(D) / s[0, 0]
    a[:, :, 1:] = 0.1 / s[0, 0] * np.random.randn(D, D, K - 1)
    A.initialize_from_value(a)

    # Latent states with dynamics
    # X : () x (N,D)
    X = VaryingGaussianMarkovChain(
        np.zeros(D),                      # mean of x0
        1e-3 * np.identity(D),            # prec of x0
        A,                                # dynamics matrices
        S._convert(GaussianMoments)[1:],  # temporal weights
        np.ones(D),                       # innovation
        n=N,                              # time instances
        name='X',
        plotter=bpplt.GaussianMarkovChainPlotter(scale=2),
        initialize=False,
    )
    X.initialize_from_value(np.random.randn(N, D))

    #
    # The model block for observations
    #

    # Mixing matrix from latent space to observation space using ARD
    # gamma : (D) x ()
    gamma = Gamma(1e-5, 1e-5, plates=(D,), name='gamma')
    gamma.initialize_from_value(1e-2 * np.ones(D))
    # C : (M,1) x (D)
    C = GaussianARD(
        0,
        gamma,
        shape=(D,),
        plates=(M, 1),
        name='C',
        plotter=bpplt.GaussianHintonPlotter(rows=0, cols=2, scale=0),
    )
    C.initialize_from_value(np.random.randn(M, 1, D))

    # Noiseless process
    # F : (M,N) x ()
    F = SumMultiply('d,d', C, X, name='F')

    # Observation noise
    # tau : () x ()
    tau = Gamma(1e-5, 1e-5, name='tau')
    tau.initialize_from_value(1e2)

    # Observations
    # Y: (M,N) x ()
    Y = GaussianARD(F, tau, name='Y')

    # Construct inference machine
    Q = VB(Y, F, C, gamma, X, A, alpha, tau, S, B, beta)

    return Q