def test_gaussian():
    prior_data = 2 * np.random.randn(5, 2) + np.array([1., 3.])
    a = Gaussian().empirical_bayes(prior_data)
    # data = a.rvs(10)

    gibbs_statistics = []
    for itr in range(20000):
        a.resample()  # a.resample(data)
        gibbs_statistics.append(a.mu)
    gibbs_statistics = np.array(gibbs_statistics)

    b = AnnealedGaussianModel().empirical_bayes(prior_data)
    # b.add_data(data)
    pt = ParallelTempering(b, [5.])
    pt_samples = pt.run(20000, 1)
    pt_statistics = np.array([m.mu for m in pt_samples])

    fig = plt.figure()
    testing.populations_eq_quantile_plot(gibbs_statistics, pt_statistics, fig=fig)
    plt.savefig('gaussian_test.png')

    testing.assert_populations_eq_moments(gibbs_statistics, pt_statistics), \
        'Annealing MAY have failed, check FIGURES'
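# A minimal driver, assuming this test module is run directly (hypothetical; the
# test is normally collected by a test runner). Seeding makes the Gibbs versus
# parallel-tempering comparison repeatable across runs.
if __name__ == "__main__":
    np.random.seed(0)
    test_gaussian()
    print "wrote gaussian_test.png"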
class StickbreakingCorrelatedLDA(_LDABase):
    "Correlated LDA with the stick breaking representation"

    def __init__(self, data, T, alpha_beta):
        mu, sigma = compute_uniform_mean_psi(T)
        self.theta_prior = Gaussian(
            mu=mu, sigma=sigma, mu_0=mu, sigma_0=T * sigma / 10.,
            nu_0=T / 10., kappa_0=1. / 10)

        self.ppgs = initialize_polya_gamma_samplers()
        self.omega = np.zeros((data.shape[0], T - 1))

        super(StickbreakingCorrelatedLDA, self).__init__(data, T, alpha_beta)

    @property
    def theta(self):
        return psi_to_pi(self.psi)

    @theta.setter
    def theta(self, theta):
        self.psi = pi_to_psi(theta)

    def initialize_theta(self):
        self.psi = np.tile(self.theta_prior.mu, (self.D, 1))

    def resample_theta(self):
        self.resample_omega()
        self.resample_psi()

    def resample(self):
        super(StickbreakingCorrelatedLDA, self).resample()
        self.resample_theta_prior()

    def resample_omega(self):
        pgdrawvpar(
            self.ppgs,
            N_vec(self.doc_topic_counts).astype('float64').ravel(),
            self.psi.ravel(), self.omega.ravel())
        np.clip(self.omega, 1e-32, np.inf, out=self.omega)

    def resample_psi(self):
        Lmbda = np.linalg.inv(self.theta_prior.sigma)
        h = Lmbda.dot(self.theta_prior.mu)
        randvec = np.random.randn(self.D, self.T - 1)  # pre-generate randomness
        for d, c in enumerate(self.doc_topic_counts):
            self.psi[d] = sample_infogaussian(
                Lmbda + np.diag(self.omega[d]), h + kappa_vec(c), randvec[d])

    def resample_theta_prior(self):
        self.theta_prior.resample(self.psi)

    def copy_sample(self):
        new = copy.copy(self)
        new.beta = self.beta.copy()
        new.psi = self.psi.copy()
        new.theta_prior = self.theta_prior.copy_sample()
        del new.z
        del new.omega
        return new
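# For reference, a standalone sketch of the stick-breaking map that psi_to_pi is
# assumed to implement: logistic transforms of a length T-1 vector psi allocate a
# unit stick into a length-T probability vector pi, and pi_to_psi inverts it.
# This is an illustration of the assumed transform, not the library implementation.
def _stick_breaking_sketch(psi):
    import numpy as np
    sigmoid = lambda x: 1. / (1. + np.exp(-x))
    T = psi.shape[-1] + 1
    pi = np.zeros(T)
    stick = 1.0                      # mass remaining to be allocated
    for t in range(T - 1):
        pi[t] = sigmoid(psi[t]) * stick
        stick -= pi[t]
    pi[T - 1] = stick                # the last topic gets the leftover stick
    return pi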
class NIWGaussianWeightDistribution(GaussianWeightDistribution, GibbsSampling):
    """
    Gaussian weight distribution with a normal inverse-Wishart prior.
    """
    # TODO: Specify the self weight parameters in the constructor
    def __init__(self, N, B=1, mu_0=None, Sigma_0=None, nu_0=None, kappa_0=None):
        super(NIWGaussianWeightDistribution, self).__init__(N)
        self.B = B

        if mu_0 is None:
            mu_0 = np.zeros(B)
        if Sigma_0 is None:
            Sigma_0 = np.eye(B)
        if nu_0 is None:
            nu_0 = B + 2
        if kappa_0 is None:
            kappa_0 = 1.0

        self._gaussian = Gaussian(mu_0=mu_0, sigma_0=Sigma_0,
                                  nu_0=nu_0, kappa_0=kappa_0)

        # Special case self-weights (along the diagonal)
        self._self_gaussian = Gaussian(mu_0=mu_0, sigma_0=Sigma_0,
                                       nu_0=nu_0, kappa_0=kappa_0)

    @property
    def Mu(self):
        mu = self._gaussian.mu
        Mu = np.tile(mu[None, None, :], (self.N, self.N, 1))
        for n in xrange(self.N):
            Mu[n, n, :] = self._self_gaussian.mu
        return Mu

    @property
    def Sigma(self):
        sig = self._gaussian.sigma
        Sig = np.tile(sig[None, None, :, :], (self.N, self.N, 1, 1))
        for n in xrange(self.N):
            Sig[n, n, :, :] = self._self_gaussian.sigma
        return Sig

    def initialize_from_prior(self):
        self._gaussian.resample()
        self._self_gaussian.resample()

    def initialize_hypers(self, W):
        # self.B = W.shape[2]
        mu_0 = W.mean(axis=(0, 1))
        sigma_0 = np.diag(W.var(axis=(0, 1)))
        self._gaussian.mu_0 = mu_0
        self._gaussian.sigma_0 = sigma_0
        self._gaussian.resample()
        # self._gaussian.nu_0 = self.B + 2

        W_self = W[np.arange(self.N), np.arange(self.N)]
        self._self_gaussian.mu_0 = W_self.mean(axis=0)
        self._self_gaussian.sigma_0 = np.diag(W_self.var(axis=0))
        self._self_gaussian.resample()
        # self._self_gaussian.nu_0 = self.B + 2

    def log_prior(self):
        from graphistician.internals.utils import normal_inverse_wishart_log_prob
        lp = 0
        lp += normal_inverse_wishart_log_prob(self._gaussian)
        lp += normal_inverse_wishart_log_prob(self._self_gaussian)
        return lp

    def sample_predictive_parameters(self):
        Murow = Mucol = np.tile(self._gaussian.mu[None, :], (self.N + 1, 1))
        Lrow = Lcol = np.tile(self._gaussian.sigma_chol[None, :, :], (self.N + 1, 1, 1))
        Murow[-1, :] = self._self_gaussian.mu
        Mucol[-1, :] = self._self_gaussian.mu
        Lrow[-1, :, :] = self._self_gaussian.sigma_chol
        Lcol[-1, :, :] = self._self_gaussian.sigma_chol
        return Murow, Mucol, Lrow, Lcol

    def resample(self, (A, W)):
        # Resample the normal-inverse Wishart prior over mu and Sigma
        # given the entries of W for which A = 1
        A_offdiag = A.copy()
        np.fill_diagonal(A_offdiag, 0)
        A_ondiag = A * np.eye(self.N)
        self._gaussian.resample(W[A_offdiag == 1])
        self._self_gaussian.resample(W[A_ondiag == 1])
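# Hypothetical usage sketch: resample the NIW prior from a random weighted
# adjacency matrix. Assumes numpy is imported as np at module level; the shapes
# follow the resample signature above, where A is N x N and W is N x N x B.
def _niw_weights_demo(N=10, B=2):
    A = (np.random.rand(N, N) < 0.3).astype(np.float)   # dense-ish random graph
    W = np.random.randn(N, N, B)                         # random weights per edge
    model = NIWGaussianWeightDistribution(N, B=B)
    model.initialize_hypers(W)
    model.resample((A, W))
    return model.Mu, model.Sigma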
class SBMGaussianWeightDistribution(GaussianWeightDistribution, GibbsSampling):
    """
    A stochastic block model is a clustered network model with
        C:          Number of blocks
        m[c]:       Probability that a node belongs to block c
        mu[c,c']:   Mean weight from a node in block c to a node in block c'
        Sig[c,c']:  Cov of weight from a node in block c to a node in block c'

    It has hyperparameters:
        pi:                        Parameter of Dirichlet prior over m
        mu0, nu0, kappa0, Sigma0:  Parameters of NIW prior over (mu, Sig)
    """
    # TODO: Specify the self weight parameters in the constructor
    def __init__(self, N, B=1, C=2, pi=10.0,
                 mu_0=None, Sigma_0=None, nu_0=None, kappa_0=None,
                 special_case_self_conns=True):
        """
        Initialize SBM with parameters defined above.
        """
        super(SBMGaussianWeightDistribution, self).__init__(N)
        self.B = B

        assert isinstance(C, int) and C >= 1, \
            "C must be a positive integer number of blocks"
        self.C = C

        if isinstance(pi, (int, float)):
            self.pi = pi * np.ones(C)
        else:
            assert isinstance(pi, np.ndarray) and pi.shape == (C,), \
                "pi must be a scalar or a C-vector"
            self.pi = pi

        self.m = np.random.dirichlet(self.pi)
        self.c = np.random.choice(self.C, p=self.m, size=(self.N))

        if mu_0 is None:
            mu_0 = np.zeros(B)
        if Sigma_0 is None:
            Sigma_0 = np.eye(B)
        if nu_0 is None:
            nu_0 = B + 2
        if kappa_0 is None:
            kappa_0 = 1.0

        self._gaussians = [[Gaussian(mu_0=mu_0, nu_0=nu_0,
                                     kappa_0=kappa_0, sigma_0=Sigma_0)
                            for _ in xrange(C)]
                           for _ in xrange(C)]

        # Special case self-weights (along the diagonal)
        self.special_case_self_conns = special_case_self_conns
        if special_case_self_conns:
            self._self_gaussian = Gaussian(mu_0=mu_0, sigma_0=Sigma_0,
                                           nu_0=nu_0, kappa_0=kappa_0)

    @property
    def _Mu(self):
        return np.array([[self._gaussians[c1][c2].mu
                          for c2 in xrange(self.C)]
                         for c1 in xrange(self.C)])

    @property
    def _Sigma(self):
        return np.array([[self._gaussians[c1][c2].sigma
                          for c2 in xrange(self.C)]
                         for c1 in xrange(self.C)])

    @property
    def Mu(self):
        """
        Get the NxNxB matrix of weight means
        :return:
        """
        _Mu = self._Mu
        Mu = _Mu[np.ix_(self.c, self.c)]
        if self.special_case_self_conns:
            for n in xrange(self.N):
                Mu[n, n] = self._self_gaussian.mu
        return Mu

    @property
    def Sigma(self):
        """
        Get the NxNxBxB matrix of weight covariances
        :return:
        """
        _Sigma = self._Sigma
        Sigma = _Sigma[np.ix_(self.c, self.c)]
        if self.special_case_self_conns:
            for n in xrange(self.N):
                Sigma[n, n] = self._self_gaussian.sigma
        return Sigma

    def initialize_from_prior(self):
        self.m = np.random.dirichlet(self.pi)
        self.c = np.random.choice(self.C, p=self.m, size=(self.N))
        for c1 in xrange(self.C):
            for c2 in xrange(self.C):
                self._gaussians[c1][c2].resample()
        if self.special_case_self_conns:
            self._self_gaussian.resample()

    def initialize_hypers(self, W):
        mu_0 = W.mean(axis=(0, 1))
        sigma_0 = np.diag(W.var(axis=(0, 1)))
        for c1 in xrange(self.C):
            for c2 in xrange(self.C):
                nu_0 = self._gaussians[c1][c2].nu_0
                self._gaussians[c1][c2].mu_0 = mu_0
                self._gaussians[c1][c2].sigma_0 = sigma_0 * (nu_0 - self.B - 1) / self.C
                self._gaussians[c1][c2].resample()

        if self.special_case_self_conns:
            W_self = W[np.arange(self.N), np.arange(self.N)]
            self._self_gaussian.mu_0 = W_self.mean(axis=0)
            self._self_gaussian.sigma_0 = np.diag(W_self.var(axis=0))
            self._self_gaussian.resample()

        # Cluster the neurons based on their rows and columns
        from sklearn.cluster import KMeans
        features = np.hstack((W[:, :, 0], W[:, :, 0].T))
        km = KMeans(n_clusters=self.C)
        km.fit(features)
        self.c = km.labels_.astype(np.int)

        print "Initial c: ", self.c

    def _get_mask(self, A, c1, c2):
        mask = ((self.c == c1)[:, None] * (self.c == c2)[None, :])
        mask &= A.astype(np.bool)
        if self.special_case_self_conns:
            mask &= ~np.eye(self.N, dtype=np.bool)
        return mask

    def log_likelihood(self, (A, W)):
        N = self.N
        assert A.shape == (N, N)
        assert W.shape == (N, N, self.B)

        ll = 0
        for c1 in xrange(self.C):
            for c2 in xrange(self.C):
                mask = self._get_mask(A, c1, c2)
                ll += self._gaussians[c1][c2].log_likelihood(W[mask]).sum()

        if self.special_case_self_conns:
            mask = np.eye(self.N).astype(np.bool) & A.astype(np.bool)
            ll += self._self_gaussian.log_likelihood(W[mask]).sum()

        return ll
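# A small self-contained illustration (assumed shapes only) of the np.ix_ indexing
# used in SBMGaussianWeightDistribution.Mu and .Sigma: a C x C x B table of
# block-level means is expanded to an N x N x B array by looking up the block pair
# of each (pre, post) node pair.
def _block_indexing_demo(N=5, C=2, B=1):
    import numpy as np
    block_means = np.arange(C * C * B, dtype=np.float).reshape((C, C, B))
    c = np.random.choice(C, size=N)       # block assignment of each node
    Mu = block_means[np.ix_(c, c)]        # shape (N, N, B)
    assert Mu.shape == (N, N, B)
    assert np.allclose(Mu[0, 1], block_means[c[0], c[1]])
    return Mu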
class LinearDynamicalSystemBackground(Component):
    """
    Linear dynamical system model for the background activation.
    Since the potentials for the activation are of a Gaussian form,
    we can perform conjugate Gibbs sampling or variational inference
    for a Gaussian LDS model.
    """
    def __init__(self, population, D=2, A=None, C=None,
                 sigma_states=None, sigma_C=1.0):
        self.population = population
        self.activation = population.activation_model
        self.N = self.population.N
        self.D = D

        from pybasicbayes.distributions import Gaussian
        self.init_dynamics_distn = Gaussian(mu_0=np.ones(D),
                                            kappa_0=1.0,
                                            sigma_0=0.000001 * np.eye(D),
                                            nu_0=3.0)

        from autoregressive.distributions import AutoRegression
        self.dynamics_distn = AutoRegression(A=A, sigma=sigma_states,
                                             nu_0=D + 1.0,
                                             S_0=0.5 * np.eye(D),
                                             M_0=np.zeros((D, D)),
                                             K_0=0.5 * np.eye(D))

        # Initialize the emission matrix
        if C is None:
            self.C = sigma_C * np.random.randn(self.N, self.D)
        else:
            assert C.shape == (self.N, self.D)
            self.C = C

        self.sigma_C = sigma_C

    def augment_data(self, augmented_data):
        # Add a latent state sequence
        augmented_data["states"] = self.generate_states(augmented_data["T"])

    def log_likelihood(self, augmented_data):
        raise NotImplementedError

    def generate(self, T):
        states = self.generate_states(T)
        return states.dot(self.C.T)

    def generate_states(self, T):
        stateseq = np.empty((T, self.D))
        stateseq[0] = self.init_dynamics_distn.rvs()

        chol = np.linalg.cholesky(self.dynamics_distn.sigma)
        randseq = np.random.randn(T - 1, self.D)

        for t in xrange(1, T):
            stateseq[t] = \
                self.dynamics_distn.A.dot(stateseq[t-1]) \
                + chol.dot(randseq[t-1])

        return stateseq

    def mean_background_activation(self, augmented_data):
        return augmented_data["states"].dot(self.C.T)

    def resample(self, augmented_data_list):
        self.resample_states(augmented_data_list)
        self.resample_parameters(augmented_data_list)

    def resample_states(self, augmented_data_list):
        from pylds.lds_messages import filter_and_sample

        for data in augmented_data_list:
            # Compute the residual activation from other components
            psi = self.activation.compute_psi(data)
            psi_residual = psi - self.mean_background_activation(data)

            # Get the observed mean and variance
            mu_obs = self.activation.new_mean(data)
            prec_obs = self.activation.new_precision(data)

            # Subtract off the activation from other components
            mu_obs -= psi_residual

            # Convert prec_obs into an array of diagonal covariance matrices
            sigma_obs = np.empty((data["T"], self.N, self.N), order="C")
            for t in xrange(data["T"]):
                sigma_obs[t, :, :] = np.diag(1. / prec_obs[t, :])

            data["states"] = filter_and_sample(self.init_dynamics_distn.mu,
                                               self.init_dynamics_distn.sigma,
                                               self.dynamics_distn.A,
                                               self.dynamics_distn.sigma,
                                               self.C,
                                               sigma_obs,
                                               mu_obs)

    def resample_parameters(self, augmented_data_list):
        self.resample_init_dynamics_distn(augmented_data_list)
        self.resample_dynamics_distn(augmented_data_list)
        self.resample_emission_distn(augmented_data_list)

    def resample_init_dynamics_distn(self, augmented_data_list):
        states_list = [ad["states"][0] for ad in augmented_data_list]
        self.init_dynamics_distn.resample(states_list)

    def resample_dynamics_distn(self, augmented_data_list):
        from pyhsmm.util.general import AR_striding
        states_list = [ad["states"] for ad in augmented_data_list]
        strided_states_list = [AR_striding(s, 1) for s in states_list]
        self.dynamics_distn.resample(strided_states_list)

    def resample_emission_distn(self, augmented_data_list):
        """
        Resample the observation vectors. Since the emission noise is diagonal,
        we can resample each neuron's emission vector (a row of C) independently.
        :return:
        """
        # Get the prior
        prior_precision = 1. / self.sigma_C * np.eye(self.D)
        prior_mean = np.zeros(self.D)
        prior_mean_dot_precision = prior_mean.dot(prior_precision)

        # Get the sufficient statistics from the likelihood
        lkhd_precision = np.zeros((self.N, self.D, self.D))
        lkhd_mean_dot_precision = np.zeros((self.N, self.D))

        for data in augmented_data_list:
            # Compute the residual activation from other components
            psi = self.activation.compute_psi(data)
            psi_residual = psi - self.mean_background_activation(data)

            # Get the observed mean and variance
            mu_obs = self.activation.new_mean(data)
            prec_obs = self.activation.new_precision(data)

            # Subtract off the residual
            mu_obs -= psi_residual

            # Update the sufficient statistics for each neuron
            for n in xrange(self.N):
                lkhd_precision[n, :, :] += \
                    (data["states"] * prec_obs[:, n][:, None]).T.dot(data["states"])
                lkhd_mean_dot_precision[n, :] += \
                    (mu_obs[:, n] * prec_obs[:, n]).T.dot(data["states"])

        # Sample each row of C
        for n in xrange(self.N):
            post_prec = prior_precision + lkhd_precision[n, :, :]
            post_cov = np.linalg.inv(post_prec)
            post_mu = (prior_mean_dot_precision +
                       lkhd_mean_dot_precision[n, :]).dot(post_cov)
            post_mu = post_mu.ravel()
            self.C[n, :] = np.random.multivariate_normal(post_mu, post_cov)

    ### Variational inference
    def meanfieldupdate(self, augmented_data):
        raise NotImplementedError

    def get_vlb(self, augmented_data):
        raise NotImplementedError

    def resample_from_mf(self, augmented_data):
        raise NotImplementedError

    def svi_step(self, augmented_data, minibatchfrac, stepsize):
        raise NotImplementedError
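# A standalone sketch of the per-neuron conjugate update performed in
# resample_emission_distn above: with diagonal emission noise, the posterior over
# one row c_n of C given latent states X (T x D), targets y (T,), and per-time
# precisions tau (T,) is Gaussian. Names here are illustrative, not from the codebase.
def _emission_row_posterior_sketch(X, y, tau, sigma_C=1.0):
    import numpy as np
    D = X.shape[1]
    prior_precision = np.eye(D) / sigma_C            # zero-mean Gaussian prior on c_n
    post_prec = prior_precision + (X * tau[:, None]).T.dot(X)
    post_cov = np.linalg.inv(post_prec)
    post_mu = post_cov.dot((y * tau).dot(X))          # precision-weighted mean
    return np.random.multivariate_normal(post_mu, post_cov)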
class LogisticNormalCorrelatedLDA(_LDABase):
    "Correlated LDA with the logistic normal representation"

    def __init__(self, data, T, alpha_beta):
        mu, sigma = np.zeros(T), np.eye(T)
        self.theta_prior = \
            Gaussian(
                mu=mu, sigma=sigma, mu_0=mu, sigma_0=T * sigma / 10.,
                nu_0=T / 10., kappa_0=10.)

        self.ppgs = initialize_polya_gamma_samplers()
        self.omega = np.zeros((data.shape[0], T))

        super(LogisticNormalCorrelatedLDA, self).__init__(data, T, alpha_beta)

    @property
    def theta(self):
        return ln_psi_to_pi(self.psi)

    @theta.setter
    def theta(self, theta):
        self.psi = ln_pi_to_psi(theta)

    def initialize_theta(self):
        self.psi = np.tile(self.theta_prior.mu, (self.D, 1))

    def resample_theta(self):
        self.resample_psi_and_omega()

    def resample(self):
        super(LogisticNormalCorrelatedLDA, self).resample()
        self.resample_theta_prior()

    def resample_psi_and_omega(self):
        Lmbda = np.linalg.inv(self.theta_prior.sigma)
        for d in xrange(self.D):
            N = self.data[d].sum()
            c = self.doc_topic_counts[d]
            for t in xrange(self.T):
                self.omega[d, t] = self.ppgs[0].pgdraw(
                    N, self._conditional_omega(d, t))

                mu_cond, sigma_cond = self._conditional_psi(d, t, Lmbda, N, c)
                self.psi[d, t] = np.random.normal(mu_cond, np.sqrt(sigma_cond))

    def _conditional_psi(self, d, t, Lmbda, N, c):
        nott = np.arange(self.T) != t
        psi = self.psi[d]
        omega = self.omega[d]
        mu = self.theta_prior.mu

        zetat = logsumexp(psi[nott])

        mut_marg = mu[t] - 1. / Lmbda[t, t] * Lmbda[t, nott].dot(psi[nott] - mu[nott])
        sigmat_marg = 1. / Lmbda[t, t]

        sigmat_cond = 1. / (omega[t] + 1. / sigmat_marg)

        # kappa is the mean dot precision, i.e. the sufficient statistic of a Gaussian,
        # so we can sum over datapoints
        kappa = (c[t] - N / 2.0).sum()
        mut_cond = sigmat_cond * (kappa + mut_marg / sigmat_marg + omega[t] * zetat)

        return mut_cond, sigmat_cond

    def _conditional_omega(self, d, t):
        nott = np.arange(self.T) != t
        psi = self.psi[d]
        zetat = logsumexp(psi[nott])
        return psi[t] - zetat

    def resample_theta_prior(self):
        self.theta_prior.resample(self.psi)

    def copy_sample(self):
        new = copy.copy(self)
        new.beta = self.beta.copy()
        new.psi = self.psi.copy()
        new.theta_prior = self.theta_prior.copy_sample()
        del new.z
        del new.omega
        return new
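# For reference, a standalone sketch of the map that ln_psi_to_pi is assumed to
# compute: the multi-class logistic (softmax) transform from a real-valued psi
# vector to a probability vector pi. Illustration of the assumed transform only,
# not the library implementation.
def _ln_softmax_sketch(psi):
    import numpy as np
    psi = psi - psi.max(axis=-1, keepdims=True)   # stabilize the exponentials
    exppsi = np.exp(psi)
    return exppsi / exppsi_sum if (exppsi_sum := expsi_placeholder) else None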
class _IndependentGaussianMixin(_NetworkModel):
    """
    Each weight is an independent Gaussian with a shared NIW prior.
    Special case the self-connections.
    """
    def __init__(self, N, B,
                 mu_0=0.0, sigma_0=1.0, kappa_0=1.0, nu_0=3.0,
                 is_diagonal_weight_special=True,
                 **kwargs):
        super(_IndependentGaussianMixin, self).__init__(N, B)

        mu_0 = expand_scalar(mu_0, (B,))
        sigma_0 = expand_cov(sigma_0, (B, B))
        self._gaussian = Gaussian(mu_0=mu_0, sigma_0=sigma_0,
                                  kappa_0=kappa_0, nu_0=max(nu_0, B + 2.))

        self.is_diagonal_weight_special = is_diagonal_weight_special
        if is_diagonal_weight_special:
            self._self_gaussian = \
                Gaussian(mu_0=mu_0, sigma_0=sigma_0,
                         kappa_0=kappa_0, nu_0=nu_0)

    @property
    def mu_W(self):
        N, B = self.N, self.B
        mu = np.zeros((N, N, B))
        if self.is_diagonal_weight_special:
            # Set off-diagonal weights
            mask = np.ones((N, N), dtype=bool)
            mask[np.diag_indices(N)] = False
            mu[mask] = self._gaussian.mu

            # Set diagonal weights
            mask = np.eye(N).astype(bool)
            mu[mask] = self._self_gaussian.mu
        else:
            mu = np.tile(self._gaussian.mu[None, None, :], (N, N, 1))
        return mu

    @property
    def sigma_W(self):
        N, B = self.N, self.B
        if self.is_diagonal_weight_special:
            sigma = np.zeros((N, N, B, B))
            # Set off-diagonal weights
            mask = np.ones((N, N), dtype=bool)
            mask[np.diag_indices(N)] = False
            sigma[mask] = self._gaussian.sigma

            # Set diagonal weights
            mask = np.eye(N).astype(bool)
            sigma[mask] = self._self_gaussian.sigma
        else:
            sigma = np.tile(self._gaussian.sigma[None, None, :, :], (N, N, 1, 1))
        return sigma

    def resample(self, data=[]):
        super(_IndependentGaussianMixin, self).resample(data)
        A, W = data
        N, B = self.N, self.B
        if self.is_diagonal_weight_special:
            # Resample prior for off-diagonal weights
            mask = np.ones((N, N), dtype=bool)
            mask[np.diag_indices(N)] = False
            mask = mask & A
            self._gaussian.resample(W[mask])

            # Resample prior for diagonal weights
            mask = np.eye(N).astype(bool) & A
            self._self_gaussian.resample(W[mask])
        else:
            # Resample prior for all weights
            self._gaussian.resample(W[A])
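# For reference, minimal sketches of the helpers assumed above: expand_scalar is
# taken to broadcast a scalar hyperparameter to a vector of the requested shape,
# and expand_cov to turn a scalar variance into a diagonal covariance of the
# requested shape. Illustrations of the assumed behavior, not the library code.
def _expand_scalar_sketch(x, shape):
    import numpy as np
    return x * np.ones(shape) if np.isscalar(x) else np.asarray(x).reshape(shape)

def _expand_cov_sketch(c, shape):
    import numpy as np
    assert len(shape) == 2 and shape[0] == shape[1]
    return c * np.eye(shape[0]) if np.isscalar(c) else np.asarray(c).reshape(shape)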
class _LatentDistanceModelGaussianMixin(_NetworkModel):
    """
    l_n ~ N(0, eta * I)
    W_{n', n} ~ N(-||l_{n'} - l_{n}||_2^2 + b, sigma^2)   for n' != n
    """
    def __init__(self, N, B=1, dim=2, b=0.5,
                 sigma=None, Sigma_0=None, nu_0=None,
                 mu_self=0.0, eta=0.01):
        super(_LatentDistanceModelGaussianMixin, self).__init__(N, B)

        self.B = B
        self.dim = dim
        self.b = b
        self.eta = eta
        self.L = np.sqrt(eta) * np.random.randn(N, dim)

        if Sigma_0 is None:
            Sigma_0 = np.eye(B)
        if nu_0 is None:
            nu_0 = B + 2

        self.cov = GaussianFixedMean(mu=np.zeros(B), sigma=sigma,
                                     lmbda_0=Sigma_0, nu_0=nu_0)

        # Special case self-weights (along the diagonal)
        self._self_gaussian = Gaussian(mu_0=mu_self * np.ones(B),
                                       sigma_0=Sigma_0, nu_0=nu_0, kappa_0=1.0)

    @property
    def D(self):
        # return np.sqrt(((self.L[:, None, :] - self.L[None, :, :]) ** 2).sum(2))
        return ((self.L[:, None, :] - self.L[None, :, :])**2).sum(2)

    @property
    def mu_W(self):
        Mu = -self.D + self.b
        Mu = np.tile(Mu[:, :, None], (1, 1, self.B))
        for n in range(self.N):
            Mu[n, n, :] = self._self_gaussian.mu
        return Mu

    @property
    def sigma_W(self):
        sig = self.cov.sigma
        Sig = np.tile(sig[None, None, :, :], (self.N, self.N, 1, 1))
        for n in range(self.N):
            Sig[n, n, :, :] = self._self_gaussian.sigma
        return Sig

    def initialize_from_prior(self):
        self.L = np.sqrt(self.eta) * np.random.randn(self.N, self.dim)
        self.cov.resample()

    def initialize_hypers(self, W):
        # Optimize the initial locations
        self._optimize_L(np.ones((self.N, self.N)), W)

    def _hmc_log_probability(self, L, b, A, W):
        """
        Compute the log probability as a function of L.
        This allows us to take the gradients wrt L using autograd.
        :param L:
        :param A:
        :return:
        """
        assert self.B == 1
        import autograd.numpy as atnp

        # Compute pairwise distances
        L1 = atnp.reshape(L, (self.N, 1, self.dim))
        L2 = atnp.reshape(L, (1, self.N, self.dim))
        # Mu = a * anp.sqrt(anp.sum((L1-L2)**2, axis=2)) + b
        Mu = -atnp.sum((L1 - L2)**2, axis=2) + b

        Aoff = A * (1 - atnp.eye(self.N))
        X = (W - Mu[:, :, None]) * Aoff[:, :, None]

        # Get the covariance and precision
        Sig = self.cov.sigma[0, 0]
        Lmb = 1. / Sig

        lp = atnp.sum(-0.5 * X**2 * Lmb)

        # Log prior of L under a spherical Gaussian prior
        lp += -0.5 * atnp.sum(L * L / self.eta)

        # Log prior of b under a standard Gaussian prior
        lp += -0.5 * b**2

        return lp

    def resample(self, data=[]):
        super(_LatentDistanceModelGaussianMixin, self).resample(data)
        A, W = data
        N, B = self.N, self.B

        self._resample_L(A, W)
        self._resample_b(A, W)
        self._resample_cov(A, W)
        self._resample_self_gaussian(A, W)
        self._resample_eta()
        # print "eta: ", self.eta, "\tb: ", self.b

    def _resample_L(self, A, W):
        """
        Resample the locations given A
        :return:
        """
        from autograd import grad
        from hips.inference.hmc import hmc

        lp = lambda L: self._hmc_log_probability(L, self.b, A, W)
        dlp = grad(lp)

        stepsz = 0.005
        nsteps = 10
        # lp0 = lp(self.L)
        self.L = hmc(lp, dlp, stepsz, nsteps, self.L.copy(),
                     negative_log_prob=False)
        # lpf = lp(self.L)
        # print "diff lp: ", (lpf - lp0)

    def _optimize_L(self, A, W):
        """
        Optimize the locations given A
        :return:
        """
        import autograd.numpy as atnp
        from autograd import grad
        from scipy.optimize import minimize

        lp = lambda Lflat: -self._hmc_log_probability(
            atnp.reshape(Lflat, (self.N, 2)), self.b, A, W)
        dlp = grad(lp)

        res = minimize(lp, np.ravel(self.L), jac=dlp, method="bfgs")

        self.L = np.reshape(res.x, (self.N, 2))

    def _resample_b_hmc(self, A, W):
        """
        Resample the distance dependence offset
        :return:
        """
        # TODO: We could sample from the exact Gaussian conditional
        from autograd import grad
        from hips.inference.hmc import hmc

        lp = lambda b: self._hmc_log_probability(self.L, b, A, W)
        dlp = grad(lp)

        stepsz = 0.0001
        nsteps = 10
        b = hmc(lp, dlp, stepsz, nsteps, np.array(self.b),
                negative_log_prob=False)
        self.b = float(b)
        print("b: ", self.b)

    def _resample_b(self, A, W):
        """
        Resample the distance dependence offset.
        W ~ N(mu, sigma) = N(-D + b, sigma) implies W + D ~ N(b, sigma).
        If b ~ N(0, 1), we can compute the Gaussian conditional in closed form.
        """
        D = self.D
        sigma = self.cov.sigma[0, 0]
        Aoff = (A * (1 - np.eye(self.N))).astype(np.bool)
        X = (W + D[:, :, None])[Aoff]

        # Now X ~ N(b, sigma)
        mu0, sigma0 = 0.0, 1.0
        N = X.size
        sigma_post = 1. / (1. / sigma0 + N / sigma)
        mu_post = sigma_post * (mu0 / sigma0 + X.sum() / sigma)

        self.b = mu_post + np.sqrt(sigma_post) * np.random.randn()
        # print "b: ", self.b

    def _resample_cov(self, A, W):
        # Resample covariance matrix
        Mu = self.Mu
        mask = (~np.eye(self.N, dtype=np.bool)) & A.astype(np.bool)
        self.cov.resample(W[mask] - Mu[mask])

    def _resample_self_gaussian(self, A, W):
        # Resample self connection
        mask = np.eye(self.N, dtype=np.bool) & A.astype(np.bool)
        self._self_gaussian.resample(W[mask])

    def _resample_eta(self):
        """
        Resample eta under an inverse gamma prior, eta ~ IG(1, 1)
        :return:
        """
        L = self.L

        a_prior = 1.0
        b_prior = 1.0

        a_post = a_prior + L.size / 2.0
        b_post = b_prior + (L**2).sum() / 2.0

        from scipy.stats import invgamma
        self.eta = invgamma.rvs(a=a_post, scale=b_post)
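# Hypothetical sketch of the autograd pattern used in _resample_L above: evaluate
# the HMC log probability and its gradient with respect to the locations L for a
# concrete model instance. Assumes B = 1, as _hmc_log_probability itself does, and
# that autograd is installed.
def _latent_distance_grad_sketch(model, A, W):
    from autograd import grad
    lp = lambda L: model._hmc_log_probability(L, model.b, A, W)
    dlp = grad(lp)
    return lp(model.L), dlp(model.L)   # scalar log prob and an N x dim gradient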