def __call__(self, data, gamma, mu_prior_mean, Phi0, nu0):
    """Draw a new mean and covariance for every component in ``self.comps``.

    Reads ``self.comps``, ``self.labels`` and ``self.Sigma``; writes the
    draws to ``self.new_mu`` / ``self.new_Sigma`` and the per-component
    observation counts to ``self.count``.  ``self.labels`` and
    ``self.Sigma`` are deleted from the instance before returning.

    Parameters
    ----------
    data : array indexed by observation along axis 0.
    gamma : indexable by component id ``j``; scales the prior on the mean.
    mu_prior_mean : prior mean shared by all components.
    Phi0 : indexable by component id ``j``; added to the scatter matrix.
    nu0 : prior degrees of freedom.

    Returns
    -------
    None.  Results are stored on the instance.
    """
    self.new_Sigma = np.zeros_like(self.Sigma)
    # Builtin float/int/bool replace the np.float/np.int/np.bool aliases,
    # which no longer exist in current NumPy releases.
    self.new_mu = np.zeros(self.Sigma.shape[:2], dtype=float)

    grouped = isinstance(self.labels, list)
    if grouped:
        # One row of counts per label array in the list.
        self.count = np.zeros((len(self.labels), len(self.comps)), dtype=int)
    else:
        self.count = np.zeros(len(self.comps), dtype=int)

    # jj is the position inside this object's slice (self.Sigma, outputs);
    # j is the component id used to index gamma / Phi0 and to match labels.
    for jj, j in enumerate(self.comps):
        if grouped:
            # The label arrays are laid end to end to index rows of data.
            mask = np.zeros(data.shape[0], dtype=bool)
            cumobs = 0
            for ii, labs in enumerate(self.labels):
                submask = labs == j
                mask[cumobs:cumobs + len(labs)] = submask
                self.count[ii, jj] = np.sum(submask)
                cumobs += len(labs)
        else:
            mask = self.labels == j
            self.count[jj] = np.sum(mask)

        Xj = data[mask]
        nj, ndim = Xj.shape
        sumxj = Xj.sum(0)

        gam = gamma[j]
        mu_hyper = mu_prior_mean

        # 1.0 keeps the reciprocal in floating point even for an integer
        # gamma on Python 2.
        post_prec = 1.0 / gam + nj
        post_mean = (mu_hyper / gam + sumxj) / post_prec
        post_cov = 1 / post_prec * self.Sigma[jj]

        new_mu = npr.multivariate_normal(post_mean, post_cov)

        Xj_demeaned = Xj - new_mu

        mu_SS = np.outer(new_mu - mu_hyper, new_mu - mu_hyper) / gam
        data_SS = np.dot(Xj_demeaned.T, Xj_demeaned)
        post_Phi = data_SS + mu_SS + Phi0[j]

        # symmetrize
        post_Phi = (post_Phi + post_Phi.T) / 2

        # P(Sigma) ~ IW(nu + 2, nu * Phi)
        # NOTE(review): parameterisation expected by invwishartrand_prec is
        # not visible here -- confirm against its definition.
        post_nu = nj + ndim + nu0 + 2

        new_Sigma = invwishartrand_prec(post_nu, post_Phi)

        # store new results
        self.new_Sigma[jj] = new_Sigma
        self.new_mu[jj] = new_mu

    # Drop the inputs from the instance; presumably this keeps the object
    # small when it is handed back to the caller -- TODO confirm.
    del self.labels
    del self.Sigma
def _set_initial_values(self, alpha0, nu0, Phi0, mu0, Sigma0, weights0, e0, f0):
    """Fill in defaults for unspecified starting values and store them.

    Any of ``nu0``, ``Phi0``, ``Sigma0``, ``mu0`` and ``weights0`` passed as
    ``None`` is replaced by a default; ``alpha0``, ``e0`` and ``f0`` are
    stored unchanged.  Reads ``self.ncomp`` and ``self.ndim``, and
    ``self.gamma`` when ``mu0`` has to be drawn.

    Sets ``self._alpha0``, ``self.e``, ``self.f``, ``self._weights0``,
    ``self._mu0``, ``self._Sigma0``, ``self._nu0`` and ``self._Phi0``.
    """
    if nu0 is None:
        nu0 = 1

    if Phi0 is None:
        # Same nu0-scaled identity for every component.
        Phi0 = np.empty((self.ncomp, self.ndim, self.ndim))
        Phi0[:] = np.eye(self.ndim) * nu0

    if Sigma0 is None:
        # draw from prior .. bad idea for vague prior ???
        Sigma0 = np.empty((self.ncomp, self.ndim, self.ndim))
        for j in range(self.ncomp):
            Sigma0[j] = invwishartrand_prec(nu0 + 1 + self.ndim, Phi0[j])

    # starting values, are these sensible?
    if mu0 is None:
        mu0 = np.empty((self.ncomp, self.ndim))
        for j in range(self.ncomp):
            mu0[j] = npr.multivariate_normal(np.zeros(self.ndim),
                                             self.gamma[j] * Sigma0[j])

    if weights0 is None:
        # 1.0 forces true division: with integer ncomp on Python 2,
        # 1 / ncomp truncates to 0 and every starting weight would be zero.
        weights0 = (1.0 / self.ncomp) * np.ones((self.ncomp, 1))

    self._alpha0 = alpha0
    self.e = e0
    self.f = f0

    self._weights0 = weights0
    self._mu0 = mu0
    self._Sigma0 = Sigma0
    self._nu0 = nu0  # prior degrees of freedom
    self._Phi0 = Phi0  # prior location for Sigma_j's
def _set_initial_values(self, alpha0, nu0, Phi0, mu0, Sigma0, weights0, e0, f0):
    """Resolve sampler starting values, defaulting whatever is ``None``.

    Defaults applied when the argument is ``None``:

    * ``nu0``      -> 1
    * ``Phi0``     -> ``nu0 * I`` repeated for each of ``self.ncomp`` components
    * ``Sigma0``   -> one ``invwishartrand_prec`` draw per component
    * ``mu0``      -> one zero-mean normal draw per component with covariance
      ``self.gamma[j] * Sigma0[j]``
    * ``weights0`` -> equal weights ``1 / ncomp`` in a column of shape
      ``(ncomp, 1)``

    The resolved values, along with ``alpha0``, ``e0`` and ``f0``, are stored
    on the instance; nothing is returned.
    """
    ncomp, ndim = self.ncomp, self.ndim

    if nu0 is None:
        nu0 = 1

    if Phi0 is None:
        Phi0 = np.empty((ncomp, ndim, ndim))
        Phi0[:] = np.eye(ndim) * nu0

    if Sigma0 is None:
        # draw from prior .. bad idea for vague prior ???
        Sigma0 = np.empty((ncomp, ndim, ndim))
        for j in range(ncomp):
            Sigma0[j] = invwishartrand_prec(nu0 + 1 + ndim, Phi0[j])

    # starting values, are these sensible?
    if mu0 is None:
        mu0 = np.empty((ncomp, ndim))
        origin = np.zeros(ndim)
        for j in range(ncomp):
            mu0[j] = npr.multivariate_normal(origin, self.gamma[j] * Sigma0[j])

    if weights0 is None:
        # Float numerator: integer 1 / ncomp is 0 under Python 2 division,
        # which would start the sampler with all-zero weights.
        weights0 = (1.0 / ncomp) * np.ones((ncomp, 1))

    self._alpha0 = alpha0
    self.e = e0
    self.f = f0

    self._weights0 = weights0
    self._mu0 = mu0
    self._Sigma0 = Sigma0
    self._nu0 = nu0  # prior degrees of freedom
    self._Phi0 = Phi0  # prior location for Sigma_j's
def __call__(self, data, gamma, mu_prior_mean, Phi0, nu0):
    """Sample new means and covariances for the components in ``self.comps``.

    For each component the observations labelled with that component are
    selected from ``data``, a mean is drawn with
    ``npr.multivariate_normal`` and a covariance with
    ``invwishartrand_prec``.

    Parameters
    ----------
    data : array whose rows are observations.
    gamma : per-component scale, indexed by component id.
    mu_prior_mean : prior mean used for every component.
    Phi0 : per-component matrix added to the posterior scatter, indexed by
        component id.
    nu0 : prior degrees of freedom.

    Side effects
    ------------
    Sets ``self.new_mu``, ``self.new_Sigma`` and ``self.count``; removes
    ``self.labels`` and ``self.Sigma`` from the instance.  Returns ``None``.
    """
    n_local = len(self.comps)
    labels_is_list = isinstance(self.labels, list)

    self.new_Sigma = np.zeros_like(self.Sigma)
    # dtype=float/int/bool: the np.float / np.int / np.bool aliases used
    # previously are gone from current NumPy and raise AttributeError.
    self.new_mu = np.zeros((self.Sigma.shape[0], self.Sigma.shape[1]),
                           dtype=float)
    if labels_is_list:
        self.count = np.zeros((len(self.labels), n_local), dtype=int)
    else:
        self.count = np.zeros(n_local, dtype=int)

    # ``pos`` indexes this object's local arrays, ``comp`` is the component
    # id that appears in the labels and indexes gamma / Phi0.
    for pos, comp in enumerate(self.comps):
        if labels_is_list:
            mask = np.zeros(data.shape[0], dtype=bool)
            offset = 0
            for grp, labs in enumerate(self.labels):
                hits = labs == comp
                # Label arrays map onto consecutive row ranges of data.
                mask[offset:offset + len(labs)] = hits
                self.count[grp, pos] = np.sum(hits)
                offset += len(labs)
        else:
            mask = self.labels == comp
            self.count[pos] = np.sum(mask)

        Xj = data[mask]
        nj, ndim = Xj.shape
        sumxj = Xj.sum(0)

        gam = gamma[comp]
        mu_hyper = mu_prior_mean

        # Float reciprocal so an integer gamma cannot truncate on Python 2.
        inv_gam_plus_n = 1.0 / gam + nj
        post_mean = (mu_hyper / gam + sumxj) / inv_gam_plus_n
        post_cov = 1 / inv_gam_plus_n * self.Sigma[pos]

        new_mu = npr.multivariate_normal(post_mean, post_cov)

        centred = Xj - new_mu
        mu_dev = new_mu - mu_hyper

        mu_SS = np.outer(mu_dev, mu_dev) / gam
        data_SS = np.dot(centred.T, centred)
        post_Phi = data_SS + mu_SS + Phi0[comp]

        # symmetrize
        post_Phi = (post_Phi + post_Phi.T) / 2

        # P(Sigma) ~ IW(nu + 2, nu * Phi)
        # NOTE(review): confirm this matches the parameterisation that
        # invwishartrand_prec expects; its definition is not visible here.
        post_nu = nj + ndim + nu0 + 2

        # store new results
        self.new_Sigma[pos] = invwishartrand_prec(post_nu, post_Phi)
        self.new_mu[pos] = new_mu

    # Inputs are no longer needed once the draws are stored.
    del self.labels
    del self.Sigma
# Timing / sanity script: averages N draws from each sampler and prints the
# mean followed by the elapsed wall-clock seconds.
N = 10000
nu = 6.0
phi = 2 * np.identity(4)
phi[1, 2] = 1
phi[2, 1] = 1
mu = np.zeros(4, dtype=np.float64)
Sigma = sampler.pyiwishartrand_prec(nu, phi)

# --- sampler.pyiwishartrand_prec ---
res = np.zeros((4, 4))
t1 = time.time()
for i in range(N):
    res += sampler.pyiwishartrand_prec(nu, phi)
# Parenthesised single-argument print works on both Python 2 and 3.
print(res / N)
print(time.time() - t1)

# --- wishart.invwishartrand_prec ---
t2 = time.time()
res = np.zeros((4, 4))
for i in range(N):
    res += wishart.invwishartrand_prec(nu, phi)
print(res / N)
print(time.time() - t2)

# --- sampler.pymvnorm, using the Sigma drawn above ---
t3 = time.time()
res = np.zeros(4)
for i in range(N):
    s = sampler.pymvnorm(mu, Sigma)
    res += s
print(res / N)
print(time.time() - t3)
def _update_mu_Sigma(self, Sigma, labels, other_dat=None):
    """Draw new component means and covariances given the current labels.

    Parameters
    ----------
    Sigma : current covariances, indexed by component along axis 0.
    labels : either one label array, or a list of label arrays.  In the
        list case the arrays are laid end to end to index rows of the data
        and counts are kept per list entry.
    other_dat : optional data to use instead of ``self.data``.

    Returns
    -------
    (mu_output, Sigma_output, ct)
        New means ``(ncomp, ndim)``, new covariances
        ``(ncomp, ndim, ndim)`` and observation counts, shaped ``(ncomp,)``
        or ``(len(labels), ncomp)`` when ``labels`` is a list.
    """
    is_hdp = isinstance(labels, list)
    mu_output = np.zeros((self.ncomp, self.ndim))
    Sigma_output = np.zeros((self.ncomp, self.ndim, self.ndim))
    # Builtin int/bool replace np.int/np.bool, which current NumPy removed.
    if is_hdp:
        ct = np.zeros((len(labels), self.ncomp), dtype=int)
    else:
        ct = np.zeros(self.ncomp, dtype=int)

    data = self.data if other_dat is None else other_dat

    if self.parallel:
        # Hand each worker its slice of components ...
        for tid in self.compsdevmap:
            rng = self.compsdevmap[tid]
            self.work_queue[tid].put(
                CompUpdate(np.arange(rng[0], rng[1]), labels,
                           Sigma[rng[0]:rng[1]]))

        # ... then collect one result per worker and copy it into place.
        for tid in self.compsdevmap:
            result = self.result_queue[tid].get()
            newcomps = result.comps
            start, stop = newcomps[0], newcomps[-1] + 1
            mu_output[start:stop] = result.new_mu
            Sigma_output[start:stop] = result.new_Sigma
            if is_hdp:
                ct[:, start:stop] = result.count
            else:
                ct[start:stop] = result.count
    else:
        for j in range(self.ncomp):
            if is_hdp:
                mask = np.zeros(data.shape[0], dtype=bool)
                count = np.zeros(len(labels), dtype=int)
                cumobs = 0
                for ii, labs in enumerate(labels):
                    submask = labs == j
                    mask[cumobs:cumobs + len(labs)] = submask
                    count[ii] = np.sum(submask)
                    cumobs += len(labs)
            else:
                mask = labels == j
                count = np.sum(mask)

            Xj = data[mask]
            nj = len(Xj)
            sumxj = Xj.sum(0)

            gam = self.gamma[j]
            mu_hyper = self.mu_prior_mean

            # 1.0 keeps the reciprocal in floating point even for an
            # integer gamma on Python 2.
            post_prec = 1.0 / gam + nj
            post_mean = (mu_hyper / gam + sumxj) / post_prec
            post_cov = 1 / post_prec * Sigma[j]

            new_mu = npr.multivariate_normal(post_mean, post_cov)

            Xj_demeaned = Xj - new_mu

            mu_SS = np.outer(new_mu - mu_hyper, new_mu - mu_hyper) / gam
            data_SS = np.dot(Xj_demeaned.T, Xj_demeaned)
            post_Phi = data_SS + mu_SS + self._Phi0[j]

            # symmetrize
            post_Phi = (post_Phi + post_Phi.T) / 2

            # P(Sigma) ~ IW(nu + 2, nu * Phi)
            # NOTE(review): confirm against invwishartrand_prec's
            # parameterisation; its definition is not visible here.
            post_nu = nj + self.ndim + self._nu0 + 2

            new_Sigma = invwishartrand_prec(post_nu, post_Phi)

            mu_output[j] = new_mu
            Sigma_output[j] = new_Sigma
            if is_hdp:
                ct[:, j] = count
            else:
                ct[j] = count

    return mu_output, Sigma_output, ct
if __name__ == '__main__':
    # Timing / sanity check: average N draws from each sampler, then print
    # the mean followed by the elapsed wall-clock seconds.
    N = 10000
    nu = 6.0
    phi = 2 * np.identity(4)
    phi[1, 2] = 1
    phi[2, 1] = 1
    mu = np.zeros(4, dtype=np.float64)
    Sigma = sampler.pyiwishartrand_prec(nu, phi)

    def _time_draws(draw, shape):
        """Accumulate N calls of ``draw()`` and print the mean and timing."""
        started = time.time()
        total = np.zeros(shape)
        for _ in range(N):
            total += draw()
        # Parenthesised single-argument print works on Python 2 and 3.
        print(total / N)
        print(time.time() - started)

    _time_draws(lambda: sampler.pyiwishartrand_prec(nu, phi), (4, 4))
    _time_draws(lambda: wishart.invwishartrand_prec(nu, phi), (4, 4))
    _time_draws(lambda: sampler.pymvnorm(mu, Sigma), 4)
def _update_mu_Sigma(self, Sigma, labels, other_dat=None):
    """Resample every component's mean and covariance.

    When ``self.parallel`` is true the work is split according to
    ``self.compsdevmap``: one ``CompUpdate`` per entry is put on the
    matching ``self.work_queue`` and the results are read back from
    ``self.result_queue``.  Otherwise all ``self.ncomp`` components are
    updated in a local loop.

    Parameters
    ----------
    Sigma : current covariances, one per component along axis 0.
    labels : a label array, or a list of label arrays (counts are then
        kept separately for each list entry).
    other_dat : data to use in place of ``self.data`` when not ``None``.

    Returns
    -------
    tuple
        ``(mu_output, Sigma_output, ct)``: the new means, new covariances
        and per-component observation counts.
    """
    ncomp, ndim = self.ncomp, self.ndim
    is_hdp = isinstance(labels, list)

    mu_output = np.zeros((ncomp, ndim))
    Sigma_output = np.zeros((ncomp, ndim, ndim))
    # dtype=int / dtype=bool: np.int and np.bool no longer exist in
    # current NumPy releases.
    count_shape = (len(labels), ncomp) if is_hdp else ncomp
    ct = np.zeros(count_shape, dtype=int)

    if other_dat is None:
        data = self.data
    else:
        data = other_dat

    if self.parallel:
        # Dispatch: each map entry is a (start, stop) range of components.
        for tid in self.compsdevmap:
            rng = self.compsdevmap[tid]
            job = CompUpdate(np.arange(rng[0], rng[1]), labels,
                             Sigma[rng[0]:rng[1]])
            self.work_queue[tid].put(job)

        # Gather: the returned object reports which components it covers.
        for tid in self.compsdevmap:
            result = self.result_queue[tid].get()
            done = result.comps
            first, past_last = done[0], done[-1] + 1
            mu_output[first:past_last] = result.new_mu
            Sigma_output[first:past_last] = result.new_Sigma
            if is_hdp:
                ct[:, first:past_last] = result.count
            else:
                ct[first:past_last] = result.count
        return mu_output, Sigma_output, ct

    for j in range(ncomp):
        if is_hdp:
            mask = np.zeros(data.shape[0], dtype=bool)
            count = np.zeros(len(labels), dtype=int)
            offset = 0
            for grp, labs in enumerate(labels):
                hits = labs == j
                # Label arrays map onto consecutive row ranges of data.
                mask[offset:offset + len(labs)] = hits
                count[grp] = np.sum(hits)
                offset += len(labs)
        else:
            mask = labels == j
            count = np.sum(mask)

        Xj = data[mask]
        nj = len(Xj)
        sumxj = Xj.sum(0)

        gam = self.gamma[j]
        mu_hyper = self.mu_prior_mean

        # Float reciprocal so an integer gamma cannot truncate on Python 2.
        inv_gam_plus_n = 1.0 / gam + nj
        post_mean = (mu_hyper / gam + sumxj) / inv_gam_plus_n
        post_cov = 1 / inv_gam_plus_n * Sigma[j]

        new_mu = npr.multivariate_normal(post_mean, post_cov)

        centred = Xj - new_mu
        mu_dev = new_mu - mu_hyper

        mu_SS = np.outer(mu_dev, mu_dev) / gam
        data_SS = np.dot(centred.T, centred)
        post_Phi = data_SS + mu_SS + self._Phi0[j]

        # symmetrize
        post_Phi = (post_Phi + post_Phi.T) / 2

        # P(Sigma) ~ IW(nu + 2, nu * Phi)
        # NOTE(review): confirm against invwishartrand_prec's
        # parameterisation; its definition is not visible here.
        post_nu = nj + ndim + self._nu0 + 2

        mu_output[j] = new_mu
        Sigma_output[j] = invwishartrand_prec(post_nu, post_Phi)
        if is_hdp:
            ct[:, j] = count
        else:
            ct[j] = count

    return mu_output, Sigma_output, ct