def sample_delta(self, delta, zeta, eta):
    """
    Redraw the cluster label of every observation, one observation at a
    time, for all temperatures at once.  ``delta`` is updated in place.

    For each observation the method removes it from the per-temperature
    cluster counts, builds unnormalised weights over all
    ``self.max_clust_count`` slots (occupied slots weigh their count,
    still-available candidate slots share ``eta`` equally), adds the
    observation's log-likelihood, and draws a new label by inverse-CDF
    lookup.

    Parameters
    ----------
    delta : integer array of labels, modified in place.
        NOTE(review): indexed as ``delta.T[i]`` against ``arange(nTemp)``,
        so presumably shaped (nTemp, nDat) -- confirm.
    zeta : cluster parameters, forwarded to ``self.log_delta_likelihood``.
    eta : per-temperature weight shared among candidate slots.
        NOTE(review): must yield a 1-d array when divided by the
        per-temperature candidate count -- confirm shape (nTemp,).

    Returns
    -------
    None
    """
    counts = bincount2D_vectorized(delta, self.max_clust_count)
    is_candidate = (counts == 0)
    loglik = self.log_delta_likelihood(zeta)
    temps = np.arange(self.nTemp)

    # One uniform per (observation, temperature).  Shifting draw t by t
    # puts it in [t, t + 1), matching the per-row offset applied to the
    # CDF table below so one flat search resolves every temperature.
    draws = uniform(size=(self.nDat, self.nTemp))
    draws += temps[None, :]

    work = np.empty(counts.shape)
    labels = delta.T  # view: writes below land in ``delta`` itself

    for obs in range(self.nDat):
        # Take the observation out of its current cluster.
        counts[temps, labels[obs]] -= 1

        # Unnormalised prior weights: counts for occupied slots, an equal
        # share of eta for candidate slots (1e-9 guards a zero divisor).
        candidate_share = eta / (is_candidate.sum(axis=1) + 1e-9)
        work[:] = counts
        work += is_candidate * candidate_share[:, None]

        # Move to log scale; zero weights become -inf without warnings.
        with np.errstate(divide='ignore', invalid='ignore'):
            np.log(work, out=work)
        work += loglik[obs]
        # In place: NaN -> -inf.  (With default posinf/neginf, nan_to_num
        # also maps +/-inf to the largest-magnitude finite floats.)
        np.nan_to_num(work, copy=False, nan=-np.inf)

        # Stabilised exponentiation, then a normalised CDF per temperature.
        work -= work.max(axis=1)[:, None]
        with np.errstate(under='ignore'):
            np.exp(work, out=work)
        np.cumsum(work, axis=1, out=work)
        work /= work[:, -1:]
        work += temps[:, None]

        # Flat inverse-CDF lookup; modulo recovers the within-row slot.
        flat_pos = np.searchsorted(work.ravel(), draws[obs])
        labels[obs] = flat_pos % self.max_clust_count

        # Put the observation into its new cluster; a slot that has been
        # chosen is no longer a candidate for later observations.
        counts[temps, labels[obs]] += 1
        is_candidate[temps, labels[obs]] = False
def iter_sample(self):
    """
    Perform one sampler sweep and store every draw at the next iteration
    index of ``self.samples``.

    Order of operations: read the current state, update the adaptive
    Metropolis covariance, advance ``self.curr_iter``, fill empty cluster
    slots with fresh ``zeta`` candidates, resample and re-index ``delta``,
    then draw ``zeta``, ``alpha``, ``beta`` and ``eta``.  Once
    ``self.curr_iter`` has reached ``self.swap_start`` a tempering swap is
    attempted.

    NOTE(review): the ``self.curr_*`` attributes are read again after
    ``self.curr_iter`` is advanced; presumably they follow the iteration
    counter and therefore expose the freshly stored draws -- confirm.

    Returns
    -------
    None
    """
    # State of the iteration that is about to be superseded.
    delta = self.curr_delta.copy()
    alpha = self.curr_alpha
    beta = self.curr_beta
    zeta = self.curr_zeta.copy()
    eta = self.curr_eta

    # Refresh the adaptive Metropolis proposal covariance.
    self.update_am_cov()

    # Step the iteration counter; all stores below target this index.
    self.curr_iter += 1
    it = self.curr_iter

    # Populate every unoccupied slot with a newly drawn candidate zeta.
    occupancy = bincount2D_vectorized(delta, self.max_clust_count)
    empty_slots = np.where(occupancy == 0)
    fresh_zeta = self.sample_zeta_new(alpha, beta)
    zeta[empty_slots] = fresh_zeta[empty_slots]

    # Reassign observations, then re-index labels and parameters.
    self.sample_delta(delta, zeta, eta)
    self.clean_delta_zeta(delta, zeta)
    self.samples.delta[it] = delta

    # Remaining parameter updates.
    occupied = bincount2D_vectorized(
        self.curr_delta, self.max_clust_count,
    ) > 0
    self.samples.zeta[it] = self.sample_zeta(
        zeta, self.curr_delta, alpha, beta,
    )
    self.samples.alpha[it] = self.sample_alpha(
        self.curr_zeta, alpha, occupied,
    )
    self.samples.beta[it] = self.sample_beta(
        self.curr_zeta, self.curr_alpha, occupied,
    )
    self.samples.eta[it] = self.sample_eta(eta, self.curr_delta)

    # Parallel-tempering swap, only after the configured start iteration.
    if self.curr_iter >= self.swap_start:
        self.try_tempering_swap()
def log_tempering_prior(self):
    """
    Accumulate the log prior density of the current state, one value per
    temperature.

    The result is the sum of four terms: the ``zeta`` density summed over
    occupied clusters only (NaNs ignored), and the densities of the
    current ``alpha``, ``beta`` and ``eta`` under ``self.priors``.

    Returns
    -------
    numpy.ndarray of length ``self.nTemp``
    """
    occupied = bincount2D_vectorized(
        self.curr_delta, self.max_clust_count,
    ) > 0

    # Unoccupied slots are masked out by the multiplication; any NaN this
    # produces is dropped by nansum, so the warning is silenced.
    with np.errstate(invalid='ignore'):
        zeta_logd = pt_logd_prodgamma_my_st(
            self.curr_zeta, self.curr_alpha, self.curr_beta,
        )
        zeta_term = np.nansum(occupied * zeta_logd, axis=1)

    alpha_term = logd_gamma_my(self.curr_alpha, *self.priors.alpha).sum(axis=1)
    beta_term = logd_gamma_my(self.curr_beta, *self.priors.beta).sum(axis=1)
    eta_term = logd_gamma_my(self.curr_eta, *self.priors.eta)

    log_prior = np.zeros(self.nTemp)
    for term in (zeta_term, alpha_term, beta_term, eta_term):
        log_prior += term
    return log_prior
def log_tempering_prior(self):
    """
    Accumulate the log prior density of the current state, one value per
    temperature.

    The result is the sum of four terms: the density of ``log(zeta)``
    given the current ``mu`` and ``Sigma`` summed over occupied clusters
    only (NaNs ignored), and the densities of the current ``mu``,
    ``Sigma`` and ``eta`` under ``self.priors``.

    Returns
    -------
    numpy.ndarray of length ``self.nTemp``
    """
    chol_factor = cholesky(self.curr_Sigma)
    precision = inv(self.curr_Sigma)
    occupied = bincount2D_vectorized(
        self.curr_delta, self.max_clust_count,
    ) > 0

    # log(0) for unused slots and the resulting NaNs are expected here;
    # they are masked by ``occupied`` and discarded by nansum.
    with np.errstate(divide='ignore', invalid='ignore'):
        logzeta_logd = pt_logd_mvnormal_mx_st(
            np.log(self.curr_zeta), self.curr_mu, chol_factor, precision,
        )
        zeta_term = np.nansum(occupied * logzeta_logd, axis=1)

    mu_term = logd_mvnormal_mx_st(self.curr_mu, *self.priors.mu)
    Sigma_term = logd_invwishart_ms(self.curr_Sigma, *self.priors.Sigma)
    eta_term = logd_gamma_my(self.curr_eta, *self.priors.eta)

    log_prior = np.zeros(self.nTemp)
    for term in (zeta_term, mu_term, Sigma_term, eta_term):
        log_prior += term
    return log_prior
def iter_sample(self):
    """
    Perform one sampler sweep and store every draw at the next iteration
    index of ``self.samples``.

    Order of operations: read the current state, update the adaptive
    Metropolis covariance, advance ``self.curr_iter``, fill empty cluster
    slots with fresh ``zeta`` candidates, resample and re-index ``delta``,
    then draw ``zeta``, ``mu``, ``Sigma`` and ``eta``.  Once
    ``self.curr_iter`` has reached ``self.swap_start`` a tempering swap is
    attempted.

    Each conditional update uses the values drawn earlier in the same
    sweep: the occupied-cluster mask is rebuilt from the reassigned and
    re-indexed ``delta``, ``mu`` is drawn given the new ``zeta``, and
    ``Sigma`` given the new ``zeta`` and ``mu``.

    Returns
    -------
    None
    """
    # State of the iteration that is about to be superseded.
    delta = self.curr_delta.copy()
    zeta = self.curr_zeta.copy()
    mu = self.curr_mu
    Sigma = self.curr_Sigma
    Sigma_cho = cholesky(Sigma)
    Sigma_inv = inv(Sigma)
    eta = self.curr_eta

    # Adaptive Metropolis update
    self.update_am_cov()

    # Advance the iterator; all stores below target this index.
    self.curr_iter += 1
    ci = self.curr_iter

    # Populate every unoccupied slot with a newly drawn candidate zeta.
    cluster_state = bincount2D_vectorized(delta, self.max_clust_count)
    cand_clusters = np.where(cluster_state == 0)
    zeta[cand_clusters] = self.sample_zeta_new(mu, Sigma_cho)[cand_clusters]

    # Update cluster assignments and re-index (both act in place).
    self.sample_delta(delta, zeta, eta)
    self.clean_delta_zeta(delta, zeta)
    self.samples.delta[ci] = delta

    # The occupancy computed above predates the reassignment and the
    # re-indexing, so it no longer lines up with ``delta``/``zeta``;
    # rebuild the mask from the updated labels.
    extant_clusters = bincount2D_vectorized(delta, self.max_clust_count) > 0

    # Remaining updates, each conditioning on this sweep's fresh draws.
    zeta_new = self.sample_zeta(zeta, delta, mu, Sigma_cho, Sigma_inv)
    self.samples.zeta[ci] = zeta_new
    mu_new = self.sample_mu(zeta_new, Sigma_inv, extant_clusters)
    self.samples.mu[ci] = mu_new
    self.samples.Sigma[ci] = self.sample_Sigma(
        zeta_new, mu_new, extant_clusters,
    )
    self.samples.eta[ci] = self.sample_eta(eta, self.curr_delta)

    # Attempt swap, only after the configured start iteration.
    if self.curr_iter >= self.swap_start:
        self.try_tempering_swap()
def sample_zeta(self, zeta, delta, mu, Sigma_chol, Sigma_inv):
    """
    Metropolis update of the cluster parameters on the log scale, using
    the adaptive proposal covariances, for all temperatures at once.

    Only occupied clusters receive a proposal; unoccupied slots start
    with a log acceptance ratio of -inf.  The input ``zeta`` is not
    modified.

    Parameters
    ----------
    zeta       : (t x J x D)
    delta      : (t x n)
    mu         : (t x D)
    Sigma_chol : (t x D x D)
    Sigma_inv  : (t x D x D)

    Returns
    -------
    Copy of ``zeta`` with accepted proposals substituted in.
    """
    counts = bincount2D_vectorized(delta, self.max_clust_count)
    unoccupied = (counts == 0)
    membership = delta[:, :, None] == np.arange(self.max_clust_count)
    occ = np.where(~unoccupied)
    proposal_covs = self.am_covariance_matrices(delta, occ)

    # Log acceptance ratio: 0 where a proposal is made, -inf elsewhere.
    log_accept = np.full((self.nTemp, self.max_clust_count), -np.inf)
    log_accept[occ] = 0.

    result = zeta.copy()
    with np.errstate(divide='ignore'):
        log_curr = np.log(zeta)

    # Random-walk step on log(zeta) for the occupied clusters only.
    log_cand = log_curr.copy()
    chol = cholesky(self.am_scale * proposal_covs)
    noise = normal(size=(occ[0].shape[0], self.tCol))
    log_cand[occ] += np.einsum('mpq,mq->mp', chol, noise)
    cand = np.exp(log_cand)

    # Likelihood ratio first, so that only it is scaled by ``self.itl``.
    # NOTE(review): ``itl`` is presumably the inverse temperature ladder.
    log_accept += self.log_zeta_likelihood(cand, delta, membership)
    log_accept -= self.log_zeta_likelihood(result, delta, membership)
    with np.errstate(invalid='ignore'):
        log_accept *= self.itl[:, None]
        log_accept += self.log_logzeta_prior(
            log_cand, mu, Sigma_chol, Sigma_inv,
        )
        log_accept -= self.log_logzeta_prior(
            log_curr, mu, Sigma_chol, Sigma_inv,
        )

    # Accept where log(u) falls below the log ratio (NaN never accepts).
    accepted = np.log(uniform(size=log_accept.shape)) < log_accept
    result[accepted] = cand[accepted]
    return result