Example #1
    def sample_delta(self, delta, zeta, eta):
        # Per-temperature cluster occupancy counts (t x J).
        curr_cluster_state = bincount2D_vectorized(delta, self.max_clust_count)
        # Mask of empty (candidate) clusters open for new assignments.
        cand_cluster_state = (curr_cluster_state == 0)
        # Log-likelihood of each observation under each cluster, (n x t x J).
        log_likelihood = self.log_delta_likelihood(zeta)
        tidx = np.arange(self.nTemp)
        # One uniform draw per (observation, temperature); the temperature
        # offset lets a single searchsorted over the flattened per-row CDFs
        # resolve all temperatures at once.
        p = uniform(size=(self.nDat, self.nTemp))
        p += tidx[None, :]
        scratch = np.empty(curr_cluster_state.shape)
        for i in range(self.nDat):
            # Remove observation i from its current cluster.
            curr_cluster_state[tidx, delta.T[i]] -= 1
            # Unnormalized weights: occupancy for extant clusters, eta
            # split evenly across the empty candidate clusters.
            scratch[:] = 0
            scratch += curr_cluster_state
            scratch += cand_cluster_state * (
                eta / (cand_cluster_state.sum(axis=1) + 1e-9))[:, None]
            with np.errstate(divide='ignore', invalid='ignore'):
                np.log(scratch, out=scratch)
            scratch += log_likelihood[i]
            # Zero-weight clusters become -inf rather than NaN.
            np.nan_to_num(scratch, copy=False, nan=-np.inf)
            # Shift, exponentiate, and build each temperature's CDF.
            scratch -= scratch.max(axis=1)[:, None]
            with np.errstate(under='ignore'):
                np.exp(scratch, out=scratch)
            np.cumsum(scratch, axis=1, out=scratch)
            scratch /= scratch.T[-1][:, None]
            scratch += tidx[:, None]
            # Invert all CDFs with one searchsorted over the raveled array.
            delta.T[i] = np.searchsorted(scratch.ravel(),
                                         p[i]) % self.max_clust_count
            # Register observation i in its (possibly new) cluster.
            curr_cluster_state[tidx, delta.T[i]] += 1
            cand_cluster_state[tidx, delta.T[i]] = False
        return
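The helper bincount2D_vectorized used throughout these examples is not shown on this page. A minimal sketch of what it presumably does, assuming it is the usual row-wise bincount (one row per temperature, counting the members of each cluster label):

import numpy as np

def bincount2D_vectorized(arr, m):
    # Row-wise bincount: for each row of `arr` (integer labels in
    # [0, m)), count occurrences of 0..m-1. Offsetting each row into
    # its own block of length m lets one np.bincount cover all rows.
    offset = arr + m * np.arange(arr.shape[0])[:, None]
    return np.bincount(offset.ravel(),
                       minlength=arr.shape[0] * m).reshape(-1, m)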
Example #2
    def iter_sample(self):
        # Working copies of the current chain state.
        delta = self.curr_delta.copy()
        alpha = self.curr_alpha
        beta = self.curr_beta
        zeta = self.curr_zeta.copy()
        eta = self.curr_eta

        # Adaptive Metropolis update of the proposal covariances.
        self.update_am_cov()

        # Advance the iterator.
        self.curr_iter += 1
        ci = self.curr_iter

        # Draw fresh parameters for the empty (candidate) clusters.
        cand_clusters = np.where(
            bincount2D_vectorized(delta, self.max_clust_count) == 0)
        zeta[cand_clusters] = self.sample_zeta_new(alpha, beta)[cand_clusters]

        # Update cluster assignments and re-index.
        self.sample_delta(delta, zeta, eta)
        self.clean_delta_zeta(delta, zeta)
        self.samples.delta[ci] = delta

        # Sample the remaining parameters given the updated assignments.
        extant_clusters = bincount2D_vectorized(self.curr_delta,
                                                self.max_clust_count) > 0
        self.samples.zeta[ci] = self.sample_zeta(
            zeta,
            self.curr_delta,
            alpha,
            beta,
        )
        self.samples.alpha[ci] = self.sample_alpha(self.curr_zeta, alpha,
                                                   extant_clusters)
        self.samples.beta[ci] = self.sample_beta(
            self.curr_zeta,
            self.curr_alpha,
            extant_clusters,
        )
        self.samples.eta[ci] = self.sample_eta(eta, self.curr_delta)

        # Attempt a between-chain tempering swap.
        if self.curr_iter >= self.swap_start:
            self.try_tempering_swap()
        return
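try_tempering_swap is not shown in these examples. The sketch below is a standalone rendering of the standard adjacent-pair parallel-tempering swap it presumably performs, assuming the full posterior is tempered by the inverse-temperature ladder (itl in Example #6); states and log_posterior here are hypothetical stand-ins for the class's internal state, not the author's API.

import numpy as np
from numpy.random import uniform

def tempering_swap_sketch(states, itl, log_posterior):
    # states: chain states, coldest first; itl: inverse temperatures;
    # log_posterior: callable giving the untempered log posterior.
    lp = np.array([log_posterior(s) for s in states])
    for i in range(len(states) - 1):
        # Accept a swap of adjacent chains with the usual PT ratio.
        log_alpha = (itl[i] - itl[i + 1]) * (lp[i + 1] - lp[i])
        if np.log(uniform()) < log_alpha:
            states[i], states[i + 1] = states[i + 1], states[i]
            lp[i], lp[i + 1] = lp[i + 1], lp[i]
    return states

Swapping only adjacent pairs keeps acceptance rates workable, since neighbouring temperatures have the greatest overlap in their tempered posteriors.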
Example #3
    def log_tempering_prior(self):
        # Log prior density of the current state, one entry per temperature.
        out = np.zeros(self.nTemp)
        extant_clusters = (bincount2D_vectorized(self.curr_delta,
                                                 self.max_clust_count) > 0)
        # Gamma prior on zeta, summed over occupied clusters only.
        with np.errstate(invalid='ignore'):
            out += np.nansum(
                extant_clusters * pt_logd_prodgamma_my_st(
                    self.curr_zeta,
                    self.curr_alpha,
                    self.curr_beta,
                ),
                axis=1,
            )
        # Hyperpriors on alpha, beta, and the concentration parameter eta.
        out += logd_gamma_my(self.curr_alpha, *self.priors.alpha).sum(axis=1)
        out += logd_gamma_my(self.curr_beta, *self.priors.beta).sum(axis=1)
        out += logd_gamma_my(self.curr_eta, *self.priors.eta)
        return out
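logd_gamma_my is applied against self.priors.* but not defined here. A minimal sketch, assuming a shape-rate parameterization of the gamma density (the actual parameterization is a guess):

import numpy as np
from scipy.special import gammaln

def logd_gamma_my(x, shape, rate):
    # Gamma(shape, rate) log density, vectorized over x.
    return (shape * np.log(rate) - gammaln(shape)
            + (shape - 1) * np.log(x) - rate * x)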
Example #4
    def log_tempering_prior(self):
        # Log prior density of the current state, one entry per temperature.
        out = np.zeros(self.nTemp)
        Sigma_cho = cholesky(self.curr_Sigma)
        Sigma_inv = inv(self.curr_Sigma)
        extant_clusters = (bincount2D_vectorized(self.curr_delta,
                                                 self.max_clust_count) > 0)
        # Log-normal prior on zeta (multivariate normal on log zeta),
        # summed over occupied clusters only.
        with np.errstate(divide='ignore', invalid='ignore'):
            out += np.nansum(
                extant_clusters * pt_logd_mvnormal_mx_st(
                    np.log(self.curr_zeta),
                    self.curr_mu,
                    Sigma_cho,
                    Sigma_inv,
                ),
                axis=1,
            )
        # Hyperpriors on mu, Sigma, and the concentration parameter eta.
        out += logd_mvnormal_mx_st(self.curr_mu, *self.priors.mu)
        out += logd_invwishart_ms(self.curr_Sigma, *self.priors.Sigma)
        out += logd_gamma_my(self.curr_eta, *self.priors.eta)
        return out
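pt_logd_mvnormal_mx_st is likewise referenced but not defined on this page. A hedged sketch, assuming it evaluates a batched multivariate normal log density with one mean vector and one covariance per temperature, broadcast across the cluster axis:

import numpy as np

def pt_logd_mvnormal_mx_st(x, mu, Sigma_cho, Sigma_inv):
    # x: (t x J x D), mu: (t x D); returns (t x J) log densities.
    d = x - mu[:, None, :]
    maha = np.einsum('tjd,tde,tje->tj', d, Sigma_inv, d)
    # log|Sigma| from the diagonal of the Cholesky factor.
    logdet = 2 * np.log(np.diagonal(Sigma_cho, axis1=1, axis2=2)).sum(axis=1)
    D = x.shape[-1]
    return -0.5 * (D * np.log(2 * np.pi) + logdet[:, None] + maha)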
Example #5
    def iter_sample(self):
        # Working copies of the current chain state.
        delta = self.curr_delta.copy()
        zeta = self.curr_zeta.copy()
        mu = self.curr_mu
        Sigma = self.curr_Sigma
        Sigma_cho = cholesky(Sigma)
        Sigma_inv = inv(Sigma)
        eta = self.curr_eta

        # Adaptive Metropolis update of the proposal covariances.
        self.update_am_cov()

        # Advance the iterator.
        self.curr_iter += 1
        ci = self.curr_iter

        # Draw fresh parameters for the empty (candidate) clusters.
        cluster_state = bincount2D_vectorized(delta, self.max_clust_count)
        cand_clusters = np.where(cluster_state == 0)
        zeta[cand_clusters] = self.sample_zeta_new(mu,
                                                   Sigma_cho)[cand_clusters]

        # Update cluster assignments and re-index.
        self.sample_delta(delta, zeta, eta)
        self.clean_delta_zeta(delta, zeta)
        self.samples.delta[ci] = delta

        # Sample the remaining parameters given the updated assignments.
        # Recompute occupancy after re-indexing rather than reusing the
        # stale pre-update cluster_state (mirrors Example #2).
        extant_clusters = bincount2D_vectorized(delta,
                                                self.max_clust_count) > 0
        self.samples.zeta[ci] = self.sample_zeta(zeta, delta, mu, Sigma_cho,
                                                 Sigma_inv)
        self.samples.mu[ci] = self.sample_mu(zeta, Sigma_inv, extant_clusters)
        self.samples.Sigma[ci] = self.sample_Sigma(zeta, mu, extant_clusters)
        self.samples.eta[ci] = self.sample_eta(eta, self.curr_delta)

        # Attempt a between-chain tempering swap.
        if self.curr_iter >= self.swap_start:
            self.try_tempering_swap()
        return
Example #6
    def sample_zeta(self, zeta, delta, mu, Sigma_chol, Sigma_inv):
        """
        Adaptive-Metropolis update of the cluster parameters zeta,
        proposed as a correlated random walk on the log scale.

        zeta       : (t x J x D)
        delta      : (t x n)
        mu         : (t x D)
        Sigma_chol : (t x D x D)
        Sigma_inv  : (t x D x D)
        """
        curr_cluster_state = bincount2D_vectorized(delta, self.max_clust_count)
        cand_cluster_state = (curr_cluster_state == 0)
        delta_ind_mat = delta[:, :, None] == np.arange(self.max_clust_count)
        # Only occupied clusters receive a proposal.
        idx = np.where(~cand_cluster_state)
        covs = self.am_covariance_matrices(delta, idx)

        # Log acceptance ratio; -inf on empty clusters so they never move.
        am_alpha = np.zeros((self.nTemp, self.max_clust_count))
        am_alpha[:] = -np.inf
        am_alpha[idx] = 0.

        # Batched correlated proposal: one standard-normal draw per
        # occupied cluster, mapped through the Cholesky factor of its
        # scaled adaptive covariance.
        zcurr = zeta.copy()
        with np.errstate(divide='ignore'):
            lzcurr = np.log(zeta)
        lzcand = lzcurr.copy()
        lzcand[idx] += np.einsum(
            'mpq,mq->mp',
            cholesky(self.am_scale * covs),
            normal(size=(idx[0].shape[0], self.tCol)),
        )
        zcand = np.exp(lzcand)

        # Tempered likelihood ratio plus untempered prior ratio on the
        # log scale; the symmetric proposal density cancels.
        am_alpha += self.log_zeta_likelihood(zcand, delta, delta_ind_mat)
        am_alpha -= self.log_zeta_likelihood(zcurr, delta, delta_ind_mat)
        with np.errstate(invalid='ignore'):
            am_alpha *= self.itl[:, None]
        am_alpha += self.log_logzeta_prior(lzcand, mu, Sigma_chol, Sigma_inv)
        am_alpha -= self.log_logzeta_prior(lzcurr, mu, Sigma_chol, Sigma_inv)

        # Accept per (temperature, cluster) via the log-uniform test.
        keep = np.where(np.log(uniform(size=am_alpha.shape)) < am_alpha)
        zcurr[keep] = zcand[keep]
        return zcurr
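The np.einsum('mpq,mq->mp', ...) step above is a batched matrix-vector product: each occupied cluster receives an independent standard-normal draw mapped through the Cholesky factor of its own scaled proposal covariance. In isolation, with toy shapes (3 clusters in 2 dimensions):

import numpy as np
from numpy.random import normal

covs = np.stack([np.eye(2) * s for s in (0.5, 1.0, 2.0)])
eps = normal(size=(3, 2))
steps = np.einsum('mpq,mq->mp', np.linalg.cholesky(covs), eps)
# steps[m] = L_m @ eps[m], i.e. a draw from N(0, covs[m]) per cluster.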