Example #1
    def sample_heldout(self):
        J = self.data_t.shape[0]
        K = self.get_K()

        # Manage Alpha prior
        if not hasattr(self, 'logalpha'):
            alpha = np.exp(self.log_alpha_beta[:-1])
        else:
            alpha = np.exp(self.logalpha)

        doc_order = np.random.permutation(J)
        for doc_iter, j in enumerate(doc_order):
            nnz = self.data_t[j].sum()
            lgg.debug('%d \t %d \t %d' % (doc_iter, nnz, K))
            nnz_order = np.random.permutation(nnz)
            for i in nnz_order:

                k_ji = self.z_t[j][i]
                self.doc_topic_counts_t[j, k_ji] -= 1

                # Topic posterior: (n_jk + alpha_k) * phi[w, k]. Index phi by
                # the word alone to get the full vector over topics; indexing
                # by k_ji as well yields a scalar that cancels after
                # normalization.
                params = np.log(self.doc_topic_counts_t[j] + alpha) + np.log(self._phi[self.data_t_w[j][i]])
                params = lognormalize(params[:K])

                sample_topic = categorical(params)
                self.z_t[j][i] = sample_topic

                self.doc_topic_counts_t[j, sample_topic] += 1

        return self.z_t
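
All of these samplers finish by drawing one categorical variable from unnormalized log-probabilities. The helpers they rely on, lognormalize and categorical, are not shown on this page; a minimal sketch of how they are commonly implemented (the project's actual versions may differ) is:

    import numpy as np

    def lognormalize(x):
        # Subtract the max before exponentiating, for numerical stability.
        # Returns a probability vector summing to one, consistent with
        # categorical() being applied directly to the result.
        y = np.exp(x - x.max())
        return y / y.sum()

    def categorical(params):
        # Draw one index from the discrete distribution params.
        return np.random.multinomial(1, params).argmax()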
Example #2
    def _reduce_one(self, i, j, xij, update_kernel=True):

        if self._is_symmetric:
            self.pik = self.pjk = self.N_theta_left[i] + self.hyper_theta
        else:
            self.pik = self.N_theta_left[i] + self.hyper_theta
            self.pjk = self.N_theta_right[j] + self.hyper_theta

        if update_kernel:
            k = self.N_Y + self.hyper_phi[0]
            p = (self.N_phi + self.hyper_phi[1] + 1)**-1
            # @debug: some invalid values here sometimes!
            self._kernel = lambda x: sp.stats.nbinom.pmf(x, k, 1 - p)
            # Look-up table of kernel vectors for small counts, cheaper than
            # calling the pmf on every reduction.
            self._lut_nbinom = [sp.stats.nbinom.pmf(x, k, 1 - p) for x in range(42)]

        if len(self._lut_nbinom) > xij:
            # Saves time when the count is small.
            kernel = self._lut_nbinom[xij]
        else:
            kernel = self._kernel(xij)

        # Clamp to avoid log(0) underflow.
        kernel[kernel <= 1e-300] = 1e-300

        outer_kk = np.log(np.outer(self.pik, self.pjk)) + np.log(kernel)

        return lognormalize(outer_kk.ravel())
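
Note that k and p above are per-component arrays, so each nbinom.pmf call broadcasts and returns one kernel value per component; _lut_nbinom is therefore a list of vectors indexed by the integer edge weight. A toy illustration of that broadcasting (values made up):

    import numpy as np
    import scipy as sp
    import scipy.stats

    k = np.array([2.0, 5.0, 1.0])             # per-component shapes
    p = 1.0 / (np.array([4.0, 9.0, 2.0]) + 1)
    lut = [sp.stats.nbinom.pmf(x, k, 1 - p) for x in range(42)]
    print(lut[3].shape)                        # (3,): one value per component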
Example #3
    def _reduce_one(self, i, j, xij, update_local=True, update_kernel=True):

        if update_local:
            if self._is_symmetric:
                self.pik = self.pjk = self.N_theta_left[i] + self.hyper_theta
            else:
                self.pik = self.N_theta_left[i] + self.hyper_theta
                self.pjk = self.N_theta_right[j] + self.hyper_theta

        if update_kernel:
            k = self.N_Y + self.hyper_phi[0]
            p = self.hyper_phi[2] / (self.hyper_phi[2] * self.N_phi + 1)

            @lru_cache(maxsize=1000, typed=False)
            def _kernel(x):
                return nbinom_pmf(x, k, 1 - p)

            self._kernel = _kernel

            if self._hyper_phi == 'auto':
                N = len(self.pik)
                rk = self.hyper_phi[0]
                pk = self.hyper_phi[1]

                # Way 1
                #a = np.ones(self.N_phi.shape)*(self.c0_r0 -1)
                #a[self.N_Y < 1.461] += 1

                # Way 2
                a = self.c0_r0 + self.N_Y

                _pk = 1 - pk
                _pk[_pk < 1e-300] = 1e-200  # clamp so log(_pk) stays finite
                b = 1 / (self.c0 - self.N_phi * np.log(_pk))
                # Posterior mean instead of sampling rk ~ Gamma(a, b).
                rk = a * b

                c = self.ce_eps + self.N_Y
                d = self.ce_minus_eps + rk * self.N_phi

                pk = np.random.beta(c, d)
                e_pk = c / (c + d)
                pk[pk < 1e-300] = 1e-200

                self.hyper_phi = [rk, pk, e_pk]

                #self._residu = np.array([sp.stats.gamma.pdf(rk, self.c0*self.r0, scale=1/self.c0), sp.stats.beta.pdf(pk, self.ce*self.eps, self.ce*(1-self.eps)) ])

        kernel = self._kernel(xij)

        # Clamp to avoid log(0) underflow.
        kernel[kernel <= 1e-300] = 1e-200
        #kernel = ma.masked_invalid(kernel)

        outer_kk = np.log(np.outer(self.pik, self.pjk)) + np.log(kernel)  #+ np.log(self._residu).sum()

        return lognormalize(outer_kk.ravel())
Example #4
    def _reduce_one(self, i, j):
        xij = self._xij

        self.pik = self.N_theta_left[i] + self.hyper_theta
        self.pjk = self.N_theta_right[j] + self.hyper_theta
        pxk = self.N_phi[xij] + self.hyper_phi[xij]

        outer_kk = np.log(np.outer(self.pik, self.pjk)) + np.log(pxk) - np.log(
            self.N_phi.sum(0) + self.hyper_phi_sum)

        return lognormalize(outer_kk.ravel())
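
The vector returned here is a distribution over the K*K ordered pairs of classes for the edge's two endpoints. A hypothetical caller (not shown in the source) would draw a pair and recover the two class indices with np.unravel_index:

    import numpy as np

    K = 4
    params = np.random.dirichlet(np.ones(K * K))  # stand-in for _reduce_one's output
    flat = np.random.multinomial(1, params).argmax()
    ki, kj = np.unravel_index(flat, (K, K))       # class of node i, class of node j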
Example #5
    def _reduce_one(self, i, j):
        xij = self._xij

        self.pik = self.N_theta_left[i] + self.hyper_theta
        self.pjk = self.N_theta_right[j] + self.hyper_theta

        k = self.N_Y + self.hyper_phi[0]
        p = (self.N_phi + self.hyper_phi[1] + 1)**-1

        kernel = sp.stats.nbinom.pmf(xij, k, 1-p)
        outer_kk = np.log(np.outer(self.pik, self.pjk)) + np.log(kernel)

        return lognormalize(outer_kk.ravel())
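
This kernel is the Gamma-Poisson posterior predictive: with a Gamma(a, rate=b) prior on a Poisson rate and n observations summing to S, the predictive for a new count is NB(a + S, (b + n)/(b + n + 1)), which matches k = N_Y + hyper_phi[0] and 1 - p above if N_Y plays the role of S and N_phi of n. A quick numerical check with illustrative values:

    import numpy as np
    import scipy.stats as st
    from scipy.integrate import quad

    a, b = 2.0, 1.5   # prior shape / rate
    n, S = 10, 23     # number of observations and their sum
    x = 4             # new count

    nb = st.nbinom.pmf(x, a + S, (b + n) / (b + n + 1.0))
    marginal, _ = quad(lambda lam: st.poisson.pmf(x, lam)
                       * st.gamma.pdf(lam, a + S, scale=1.0 / (b + n)), 0, np.inf)
    assert np.isclose(nb, marginal)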
Example #6
File: mmsb.py Project: dtrckd/ml
    def prob_zji(self, j, i, K):
        k_jji = self.z[j, i, 0]
        k_jij = self.z[j, i, 1]
        self.doc_topic_counts[j, k_jji] -= self.symmetric_pt
        self.doc_topic_counts[i, k_jij] -= self.symmetric_pt

        # Keep the outer product in memory
        p_jk = self.doc_topic_counts[j] + self.alpha
        p_ik = self.doc_topic_counts[i] + self.alpha
        outer_kk = np.outer(p_jk, p_ik)

        params = np.log(outer_kk) + self.likelihood.compute(j, i, k_jji, k_jij)
        params = params[:K, :K].ravel()
        return lognormalize(params)
Example #7
    def prob_zji(self, j, i, K):
        k_ji = self.z[j][i]
        self.doc_topic_counts[j, k_ji] -= 1

        # Manage Alpha prior
        if not hasattr(self, 'logalpha'):
            log_alpha_beta = self.log_alpha_beta
            new_k = K - len(log_alpha_beta)
            if new_k > 0:
                log_alpha_beta = np.hstack((log_alpha_beta, np.ones((new_k,))*log_alpha_beta[-1]))
            alpha = np.exp(log_alpha_beta)
        else:
            alpha = np.exp(self.logalpha)

        params = np.log(self.doc_topic_counts[j] + alpha) + self.likelihood.compute(j, i, k_ji)
        return lognormalize(params[:K])
Example #8
    def prob_jk(self, j, k):
        # The current sample's own table is not conditioned on, hence m runs
        # only up to njdotk - 1.
        njdotk = self.count_k_by_j[j, k]
        if njdotk == 1:
            return np.ones(1)

        possible_ms = np.arange(1, njdotk)  # m = 1 .. njdotk - 1
        log_alpha_beta_k = self.get_log_alpha_beta(k)
        alpha_beta_k = np.exp(log_alpha_beta_k)

        normalizer = gammaln(alpha_beta_k) - gammaln(alpha_beta_k + njdotk)
        log_stir = self.stirling_mat(njdotk, possible_ms)

        params = normalizer + log_stir + possible_ms * log_alpha_beta_k

        return lognormalize(params)
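
stirling_mat(njdotk, possible_ms) is assumed to return log unsigned Stirling numbers of the first kind, log s(n, m), which weight the table-count distribution in the HDP direct-assignment sampler. A hedged sketch of one row, computed in log space from the recurrence s(n, m) = (n-1)*s(n-1, m) + s(n-1, m-1) (the project presumably precomputes a whole matrix, hence the name):

    import numpy as np

    def log_stirling_row(n):
        # log s(n, m) for m = 0..n, computed entirely in log space.
        row = np.full(n + 1, -np.inf)
        row[0] = 0.0  # s(0, 0) = 1
        with np.errstate(divide='ignore'):  # np.log(0) occurs when nn == 1
            for nn in range(1, n + 1):
                new = np.full(n + 1, -np.inf)
                new[1:nn + 1] = np.logaddexp(np.log(nn - 1) + row[1:nn + 1],
                                             row[0:nn])
                row = new
        return row

    print(np.exp(log_stirling_row(4)))  # [ 0.  6. 11.  6.  1.]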
Example #9
    def _reduce_one(self, i, j, xij, update_local=True, update_kernel=True):

        if update_local:
            if self._is_symmetric:
                self.pik = self.pjk = self.N_theta_left[i] + self.hyper_theta
            else:
                self.pik = self.N_theta_left[i] + self.hyper_theta
                self.pjk = self.N_theta_right[j] + self.hyper_theta

        if update_kernel:
            #self.N_phi[self.N_phi<=1e-300] = 1e-300
            pxk = self.N_phi[xij] + self.hyper_phi[xij]
            # debug: Underflow
            self._kern = np.log(pxk) - np.log(self.N_phi.sum(0) + self.hyper_phi_sum)

        out = np.outer(self.pik, self.pjk)
        #out = ma.masked_invalid(out)
        #out[out<=1e-300] = 1e-300
        outer_kk = np.log(out) + self._kern

        return lognormalize(outer_kk.ravel())
Example #10
    def _reduce_one(self, i, j, xij, update_local=True, update_kernel=True):

        if update_local:
            if self._is_symmetric:
                self.pik = self.pjk = self.N_theta_left[i] + self.hyper_theta
            else:
                self.pik = self.N_theta_left[i] + self.hyper_theta
                self.pjk = self.N_theta_right[j] + self.hyper_theta

        if update_kernel:

            # Draw one class per endpoint; np.random.multinomial needs
            # probabilities, so normalize the counts first (assumption: the
            # N_theta_* rows hold raw counts, as elsewhere in this class).
            theta_i = self.N_theta_left[i] / self.N_theta_left[i].sum()
            theta_j = self.N_theta_right[j] / self.N_theta_right[j].sum()
            k1 = np.where(np.random.multinomial(1, theta_i) == 1)[0]
            k2 = np.where(np.random.multinomial(1, theta_j) == 1)[0]

            btensor = np.outer(self.lbd.T,
                               self.phi).reshape(self._K, self._K, self._L)
            # Extract the K x K diagonal blocks (using a loop variable that
            # does not shadow the method argument i).
            btensor = np.array([
                btensor[l:l + self._K, l:l + self._K]
                for l in range(0, len(btensor), self._K)
            ])
            norm = btensor.sum(0)
            np_count = []
            for b in range(len(btensor)):
                btensor[b] /= norm
                np_count.append(
                    np.random.multinomial(xij, btensor[b].flatten()).reshape(
                        self._K, self._K))

            ksi = np.stack(np_count)

            self.ksi = np.random.multinomial(
                xij, self.lbd[k1] * self.phi[k2] / norm)
            self.lbd = np.random.dirichlet(0.1 + self.ksi.sum(0) +
                                           self.ksi.sum(1))

            # Unfinished (per the note below): self.random is undefined and
            # np.random.beta takes two shape parameters, plausibly
            # (self.ce_eps, self.ce_minus_eps) given the Beta prior used
            # elsewhere.
            #self.pl = np.random.beta(self.ce_eps, self.ce_minus_eps)
            self.phi = 0
            ### Unfinished.

            self._kernel = defaultdict2(lambda x: sp.stats.poisson.pmf(
                x,
                self.lbd.dot(self.phi).dot(self.lbd.T)))

            if self._hyper_phi == 'auto':
                N = len(self.pik)
                rk = self.hyper_phi[0]
                qk = self.hyper_phi[1]
                pk = 1 / (qk + 1)

                # Way 1
                ##print('Gamma %s, %s' % (self.c0*self.r0, 1/(self.c0 - N*np.log(1-pk))))
                a = self.c0_r0
                _pk = 1 - pk
                _pk[_pk < 1e-100] = 1e-100
                n = self.N_phi - 1
                n[n < 0] = 0
                b = 1 / (self.c0 - n * np.log(_pk))
                rk = np.random.gamma(a, b)
                #rk = a*b

                c = self.ce_eps + self.N_Y
                d = self.ce_minus_eps + rk * self.N_phi

                #pk = c/(c+d)
                pk = np.random.beta(c, d)
                pk[pk < 1e-100] = 1e-100

                #print('Beta %s, %s' % (self.ce*self.eps + self.N_Y, self.ce*(1-self.eps) + N*rk))
                self.hyper_phi = [rk, (1 - pk) / pk]

                # No residual term in CGS, since p(F, Phi|Z, Y) = q(F, Phi|Z).
                #self._residu = np.array([sp.stats.gamma.pdf(rk, self.c0*self.r0, scale=1/self.c0), sp.stats.beta.pdf(pk, self.ce*self.eps, self.ce*(1-self.eps)) ])

        kernel = self._kernel[xij]

        # Clamp to avoid log(0) underflow.
        kernel[kernel <= 1e-300] = 1e-100
        #kernel = ma.masked_invalid(kernel)

        outer_kk = np.log(np.outer(self.pik, self.pjk)) + np.log(kernel)  #+ np.log(self._residu).sum()

        return lognormalize(outer_kk.ravel())