Example #1
    def resample_omega(self, augmented_data_list):
        """
        Resample omega from its conditional Polya-gamma distribution
        :return:
        """
        K = self.K
        for data in augmented_data_list:
            x = data["x"]
            T = data["T"]

            # TODO: Fix this hack
            if "z" in data:
                z = data["z"]
            elif "states" in data:
                z = data["states"].stateseq
            else:
                raise Exception("Could not find latent states in augmented data!")

            psi = z.dot(self.C.T) + self.mu[None, :]
            N = N_vec(x).astype(np.float64)
            tmp_omg = np.zeros(N.size)
            # Draw omega ~ PG(N, psi) in parallel; pgdrawvpar fills tmp_omg in place
            ppg.pgdrawvpar(self.ppgs, N.ravel(), psi.ravel(), tmp_omg)
            data["omega"] = tmp_omg.reshape((T, self.K-1))

            # Clip away zeros so omega stays strictly positive downstream
            data["omega"] = np.clip(data["omega"], 1e-8, np.inf)
Example #2
File: lds.py Project: HIPS/pgmult
    def _info_form_heldout_log_likelihood(self, X, M=10):
        """
        We can analytically integrate out z (latent states)
        given omega. To estimate the heldout log likelihood of a
        data sequence, we Monte Carlo integrate over omega,
        where omega is drawn from the prior.
        :param X: T x K matrix of multinomial count observations
        :param M: number of Monte Carlo samples for integrating out omega
        :return: the heldout log likelihood estimate and its bootstrap
            standard error
        """
        # assert len(self.data_list) == 1, "TODO: Support more than 1 data set"

        T, K = X.shape
        assert K == self.K
        kappa = kappa_vec(X)
        N = N_vec(X)

        # Compute the data-specific normalization constant from the
        # augmented multinomial distribution
        Z_mul = (gammaln(N + 1) - gammaln(X[:, :-1] + 1) -
                 gammaln(N - X[:, :-1] + 1)).sum()
        Z_mul += (-N * np.log(2.)).sum()

        # Monte Carlo integrate w.r.t. omega ~ PG(N, 0)
        import pypolyagamma as ppg
        hlls = np.zeros(M)
        for m in range(M):
            # Sample omega using the emission distribution's samplers
            omega = np.zeros(N.size)
            ppg.pgdrawvpar(self.emission_distn.ppgs, N.ravel(),
                           np.zeros(N.size), omega)
            omega = omega.reshape((T, K - 1))

            # Exactly integrate out the latent states z using message passing.
            # The "data" here is the Gaussian potential that the Polya-gamma
            # augmentation induces on psi: mean kappa/omega, precision diag(omega)
            states = MultinomialLDSStates(model=self, data=X)
            conditional_mean = kappa / np.clip(
                omega, 1e-64, np.inf) - self.emission_distn.mu[None, :]
            conditional_prec = np.zeros((T, K - 1, K - 1))
            for t in range(T):
                conditional_prec[t, :, :] = np.diag(omega[t, :])

            Z_lds = states.info_log_likelihood(conditional_mean,
                                               conditional_prec)

            # Sum them up to get the heldout log likelihood for this omega
            hlls[m] = Z_mul + Z_lds

        # Now take the log of the average to get the log likelihood
        hll = logsumexp(hlls) - np.log(M)

        # Use bootstrap to compute error bars
        samples = np.random.choice(hlls, size=(100, M), replace=True)
        hll_samples = logsumexp(samples, axis=1) - np.log(M)
        std_hll = hll_samples.std()

        return hll, std_hll
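The estimator above has a simple core: with Z_mul + Z_lds playing the role of log p(X | omega_m), the log marginal likelihood is a log-mean-exp over the M samples, and the bootstrap reuses the same reduction. A toy sketch with stand-in values:

    import numpy as np
    from scipy.special import logsumexp

    M = 10
    lls = -100.0 + np.random.randn(M)   # stand-ins for the Z_mul + Z_lds values

    # log p(X) = log E_omega[p(X | omega)] is estimated by logsumexp - log(M)
    hll = logsumexp(lls) - np.log(M)

    # Bootstrap error bars: resample the M values with replacement and recompute
    boot = np.random.choice(lls, size=(100, M), replace=True)
    std_hll = (logsumexp(boot, axis=1) - np.log(M)).std()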
Example #3
    def resample_omega(self, x):
        """
        Resample omega from its conditional Polya-gamma distribution
        :return:
        """
        assert x.ndim == 2
        N = N_vec(x)

        # Sum the N's over time (i.e. the b's in the PG(b, psi) draw below)
        NN = N.sum(0).astype(np.float64)
        ppg.pgdrawvpar(self.ppgs, NN, self.psi, self.omega)
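The reason for drawing omega at all is that it renders the likelihood Gaussian in psi. A hypothetical scalar-case helper (resample_psi_gaussian, mu0, and sigma0_sq are illustrative names, not pgmult API) showing the conjugate update that typically follows this draw:

    import numpy as np

    def resample_psi_gaussian(kappa, omega, mu0, sigma0_sq):
        # Given omega ~ PG(b, psi), the augmented likelihood in psi is
        # proportional to exp(kappa * psi - omega * psi**2 / 2), i.e. Gaussian
        # with precision omega, so a N(mu0, sigma0_sq) prior is conjugate
        post_prec = 1.0 / sigma0_sq + omega
        post_mean = (mu0 / sigma0_sq + kappa) / post_prec
        return post_mean + np.random.randn(*np.shape(post_mean)) / np.sqrt(post_prec)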
Example #4
    def resample_omega(self):
        # Draw omega ~ PG(N, psi) in parallel, then clip away exact zeros
        pgdrawvpar(
            self.ppgs,
            N_vec(self.time_word_topic_counts, axis=1).astype('float64').ravel(),
            self.psi.ravel(),
            self.omega.ravel())
        np.clip(self.omega, 1e-32, np.inf, out=self.omega)
Example #5
    def _resample_X():
        pis = model.pi(data)
        X = np.array([np.random.multinomial(N_max, pis[m]) for m in range(M)])
        N = N_vec(X).astype(np.float64)
        kappa = kappa_vec(X)

        data["X"] = X
        data["N"] = N
        data["kappa"] = kappa
Example #6
File: gp.py Project: yinsenm/pgmult
    def add_data(self, Z, X, fixed_kernel=True):
        # Z is the array of points where multinomial vectors are observed
        # X is the corresponding set of multinomial vectors
        assert Z.ndim == 2 and Z.shape[1] == self.D
        M = Z.shape[0]
        assert X.shape == (M, self.K), "X must be MxK"

        # Compute kappa and N for each of the m inputs
        N = N_vec(X).astype(np.float64)
        kappa = kappa_vec(X)

        # Initialize the auxiliary variables
        omega = np.ones((M, self.K-1))

        # Initialize a "sample" from psi
        psi = np.zeros((M, self.K-1))

        # Precompute the kernel for the case where it is fixed
        if fixed_kernel:
            C = self.kernel.K(Z)
            C += 1e-6 * np.eye(M)
            C_inv = np.linalg.inv(C)
        else:
            C = None
            C_inv = None

        # Pack all this up into a dict
        augmented_data = {
            "X":     X,
            "Z":     Z,
            "M":     M,
            "N":     N,
            "C":     C,
            "C_inv": C_inv,
            "kappa": kappa,
            "omega": omega,
            "psi":   psi,
        }
        self.data_list.append(augmented_data)
        return augmented_data
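The 1e-6 * np.eye(M) term above is the standard jitter trick: Gram matrices from smooth kernels are often numerically near-singular, and a small diagonal boost keeps the inverse (or a Cholesky factorization) stable. A self-contained illustration, with a hand-rolled squared-exponential kernel standing in for self.kernel.K(Z):

    import numpy as np

    def rbf_kernel(Z, lengthscale=1.0):
        # Squared-exponential kernel; stands in for self.kernel.K(Z)
        sqdist = ((Z[:, None, :] - Z[None, :, :]) ** 2).sum(axis=-1)
        return np.exp(-0.5 * sqdist / lengthscale**2)

    Z = np.random.rand(50, 2)
    C = rbf_kernel(Z)
    C += 1e-6 * np.eye(len(Z))  # jitter lifts near-zero eigenvalues
    C_inv = np.linalg.inv(C)    # now well-conditioned enough to invert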