Example #1
    def resample_psi(self):
        # Information-form parameters of the Gaussian prior on psi
        Lmbda = np.linalg.inv(self.theta_prior.sigma)
        h = Lmbda.dot(self.theta_prior.mu)
        randvec = np.random.randn(self.D, self.T - 1)  # pre-generate randomness

        for d, c in enumerate(self.doc_topic_counts):
            self.psi[d] = sample_infogaussian(Lmbda + np.diag(self.omega[d]), h + kappa_vec(c), randvec[d])
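sample_infogaussian is not defined in these snippets. Judging from the call site (a precision matrix, a linear potential, and pre-generated standard-normal noise), it draws from a Gaussian given in information form; a minimal sketch under that assumption:

    import numpy as np
    from scipy.linalg import cho_solve, solve_triangular

    def sample_infogaussian(J, h, z=None):
        """Draw from N(mu, Sigma) given in information form: Sigma = inv(J), mu = inv(J) h."""
        L = np.linalg.cholesky(J)                # J = L L^T
        mu = cho_solve((L, True), h)             # mu = inv(J) h
        if z is None:
            z = np.random.randn(h.shape[0])
        # If J = L L^T, then inv(L)^T z has covariance inv(J)
        noise = solve_triangular(L, z, lower=True, trans='T')
        return mu + noise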
Example #2
    def _resample_X():
        # Relies on model, data, N_max, and M from the enclosing scope
        pis = model.pi(data)
        X = np.array([np.random.multinomial(N_max, pis[m]) for m in range(M)])
        N = N_vec(X).astype(float)
        kappa = kappa_vec(X)

        data["X"] = X
        data["N"] = N
        data["kappa"] = kappa
Example #3
File: lds.py Project: HIPS/pgmult
    def _info_form_heldout_log_likelihood(self, X, M=10):
        """
        We can analytically integrate out z (latent states)
        given omega. To estimate the heldout log likelihood of a
        data sequence, we Monte Carlo integrate over omega,
        where omega is drawn from the prior.
        :param X: T x K array of multinomial count vectors
        :param M: number of Monte Carlo samples for integrating out omega
        :return: estimated heldout log likelihood and its bootstrap std. dev.
        """
        # assert len(self.data_list) == 1, "TODO: Support more than 1 data set"

        T, K = X.shape
        assert K == self.K
        kappa = kappa_vec(X)
        N = N_vec(X)

        # Compute the data-specific normalization constant from the
        # augmented multinomial distribution
        Z_mul = (gammaln(N + 1) - gammaln(X[:, :-1] + 1) -
                 gammaln(N - X[:, :-1] + 1)).sum()
        Z_mul += (-N * np.log(2.)).sum()

        # Monte Carlo integrate w.r.t. omega ~ PG(N, 0)
        import pypolyagamma as ppg
        hlls = np.zeros(M)
        for m in range(M):
            # Sample omega using the emission distribution's samplers
            omega = np.zeros(N.size)
            ppg.pgdrawvpar(self.emission_distn.ppgs,
                           N.ravel(), np.zeros(N.size),
                           omega)
            omega = omega.reshape((T, K-1))

            # Exactly integrate out the latent states z using message passing.
            # The "data" is the Gaussian (normal) potential from the Polya-gamma
            # augmentation: mean kappa / omega (shifted by the emission mean)
            # with diagonal precision omega.
            states = MultinomialLDSStates(model=self, data=X)
            conditional_mean = (kappa / np.clip(omega, 1e-64, np.inf)
                                - self.emission_distn.mu[None, :])
            conditional_prec = np.zeros((T, K - 1, K - 1))
            for t in range(T):
                conditional_prec[t, :, :] = np.diag(omega[t, :])

            Z_lds = states.info_log_likelihood(conditional_mean, conditional_prec)

            # Sum them up to get the heldout log likelihood for this omega
            hlls[m] = Z_mul + Z_lds

        # Now take the log of the average to get the log likelihood
        hll = logsumexp(hlls) - np.log(M)

        # Use bootstrap to compute error bars
        samples = np.random.choice(hlls, size=(100, M), replace=True)
        hll_samples = logsumexp(samples, axis=1) - np.log(M)
        std_hll = hll_samples.std()

        return hll, std_hll
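The tail of the method above is a generic pattern worth isolating: average the M per-sample likelihoods in log space with logsumexp, then bootstrap those samples for an error bar. A self-contained sketch with synthetic numbers:

    import numpy as np
    from scipy.special import logsumexp

    M = 10
    hlls = -50. + np.random.randn(M)            # synthetic per-sample log likelihoods
    hll = logsumexp(hlls) - np.log(M)           # log of the average likelihood
    boot = np.random.choice(hlls, size=(100, M), replace=True)
    hll_std = (logsumexp(boot, axis=1) - np.log(M)).std()
    print(hll, "+/-", hll_std)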
Example #4
    def _get_lds_effective_params(self):
        mu_uniform, sigma_uniform = compute_uniform_mean_psi(self.V)
        mu_init = np.tile(mu_uniform, self.K)
        sigma_init = np.tile(np.diag(sigma_uniform), self.K)

        sigma_states = np.repeat(self.sigmasq_states, (self.V - 1) * self.K)

        # Conditional Gaussian pseudo-observations from the PG augmentation:
        # y = kappa / omega with observation variance 1 / omega
        sigma_obs = 1.0 / self.omega
        y = kappa_vec(self.time_word_topic_counts, axis=1) / self.omega

        return mu_init, sigma_init, sigma_states, \
            sigma_obs.reshape(y.shape[0], -1), y.reshape(y.shape[0], -1)
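Why y = kappa / omega with variance 1 / omega: conditioned on a Polya-gamma draw omega, the augmented likelihood exp(kappa * psi - omega * psi**2 / 2) equals, up to a psi-independent constant, a Gaussian likelihood with pseudo-observation kappa / omega and variance 1 / omega. A quick numeric check of that identity, independent of the class above:

    import numpy as np
    from scipy.stats import norm

    kappa, omega = 1.3, 0.7
    psi = np.linspace(-3., 3., 5)
    log_aug = kappa * psi - 0.5 * omega * psi ** 2
    log_gauss = norm.logpdf(kappa / omega, loc=psi, scale=np.sqrt(1. / omega))
    print(np.ptp(log_aug - log_gauss))   # ~0: the two differ only by a constant in psi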
Example #5
    def augment_data(self, augmented_data):
        """
        Augment the data with auxiliary variables
        :param augmented_data: dict holding the T x K count matrix under key "x"
        :return: the same dict, with "kappa" and "omega" entries added
        """
        x = augmented_data["x"]
        T, K = x.shape
        assert K == self.K

        augmented_data["kappa"] = kappa_vec(x)
        augmented_data["omega"] = np.ones((T,K-1))

        self.resample_omega([augmented_data])

        return augmented_data
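resample_omega is called here but not shown. Based on the pypolyagamma pattern used elsewhere in these examples (pgdrawvpar with the counts, the current psi, and an output buffer), a sketch of what it presumably does for one augmented-data dict; the construction of the sampler bank is an assumption:

    import numpy as np
    import pypolyagamma as ppg

    ppgs = [ppg.PyPolyaGamma(seed) for seed in range(4)]   # one sampler per thread (assumed)

    def draw_omega(N, psi):
        """omega ~ PG(N, psi), drawn elementwise in parallel."""
        out = np.zeros(N.size)
        ppg.pgdrawvpar(ppgs, np.asarray(N, dtype=np.double).ravel(),
                       psi.ravel(), out)
        return out.reshape(N.shape)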
Example #6
    def conditional_psi(self, x):
        """
        Compute the conditional distribution over psi given observation x and omega
        :param x: 2-D array of multinomial count vectors, one row per observation
        :return: mean and covariance of the Gaussian conditional over psi
        """
        assert x.ndim == 2
        Omega = np.diag(self.omega)
        Sigma_cond = inv(Omega + inv(self.Sigma))

        # kappa plays the role of the Gaussian information-form potential
        # (precision times mean), so it can simply be summed over datapoints
        kappa = kappa_vec(x).sum(0)
        mu_cond = Sigma_cond.dot(kappa + solve(self.Sigma, self.mu))

        return mu_cond, Sigma_cond
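The update in conditional_psi is the standard Gaussian information-form update: posterior precision = prior precision + diag(omega), posterior potential = prior potential + kappa. A small self-contained check that the covariance-form expressions above agree with that, using toy numbers:

    import numpy as np
    from numpy.linalg import inv, solve

    K1 = 4                                   # dimension of psi, i.e. K - 1
    Sigma = np.eye(K1) + 0.1                 # toy prior covariance (symmetric PD)
    mu = np.random.randn(K1)                 # toy prior mean
    omega = np.random.gamma(1., 1., K1)      # stand-ins for Polya-gamma draws
    kappa = np.random.randn(K1)              # stand-in for the summed kappa statistic

    # Covariance form, as in conditional_psi above
    Sigma_cond = inv(np.diag(omega) + inv(Sigma))
    mu_cond = Sigma_cond.dot(kappa + solve(Sigma, mu))

    # Information form
    J_post = inv(Sigma) + np.diag(omega)
    h_post = solve(Sigma, mu) + kappa
    assert np.allclose(Sigma_cond, inv(J_post))
    assert np.allclose(mu_cond, solve(J_post, h_post))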
Example #7
File: gp.py Project: yinsenm/pgmult
    def add_data(self, Z, X, fixed_kernel=True):
        # Z is the array of points where multinomial vectors are observed
        # X is the corresponding set of multinomial vectors
        assert Z.ndim == 2 and Z.shape[1] == self.D
        M = Z.shape[0]
        assert X.shape == (M, self.K), "X must be MxK"

        # Compute kappa and N for each of the m inputs
        N = N_vec(X).astype(float)
        kappa = kappa_vec(X)

        # Initialize the auxiliary variables
        omega = np.ones((M, self.K-1))

        # Initialize a "sample" from psi
        psi = np.zeros((M, self.K-1))

        # Precompute the kernel for the case where it is fixed
        if fixed_kernel:
            C = self.kernel.K(Z)
            C += 1e-6 * np.eye(M)
            C_inv = np.linalg.inv(C)
        else:
            C = None
            C_inv = None

        # Pack all this up into a dict
        augmented_data = {
            "X":        X,
            "Z":        Z,
            "M":        M,
            "N":        N,
            "C":        C,
            "C_inv":    C_inv,
            "kappa":    kappa,
            "omega":    omega,
            "psi":      psi,
        }
        self.data_list.append(augmented_data)
        return augmented_data
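The fixed-kernel branch above precomputes the Gram matrix once, with a small jitter for numerical stability. A standalone sketch of that pattern assuming a GPy RBF kernel; the model's actual self.kernel may be any kernel object with a K(Z) method:

    import numpy as np
    import GPy

    D, M = 2, 20
    Z = np.random.rand(M, D)                     # input locations
    kernel = GPy.kern.RBF(input_dim=D)           # example kernel (an assumption)
    C = kernel.K(Z) + 1e-6 * np.eye(M)           # Gram matrix plus jitter, as in add_data
    C_inv = np.linalg.inv(C)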