def log_likelihood(self):
    ll = 0
    for states in self.states_list:
        # Project the latent state sequence through the emission matrix to
        # get log-scale activations psi, then map onto the simplex.
        psi = states.stateseq.dot(self.C.T) + self.mu
        pi = ln_psi_to_pi(psi)
        ll += np.sum(states.data * np.log(pi))
    return ll
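# For reference: ln_psi_to_pi is imported from pgmult's utilities and is not
# shown in this section. A minimal sketch, assuming it is the usual
# exponentiate-and-normalize (softmax) map of the logistic normal
# construction; the name ln_psi_to_pi_sketch is ours, for illustration only.
import numpy as np

def ln_psi_to_pi_sketch(psi):
    # Map log-scale activations psi (rows) to points on the simplex.
    # Subtracting the row max is the standard numerical-stability trick.
    psi = np.atleast_2d(psi)
    pi = np.exp(psi - psi.max(axis=1, keepdims=True))
    return pi / pi.sum(axis=1, keepdims=True)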
def lda_initializer(model):
    T, V = model.T, model.V
    # Initialize the topic-word distributions from CTM's fitted log-beta.
    model.beta = np.exp(
        np.loadtxt('ctm-out/000-log-beta.dat').reshape((-1, V))).T
    lmbda = np.loadtxt('ctm-out/000-lambda.dat').reshape((-1, T))
    # Only documents with at least one word get an initialized theta.
    nonempty_docs = np.asarray(model.data.sum(1) > 0).ravel()
    model.theta[nonempty_docs] = ln_psi_to_pi(lmbda)
    model.resample_z()
    return model
def log_joint_C(C):
    ll = 0
    for states in self.states_list:
        z = states.stateseq
        psi = z.dot(C.T) + self.mu
        pi = ln_psi_to_pi(psi)
        # TODO: Generalize for multinomial
        ll += np.nansum(states.data * np.log(pi))
    # Gaussian prior on the entries of C; it does not depend on the
    # states, so it is added once, outside the loop.
    ll += (-0.5 * C**2 / self.sigma_C).sum()
    return ll
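# How log_joint_C is consumed is not shown here. A hypothetical sketch,
# assuming a MAP estimate of C is wanted: flatten C, negate the log joint,
# and hand it to scipy's optimizer. The helper name and shapes are
# assumptions for illustration, not pgmult's API.
import numpy as np
from scipy.optimize import minimize

def map_estimate_C(log_joint_C, C0):
    shape = C0.shape
    neg_log_joint = lambda c_flat: -log_joint_C(c_flat.reshape(shape))
    result = minimize(neg_log_joint, C0.ravel(), method='L-BFGS-B')
    return result.x.reshape(shape)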
def compute_pred_likelihood(model, samples, test):
    Z_pred = get_inputs(test)
    preds = []
    for sample in samples:
        model.set_sample(sample)
        preds.append(model.predict(Z_pred, full_output=True)[1])
    psi_pred_mean = np.mean(preds, axis=0)

    # Map the mean psi to the simplex with the link matching the model class.
    if isinstance(model, pgmult.gp.MultinomialGP):
        pi_pred_mean = np.array([psi_to_pi(psi) for psi in psi_pred_mean])
    elif isinstance(model, pgmult.gp.LogisticNormalGP):
        from pgmult.internals.utils import ln_psi_to_pi
        pi_pred_mean = np.array([ln_psi_to_pi(psi) for psi in psi_pred_mean])
    else:
        raise NotImplementedError

    # Multinomial log likelihood: normalizing constant plus the data term.
    pll_gp = gammaln(test.data.sum(axis=1) + 1).sum() - gammaln(test.data + 1).sum()
    pll_gp += np.nansum(test.data * np.log(pi_pred_mean))
    return pll_gp
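# The two gammaln terms above are the multinomial normalizing constant
# log(n! / prod_k x_k!). A small self-contained check against scipy, with
# made-up counts and probabilities (illustrative only):
import numpy as np
from scipy.special import gammaln
from scipy.stats import multinomial

x = np.array([3, 0, 2])
p = np.array([0.5, 0.2, 0.3])
manual = (gammaln(x.sum() + 1) - gammaln(x + 1).sum()
          + np.nansum(x * np.log(p)))
assert np.isclose(manual, multinomial.logpmf(x, n=x.sum(), p=p))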
def predictive_log_likelihood(self, Xtest, data_index=0, Npred=100):
    """
    Monte Carlo estimate of the predictive log likelihood.

    :param Xtest: (Tpred, p) array of held-out observations
    :param data_index: which state sequence to predict forward from
    :param Npred: number of sampled prediction trajectories
    :return: the estimate and its bootstrap standard error
    """
    Tpred = Xtest.shape[0]

    # Sample particle trajectories forward from the end of the training data.
    preds = self.states_list[data_index].sample_predictions(Tpred, Npred)
    preds = np.transpose(preds, [2, 0, 1])
    assert preds.shape == (Npred, Tpred, self.n)

    psis = np.array([pred.dot(self.C.T) + self.mu for pred in preds])
    pis = np.array([ln_psi_to_pi(psi) for psi in psis])

    # TODO: Generalize for multinomial
    lls = np.zeros(Npred)
    for m in range(Npred):
        lls[m] = np.nansum(Xtest * np.log(pis[m]))

    # Average the per-trajectory likelihoods in log space.
    hll = logsumexp(lls) - np.log(Npred)

    # Use the bootstrap over trajectories to compute error bars.
    samples = np.random.choice(lls, size=(100, Npred), replace=True)
    hll_samples = logsumexp(samples, axis=1) - np.log(Npred)
    std_hll = hll_samples.std()

    return hll, std_hll
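# Why logsumexp(lls) - log(Npred): the entries of lls are per-trajectory log
# likelihoods, and the quantity of interest is the log of their average on
# the natural scale, log((1/N) * sum_m exp(lls[m])). A tiny numeric check:
import numpy as np
from scipy.special import logsumexp

lls = np.array([-10.0, -12.0, -11.5])
naive = np.log(np.mean(np.exp(lls)))        # can underflow for large data
stable = logsumexp(lls) - np.log(len(lls))  # numerically stable equivalent
assert np.isclose(naive, stable)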
def pi(self):
    psi = self.stateseq.dot(self.C.T)
    return ln_psi_to_pi(psi)
fit_lda_gibbs = sampler_fitter(
    'fit_lda_gibbs', StandardLDA, 'resample', lda_initializer)
fit_lda_collapsed = sampler_fitter(
    'fit_lda_collapsed', StandardLDA, 'resample_collapsed', lda_initializer)
fit_lnctm_gibbs = sampler_fitter(
    'fit_lnctm_gibbs', LogisticNormalCorrelatedLDA, 'resample',
    make_ctm_initializer(lambda lmbda: lmbda))
fit_sbctm_gibbs = sampler_fitter(
    'fit_sbctm_gibbs', StickbreakingCorrelatedLDA, 'resample',
    make_ctm_initializer(lambda lmbda: pi_to_psi(ln_psi_to_pi(lmbda))))


########################
#  inspecting results  #
########################

def plot_sb_interpretable_results(sb_results, words):
    nwords = 5
    Sigma = sb_results[-1][-1]
    T = Sigma.shape[0]

    def get_topwords(topic):
        # Highest-weight words in the given topic.
        return words[np.argsort(sb_results[-1][0][:, topic])[-nwords:]]

    lim = np.abs(Sigma).max()
def theta(self):
    return ln_psi_to_pi(self.psi)