def resample_omega(self, augmented_data_list):
    """
    Resample omega from its conditional Polya-gamma distribution.
    Updates data["omega"] in place for each augmented data dict.
    """
    for data in augmented_data_list:
        x = data["x"]
        T = data["T"]

        # TODO: Fix this hack
        if "z" in data:
            z = data["z"]
        elif "states" in data:
            z = data["states"].stateseq
        else:
            raise Exception("Could not find latent states in augmented data!")

        psi = z.dot(self.C.T) + self.mu[None, :]
        N = N_vec(x).astype(np.float64)
        tmp_omg = np.zeros(N.size)
        ppg.pgdrawvpar(self.ppgs, N.ravel(), psi.ravel(), tmp_omg)
        data["omega"] = tmp_omg.reshape((T, self.K - 1))

        # Clip away zeros to keep omega strictly positive
        data["omega"] = np.clip(data["omega"], 1e-8, np.inf)
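# A minimal standalone sketch of the pypolyagamma sampling pattern assumed
# above. `ppgs` is a list of independently seeded PyPolyaGamma samplers, and
# pgdrawvpar(ppgs, b, psi, out) fills `out` in place with draws from
# PG(b, psi), parallelizing across the samplers when the package is built
# with OpenMP. The array sizes here are illustrative only.
import numpy as np
import pypolyagamma as ppg

num_threads = ppg.get_omp_num_threads()
seeds = np.random.randint(2**16, size=num_threads)
ppgs = [ppg.PyPolyaGamma(seed) for seed in seeds]

b = np.ones(10)        # PG shape parameters (the counts N)
psi = np.zeros(10)     # PG tilting parameters (the linear activations)
omega = np.zeros(10)   # output buffer, written in place
ppg.pgdrawvpar(ppgs, b, psi, omega)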
def _info_form_heldout_log_likelihood(self, X, M=10):
    """
    We can analytically integrate out z (the latent states) given omega.
    To estimate the heldout log likelihood of a data sequence, we
    Monte Carlo integrate over omega, where omega is drawn from the prior.

    :param X: TxK matrix of multinomial observations
    :param M: number of Monte Carlo samples for integrating out omega
    :return:  estimated heldout log likelihood and its bootstrap standard error
    """
    # assert len(self.data_list) == 1, "TODO: Support more than 1 data set"
    T, K = X.shape
    assert K == self.K
    kappa = kappa_vec(X)
    N = N_vec(X)

    # Compute the data-specific normalization constant from the
    # augmented multinomial distribution
    Z_mul = (gammaln(N + 1) - gammaln(X[:, :-1] + 1)
             - gammaln(N - X[:, :-1] + 1)).sum()
    Z_mul += (-N * np.log(2.)).sum()

    # Monte Carlo integrate with respect to omega ~ PG(N, 0)
    import pypolyagamma as ppg
    hlls = np.zeros(M)
    for m in range(M):
        # Sample omega using the emission distribution's samplers
        omega = np.zeros(N.size)
        ppg.pgdrawvpar(self.emission_distn.ppgs,
                       N.ravel().astype(np.float64),
                       np.zeros(N.size),
                       omega)
        omega = omega.reshape((T, K - 1))

        # Exactly integrate out the latent states z using message passing.
        # The "data" is the Gaussian potential implied by the Polya-gamma
        # augmentation, encoded by the conditional mean and precision below.
        states = MultinomialLDSStates(model=self, data=X)
        conditional_mean = kappa / np.clip(omega, 1e-64, np.inf) \
                           - self.emission_distn.mu[None, :]
        conditional_prec = np.zeros((T, K - 1, K - 1))
        for t in range(T):
            conditional_prec[t, :, :] = np.diag(omega[t, :])

        Z_lds = states.info_log_likelihood(conditional_mean, conditional_prec)

        # Sum them up to get the heldout log likelihood for this omega
        hlls[m] = Z_mul + Z_lds

    # Now take the log of the average to get the log likelihood
    hll = logsumexp(hlls) - np.log(M)

    # Use the bootstrap to compute error bars
    samples = np.random.choice(hlls, size=(100, M), replace=True)
    hll_samples = logsumexp(samples, axis=1) - np.log(M)
    std_hll = hll_samples.std()

    return hll, std_hll
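# A self-contained sketch of the estimator used above: the heldout log
# likelihood is the log of the *average* of M per-sample likelihoods,
# computed stably via logsumexp, and its uncertainty comes from bootstrap
# resampling of the M values. The hlls values below are toy numbers.
import numpy as np
from scipy.special import logsumexp

hlls = np.array([-105.2, -103.8, -104.5, -106.1])  # per-sample log likelihoods
M = hlls.size

# log( (1/M) * sum_m exp(hlls[m]) ), without underflow
hll = logsumexp(hlls) - np.log(M)

# Bootstrap: resample the M values with replacement and recompute
boot = np.random.choice(hlls, size=(100, M), replace=True)
std_hll = (logsumexp(boot, axis=1) - np.log(M)).std()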
def resample_omega(self, x):
    """
    Resample omega from its conditional Polya-gamma distribution.
    """
    assert x.ndim == 2
    N = N_vec(x)

    # Sum the N's (i.e. the b's in the (1 + e^psi)^b denominator)
    NN = N.sum(0).astype(np.float64)
    ppg.pgdrawvpar(self.ppgs, NN, self.psi, self.omega)
def resample_omega(self):
    """
    Resample omega from its conditional Polya-gamma distribution.
    """
    pgdrawvpar(
        self.ppgs,
        N_vec(self.time_word_topic_counts, axis=1).astype('float64').ravel(),
        self.psi.ravel(),
        self.omega.ravel())

    # Clip away zeros to keep omega strictly positive
    np.clip(self.omega, 1e-32, np.inf, out=self.omega)
def _resample_X():
    # Draw new multinomial observations from the model's current
    # probabilities, then recompute the augmentation statistics.
    pis = model.pi(data)
    X = np.array([np.random.multinomial(N_max, pis[m]) for m in range(M)])
    N = N_vec(X).astype(np.float64)
    kappa = kappa_vec(X)
    data["X"] = X
    data["N"] = N
    data["kappa"] = kappa
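# For reference, a hedged reimplementation of the stick-breaking statistics
# N_vec and kappa_vec used throughout, assuming the standard definitions
#   N_k(x)     = x_k + x_{k+1} + ... + x_K    (counts remaining at stick k)
#   kappa_k(x) = x_k - N_k(x) / 2
# for k = 1, ..., K-1. The library's own versions may differ in signature
# (e.g. the axis argument seen above).
import numpy as np

def N_vec(x):
    # Reverse cumulative sum along the last axis gives sum_{j >= k} x_j;
    # drop the final entry, which corresponds to the deterministic last stick.
    return np.cumsum(x[..., ::-1], axis=-1)[..., ::-1][..., :-1]

def kappa_vec(x):
    return x[..., :-1] - N_vec(x) / 2.0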
def add_data(self, Z, X, fixed_kernel=True):
    # Z is the MxD array of input points where multinomial vectors are observed
    # X is the corresponding MxK set of multinomial count vectors
    assert Z.ndim == 2 and Z.shape[1] == self.D
    M = Z.shape[0]
    assert X.shape == (M, self.K), "X must be MxK"

    # Compute kappa and N for each of the M inputs
    N = N_vec(X).astype(np.float64)
    kappa = kappa_vec(X)

    # Initialize the auxiliary Polya-gamma variables
    omega = np.ones((M, self.K - 1))

    # Initialize a "sample" of psi
    psi = np.zeros((M, self.K - 1))

    # Precompute the kernel for the case where it is fixed,
    # adding a small jitter to the diagonal for numerical stability
    if fixed_kernel:
        C = self.kernel.K(Z)
        C += 1e-6 * np.eye(M)
        C_inv = np.linalg.inv(C)
    else:
        C = None
        C_inv = None

    # Pack all this up into a dict
    augmented_data = \
        {
            "X": X, "Z": Z, "M": M, "N": N,
            "C": C, "C_inv": C_inv,
            "kappa": kappa, "omega": omega, "psi": psi
        }

    self.data_list.append(augmented_data)
    return augmented_data
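# A hypothetical usage sketch for add_data, assuming a constructed model
# object with D=1 inputs, K=4 categories, and a GPy-style kernel exposing
# .K(Z). The name `model` and the shapes are illustrative, not taken from
# the surrounding code.
import numpy as np

Z = np.linspace(0, 1, 50)[:, None]                        # M=50 inputs, D=1
X = np.random.multinomial(10, np.ones(4) / 4., size=50)   # M=50 counts, K=4
data = model.add_data(Z, X, fixed_kernel=True)
print(data["N"].shape, data["kappa"].shape)               # (50, 3), (50, 3)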