import numpy as np
from pypolyagamma import PyPolyaGamma


def sample_aux_vars(betas, num_matches, time_range, covariates=None):
    pg = PyPolyaGamma()
    if covariates is None:
        covariates = np.eye(len(betas))
    if covariates.ndim == 2:
        num_players = len(covariates)
        aux_vars = [
            np.matrix([
                [
                    pg.pgdraw(num_matches[t][i, j],
                              (covariates[i] - covariates[j]).dot(betas[t]))  # entries
                    for j in range(num_players)  # columns
                ]
                for i in range(num_players)  # rows
            ])
            for t in time_range  # index of matrix-list
        ]
    else:
        num_players = len(covariates[0])
        aux_vars = [
            np.matrix([
                [
                    pg.pgdraw(num_matches[t][i, j],
                              (covariates[t][i] - covariates[t][j]).dot(betas[t]))  # entries
                    for j in range(num_players)  # columns
                ]
                for i in range(num_players)  # rows
            ])
            for t in time_range  # index of matrix-list
        ]
    return aux_vars
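# --- Hedged usage sketch (not from the original source) ---
# A quick smoke test for sample_aux_vars with explicit one-hot covariates, so that
# (covariates[i] - covariates[j]).dot(betas[t]) reduces to betas[t][i] - betas[t][j].
# The player/period counts and random data below are hypothetical.
num_players, num_periods = 4, 3
rng = np.random.default_rng(0)
betas = [rng.normal(size=num_players) for _ in range(num_periods)]    # per-period skills
num_matches = [rng.integers(1, 5, size=(num_players, num_players))    # match counts per pair
               for _ in range(num_periods)]
aux = sample_aux_vars(betas, num_matches, range(num_periods),
                      covariates=np.eye(num_players))
print(len(aux), aux[0].shape)  # expected: 3 matrices of shape (4, 4)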
def logisticAndReject(self, X, Y):
    pg = PyPolyaGamma()
    # use N(0, I) prior
    n = X.shape[0]

    # Output layer
    # out_fit = LinearRegression(fit_intercept=False).fit(self.layers[self.nlayer-1].h, Y)
    # self.layers[self.nlayer].W = out_fit.coef_
    prior = np.random.normal(0, 1, size=self.hid_dim)
    w = np.zeros(n)
    for k in range(n):
        w[k] = pg.pgdraw(1, np.dot(self.layers[self.nlayer - 1].h[k, :], prior))
    kappa = self.layers[self.nlayer].h[:, 0] - 0.5
    omega = np.diag(w)
    Vw = np.linalg.inv(
        np.dot(np.dot(np.transpose(self.layers[self.nlayer].h), omega),
               self.layers[self.nlayer].h) + 1)[0]
    mw = Vw * np.dot(np.transpose(self.layers[self.nlayer].h), kappa)[0]
    self.layers[self.nlayer].W[:, 0] = np.random.normal(mw, Vw)

    # Hidden layers
    for l in range(self.nlayer - 1, 0, -1):
        for j in range(self.hid_dim):
            # Draw prior beta
            curr = np.random.normal(0, 1, size=self.hid_dim)
            for t in range(self.mc_iter):
                # Draw latent w
                w = np.zeros(n)
                for k in range(n):
                    w[k] = pg.pgdraw(1, np.dot(self.layers[l - 1].h[k, :], curr))
                # Draw posterior beta
                kappa = self.layers[l].h[:, j] - 0.5
                omega = np.diag(w)
                Vw = np.linalg.inv(
                    np.dot(np.dot(np.transpose(self.layers[l].h), omega),
                           self.layers[l].h) + np.eye(self.hid_dim))
                mw = np.dot(Vw, np.dot(np.transpose(self.layers[l].h), kappa))
                curr = np.random.multivariate_normal(mw, Vw)
            self.layers[l].W[:, j] = curr
def _smpl_fn(cls, rng, b, c, size):
    pg = PyPolyaGamma(rng.randint(2 ** 16))

    if not size and b.shape == c.shape == ():
        return pg.pgdraw(b, c)
    else:
        b, c = np.broadcast_arrays(b, c)
        out_shape = b.shape + tuple(size or ())

        smpl_val = np.empty(out_shape, dtype="double")
        b = np.tile(b, tuple(size or ()) + (1,))
        c = np.tile(c, tuple(size or ()) + (1,))

        pg.pgdrawv(
            np.asarray(b.flat).astype("double", copy=True),
            np.asarray(c.flat).astype("double", copy=True),
            np.asarray(smpl_val.flat),
        )

        return smpl_val
def _sample_reference_posterior(
    self,
    num_samples: int,
    num_observation: Optional[int] = None,
) -> torch.Tensor:
    from pypolyagamma import PyPolyaGamma
    from tqdm import tqdm

    self.dim_data = 10
    # stimulus_I = torch.load(self.path / "files" / "stimulus_I.pt")
    design_matrix = torch.load(self.path / "files" / "design_matrix.pt")
    true_parameters = self.get_true_parameters(num_observation)

    self.raw = True
    observation_raw = self.get_observation(num_observation)
    self.raw = False

    mcmc_num_samples_warmup = 25000
    mcmc_thinning = 25
    mcmc_num_samples = mcmc_num_samples_warmup + mcmc_thinning * num_samples

    pg = PyPolyaGamma()
    X = design_matrix.numpy()
    obs = observation_raw.numpy()
    Binv = self.prior_params["precision_matrix"].numpy()

    sample = true_parameters.numpy().reshape(-1)  # Init at true parameters
    samples = []
    for j in tqdm(range(mcmc_num_samples)):
        psi = np.dot(X, sample)
        w = np.array([pg.pgdraw(1, b) for b in psi])
        O = np.diag(w)  # noqa: E741
        V = np.linalg.inv(np.dot(np.dot(X.T, O), X) + Binv)
        m = np.dot(V, np.dot(X.T, obs.reshape(-1) - 1 * 0.5))
        sample = np.random.multivariate_normal(np.ravel(m), V)
        samples.append(sample)
    samples = np.asarray(samples).astype(np.float32)
    samples_subset = samples[mcmc_num_samples_warmup::mcmc_thinning, :]
    reference_posterior_samples = torch.from_numpy(samples_subset)

    return reference_posterior_samples
def rng_fn(cls, rng, b, c, size):
    pg = PyPolyaGamma(rng.randint(2**16))

    if not size and b.shape == c.shape == ():
        return pg.pgdraw(b, c)
    else:
        b, c = np.broadcast_arrays(b, c)
        size = tuple(size or ())

        if len(size) > 0:
            b = np.broadcast_to(b, size)
            c = np.broadcast_to(c, size)

        smpl_val = np.empty(b.shape, dtype="double")

        pg.pgdrawv(
            np.asarray(b.flat).astype("double", copy=True),
            np.asarray(c.flat).astype("double", copy=True),
            np.asarray(smpl_val.flat),
        )

        return smpl_val
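# --- Hedged standalone example (not from the original source) ---
# Shows the two call patterns the wrappers above dispatch between: a scalar
# pg.pgdraw(b, c) draw versus a vectorized pg.pgdrawv(b, c, out) fill.
# The seed and array sizes are illustrative only.
import numpy as np
from pypolyagamma import PyPolyaGamma

pg = PyPolyaGamma(seed=42)
single = pg.pgdraw(1.0, 0.5)             # one PG(1, 0.5) draw

b = np.ones(1000)                        # shape parameters b
c = np.linspace(-2.0, 2.0, 1000)         # tilting parameters c
out = np.empty(1000)
pg.pgdrawv(b, c, out)                    # fills `out` in place with PG(b_i, c_i) draws
print(single, out.mean())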
class GibbSampler(SGCP_Sampler):

    def __init__(self, *args, **kwargs):
        super(GibbSampler, self).__init__(*args, **kwargs)
        self.pg = PyPolyaGamma(seed=np.random.randint(2 ** 16, size=None))

    def run(self):
        print('Starting Gibbs')
        latent_events = np.random.rand(self.M, self.dim) * self.diff
        latent_marks = np.random.rand(self.M, 1) * 2 ** 10  # distribute on space
        marks = np.random.rand(self.N, 1) * 2 ** 10  # distribute on space

        start = time.time()
        for k in range(self.maxiter):
            if k == 1:
                loop_start = time.time()
            if k == 2:
                print('Approximately %.2f min to go' % (loop * self.maxiter / 60))

            if self.inducing_points is not None:
                if k == 0:
                    self.events_base = self.inducing_points
                    self.K = self.cov_function(self.events_base, self.events_base, self.kernelparameter)
                    self.K += np.eye(len(self.K)) * self.noise
                    self.L = np.linalg.cholesky(self.K)
                    self.L_inv = np.linalg.solve(self.L, np.eye(self.L.shape[0]))
                    self.K_inv = self.L_inv.T @ self.L_inv

                self.sample_upper_bound(latent_marks.shape[0])
                self.sample_gaussian_induced(latent_events, marks, latent_marks)
                if self.sample_kernel_parameter:
                    if (k % 10) == 0:
                        self.sample_kernelparameter()
                intensity = self.sample_results()
                latent_events, g_M, g_N = self.sample_latent_events_induced()
                latent_marks = self.sample_latent_marks(g_M)
                marks = self.sample_marks(g_N)
            else:
                self.events_base = np.concatenate((self.observed_events, latent_events), axis=0)
                self.K = self.cov_function(self.events_base, self.events_base, self.kernelparameter)
                self.K += np.eye(len(self.K)) * self.noise
                self.L = np.linalg.cholesky(self.K)
                self.L_inv = np.linalg.solve(self.L, np.eye(self.L.shape[0]))
                self.K_inv = self.L_inv.T @ self.L_inv

                self.sample_upper_bound(latent_marks.shape[0])
                self.sample_gaussian(marks, latent_marks)
                if self.sample_kernel_parameter:
                    if (k % 10) == 0:
                        self.sample_kernelparameter()  # updates the kernels
                intensity = self.sample_results()
                latent_events, g_M = self.sample_latent_events()
                latent_marks = self.sample_latent_marks(g_M)
                marks = self.sample_marks(np.array(self.g[:self.N, :]))

            if ((k > 0) or (k == self.maxiter - 1)) and (k % 50 == 0):
                if self.inducing_points is not None:
                    print('%d with inducing points' % k)
                else:
                    print(k)

            self.llambdas[k] = self.upper_bound
            self.latent_M[k] = latent_marks.shape[0]
            self.intensities[k, :] = intensity
            # self.log_likelihoods[k, :] = log_likelihood

            if k == 1:
                loop = time.time() - loop_start

        self.time = (time.time() - start) / 60
        print('Done in %.2f min' % self.time)
        self.mean_intensities = np.mean(self.intensities[self.burnin:], axis=0)

    ######################################################################

    def sample_gaussian_induced(self, latent_events, marks, latent_marks):
        L_ind = len(self.inducing_points)
        kN = self.cov_function(self.inducing_points, self.observed_events, self.kernelparameter)
        kM = self.cov_function(self.inducing_points, latent_events, self.kernelparameter)
        BN = kN[np.newaxis, ::] * kN[::, np.newaxis]  # (L,L,N)
        BM = kM[np.newaxis, ::] * kM[::, np.newaxis]  # (L,L,M)
        wN = np.repeat(marks, L_ind, axis=1)
        wN = np.repeat(wN[:, :, np.newaxis], L_ind, axis=2).T
        wM = np.repeat(latent_marks, L_ind, axis=1)
        wM = np.repeat(wM[:, :, np.newaxis], L_ind, axis=2).T
        B = np.sum(BN * wN, axis=2) + np.sum(BM * wM, axis=2)
        BLinv = np.linalg.solve(B + self.K, np.eye(L_ind))
        sigmaL = self.K @ BLinv @ self.K
        muL = 0.5 * self.K @ BLinv @ (np.sum(kN, axis=1, keepdims=True) - np.sum(kM, axis=1, keepdims=True))
        self.g = Utils.sample_gaussian(muL, sigmaL)  # + np.eye(L_ind) * self.noise)

    def sample_latent_events_induced(self):
        xx = 0
        while (xx == 0):
            latent_events, g_J, g_N = self.sample_latent_process_induced()
            xx = len(latent_events)
        return latent_events, g_J, g_N

    def sample_latent_process_induced(self):
        J = np.random.poisson(lam=self.vol * self.upper_bound, size=None)  # nb_events
        events = np.random.rand(J, self.dim) * self.diff
        g = self.sample_cond(np.concatenate((events, self.observed_events), axis=0))
        g_J = np.array(g[:len(events)])
        g_N = np.array(g[len(events):])
        R = np.random.rand(J) * self.upper_bound
        idx = R < self.upper_bound * SGCP_Sampler.sigmoid(-g_J.flatten())
        acc_events = events[idx, :]
        return acc_events, g_J[idx, :], g_N

    ######################################################################

    def sample_gaussian(self, marks, latent_marks):
        M = latent_marks.shape[0]
        marks_concat = np.concatenate((marks, latent_marks), axis=0)
        sigma = np.diag(1. / marks_concat.flatten())
        sigma_NM = sigma - sigma @ np.linalg.solve(sigma + self.K, np.eye(self.N + M)) @ sigma
        u = np.concatenate((np.full((self.N, 1), 1. / 2), np.full((M, 1), -1. / 2)), axis=0)
        mean_NM = sigma_NM @ u
        self.g = Utils.sample_gaussian(mean_NM, sigma_NM)  # + np.eye(sigma_NM.shape[0]) * self.noise)

    def sample_latent_process(self):
        J = np.random.poisson(lam=self.vol * self.upper_bound, size=None)  # nb_events
        events = np.random.rand(J, self.dim) * self.diff
        g_J = self.sample_cond(events)
        R = np.random.rand(J) * self.upper_bound
        idx = R < self.upper_bound * SGCP_Sampler.sigmoid(-g_J.flatten())
        acc_events = events[idx, :]
        return acc_events, g_J[idx, :]

    def sample_latent_events(self):
        xx = 0
        while (xx == 0):
            latent_events, g_J = self.sample_latent_process()
            xx = len(latent_events)
        return latent_events, g_J

    ######################################################################

    def sample_upper_bound(self, M):
        self.upper_bound = np.random.gamma(shape=self.alpha + M + self.N,
                                           scale=1. / (self.beta + self.vol))

    def sample_latent_marks(self, g_M):
        M = g_M.shape[0]
        latent_marks = np.zeros([M, 1])
        for i in range(M):
            latent_marks[i, :] = self.pg.pgdraw(1, g_M[i, :])
        return latent_marks

    def sample_kernelparameter(self):
        prop = np.random.randn(self.dim + 1)
        alpha = np.exp(np.log(self.kernelparameter[0]) + prop[0] * 0.05)
        beta = np.exp(np.log(self.kernelparameter[1]) + prop[1:] * 0.05)
        proposal = [alpha, beta]
        K = self.cov_function(self.events_base, self.events_base, proposal)
        K += np.eye(K.shape[0]) * self.noise
        L = np.linalg.cholesky(K)
        L_inv = np.linalg.solve(L, np.eye(L.shape[0]))
        K_inv = L_inv.T @ L_inv
        prop = - np.sum(np.log(L.diagonal())) - 0.5 * self.g.T @ K_inv @ self.g
        old = - np.sum(np.log(self.L.diagonal())) - 0.5 * self.g.T @ self.K_inv @ self.g
        A = min(0, np.asscalar(prop - old))
        u = np.log(np.random.rand())
        if u < A:
            self.K = K
            self.L = L
            self.L_inv = L_inv
            self.K_inv = K_inv
            self.kernelparameter = proposal
            print(self.kernelparameter)

    def predict(self, Xtest):
        # predict unknown function values of Xtest
        C = self.cov_function(self.events_base, Xtest, self.kernelparameter)
        K_test = self.cov_function(Xtest, Xtest, self.kernelparameter)
        mean_predict = C.T @ self.K_inv @ self.g
        cov_predict = K_test - C.T @ self.K_inv @ C
        return mean_predict, cov_predict  # posterior mean and covariance

    def sample_cond(self, Xtest):
        mean, cov = self.predict(Xtest)
        tmp = Utils.sample_gaussian(mean, cov + np.eye(cov.shape[0]) * self.noise)
        return tmp

    def sample_marks(self, g_N):
        marks = np.zeros([self.N, 1])
        for i in range(self.N):
            marks[i, :] = self.pg.pgdraw(1, g_N[i, :])
        return marks

    def sample_results(self):
        self.events.append(self.events_base)
        self.gaussians.append(self.g)
        g = self.sample_cond(self.grid_events)
        return self.upper_bound * SGCP_Sampler.sigmoid(g.flatten())
p_true = logistic(x_true)
y = npr.binomial(N, p_true)

# Gibbs sample the posterior distribution p(x | y)
# Introduce PG(N,0) auxiliary variables to render
# the model conjugate. First, initialize the PG
# sampler and the model parameters.
N_samples = 10000
pg = PyPolyaGamma(seed=0)
xs = np.zeros(N_samples)
omegas = np.ones(N_samples)

# Now run the Gibbs sampler
for i in range(1, N_samples):
    # Sample omega given x, y from its PG conditional
    omegas[i] = pg.pgdraw(N, xs[i - 1])

    # Sample x given omega, y from its Gaussian conditional
    sigmasq_hat = 1. / (1. / sigmasq + omegas[i])
    mu_hat = sigmasq_hat * (mu / sigmasq + (y - N / 2.))
    xs[i] = npr.normal(mu_hat, np.sqrt(sigmasq_hat))

# Compute the true posterior density
xx = np.linspace(x_true - 3., x_true + 3., 1000)
prior = 1. / np.sqrt(2 * np.pi * sigmasq) * np.exp(-0.5 * (xx - mu)**2 / sigmasq)
lkhd = logistic(xx) ** y * (1 - logistic(xx)) ** (N - y)
post = prior * lkhd
post /= np.trapz(post, xx)

# Plot the results
plt.figure(figsize=(5, 4))
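# --- Hedged continuation (not part of the original snippet) ---
# Sanity-check the Gibbs chain against the numerically normalized posterior
# computed above; the burn-in fraction is an arbitrary illustrative choice.
burn = N_samples // 10
print("posterior mean (Gibbs):   %.3f" % xs[burn:].mean())
print("posterior mean (numeric): %.3f" % np.trapz(xx * post, xx))
plt.hist(xs[burn:], bins=50, density=True, alpha=0.5, label="Gibbs samples")
plt.plot(xx, post, label="true posterior")
plt.xlabel("$x$")
plt.legend()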
def pg_mcmc(true_params, obs, duration=100, dt=1, seed=None, prior_dist=None):
    """Polya-Gamma sampler for GLM

    Returns
    -------
    array : samples from posterior
    """
    if prior_dist is None:
        prior_dist = smoothing_prior(n_params=true_params.size, seed=seed)

    # seeding
    np.random.seed(seed)
    pg = PyPolyaGamma()  # seed=seed

    # observation
    I = obs['I'].reshape(1, -1)
    S_obs = obs['data'].reshape(-1)

    # simulation protocol
    num_param_inf = len(true_params)
    dt = 1
    t = np.arange(0, duration, dt)

    N = 1                  # Number of trials
    M = num_param_inf - 1  # Length of the filter

    # build covariate matrix X, such that X * h returns convolution of x with filter h
    X = np.zeros(shape=(len(t), M))
    for j in range(M):
        X[j:, j] = I[0, 0:len(t) - j]

    # prior
    # smoothing prior on h; N(0, 1) on b0. Smoothness encouraged by penalizing
    # 2nd order differences of elements of filter
    # prior_dist = prior(n_params=true_params.size, seed=seed)
    Binv = prior_dist.P

    # The sampler consists of two iterative Gibbs updates:
    # 1) sample auxiliary variables: w ~ PG(N, psi)
    # 2) sample parameters: beta ~ N(m, V); V = inv(X'O X + Binv), m = V*(X'k), k = y - N/2
    nsamp = 500000  # samples to evaluate the posterior

    # add a column of 1s to the covariate matrix X, in order to model the offset too
    X = np.concatenate((np.ones(shape=(len(t), 1)), X), axis=1)

    beta = true_params * 1.
    BETA = np.zeros((M + 1, nsamp))

    for j in tqdm(range(1, nsamp)):
        psi = np.dot(X, beta)
        w = np.array([pg.pgdraw(N, b) for b in psi])
        O = np.diag(w)
        V = np.linalg.inv(np.dot(np.dot(X.T, O), X) + Binv)
        m = np.dot(V, np.dot(X.T, S_obs - N * 0.5))
        beta = np.random.multivariate_normal(np.ravel(m), V)
        BETA[:, j] = beta

    # burn-in
    burn_in = 100000
    BETA_sub_samp = BETA[:, burn_in:nsamp:30]

    # return sampling results
    return BETA_sub_samp
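# --- Hedged driver sketch (hypothetical, not from the original source) ---
# Illustrates the input shapes pg_mcmc expects: an `obs` dict with a stimulus
# trace 'I' and binary responses 'data', plus a prior object exposing a
# precision-matrix attribute `.P` (here an identity stand-in).
import numpy as np
from types import SimpleNamespace

duration, n_params = 100, 5
rng = np.random.default_rng(0)
true_params = rng.normal(size=n_params)
obs = {"I": rng.normal(size=duration),             # stimulus trace
       "data": rng.integers(0, 2, size=duration)}  # binary spike counts
prior = SimpleNamespace(P=np.eye(n_params))        # identity precision as a stand-in
# samples = pg_mcmc(true_params, obs, duration=duration, seed=0, prior_dist=prior)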
class WEIFTM():

    NO_TOPIC = -1

    def __init__(self, n_topics, alpha_0=.1, beta_0=.01, sig_0=1, topic_sparsity=.3, delta_0=1):
        self.n_topics = n_topics
        self.alpha_0 = alpha_0
        self.beta_0 = beta_0
        self.sig_0 = sig_0
        self.topic_sparsity = topic_sparsity
        self.delta_0 = delta_0
        self.log_likelihoods = []
        self.accuracies = []

    def get_documents_from_directory(self, directory_path):
        self.labels = {}
        count = 0
        class_count = -1
        classes = set()
        documents = []
        for (path, dirs, files) in os.walk(directory_path):
            files.sort()
            cl = path.strip(os.path.sep).split(os.path.sep)[-1]
            for file_path in files:
                if file_path.endswith('.txt'):
                    document_path = os.path.join(path, file_path)
                    try:
                        file = open(document_path, 'r')
                        document = file.read()
                        file.close()
                        documents.append(document)
                        if cl not in classes:
                            classes.add(cl)
                            class_count += 1
                        self.labels[count] = class_count
                        count += 1
                    except Exception as e:
                        print(e)
        return documents

    def get_documents_from_csv(self, csv_path, text_name="text", class_name="class"):
        with open(csv_path, 'r', encoding='utf8', errors='ignore') as csv_file:
            dataframe = pd.read_csv(StringIO(csv_file.read()))
        # dataframe = dataframe.iloc[np.random.permutation(dataframe.shape[0])[:10]]
        # dataframe = dataframe.reset_index()
        dataframe = dataframe.fillna(value={class_name: ''})
        dataframe[class_name] = LabelEncoder().fit_transform(dataframe[class_name])
        self.labels = dict(dataframe[class_name])
        return list(dataframe[text_name])

    def get_embedding_vocabulary(self, embedding_path):
        vocabulary = set()
        with open(embedding_path) as emb_file:
            for line in emb_file:
                if line != "":
                    word = line.strip().split(" ", 1)[0]
                    vocabulary.add(word)
        return vocabulary

    def load_corpus(self, documents, vocabulary, custom_stop_words=[]):
        preprocessed_documents = preprocess_tweets(documents, vocabulary, custom_stop_words)
        self.dictionary = corpora.Dictionary(preprocessed_documents)
        self.n_words = len(self.dictionary)
        self.corpus = [self.dictionary.doc2bow(document) for document in preprocessed_documents]
        self.n_documents = len(self.corpus)

    def load_embeddings(self, embedding_size, embedding_path, corpus_dir, use_pca=False, pca_var=.97):
        self.embedding_size = embedding_size
        cache_dir = "./cache/{}/".format(corpus_dir.strip(os.path.sep).strip('.csv').split(os.path.sep)[-1])
        embedding_cache_path = cache_dir + "embedding{}.npy".format(embedding_size)
        if os.path.isfile(embedding_cache_path):
            self.f = np.load(embedding_cache_path)
        else:
            vocabulary = set(self.dictionary.values())
            self.f = np.empty((self.n_words, self.embedding_size))
            with open(embedding_path) as emb_file:
                for line in emb_file:
                    if line != "":
                        word, str_embedding = line.strip().split(" ", 1)
                        if word in vocabulary:
                            word_index = self.dictionary.token2id[word]
                            self.f[word_index] = np.array(str_embedding.split(" "), dtype=float)
            if not os.path.isdir(cache_dir):
                os.makedirs(cache_dir)
            np.save(embedding_cache_path, self.f)

        if use_pca == True:
            self._embedding_PCA(pca_var)

        self.f_outer = np.array([np.outer(f_v, f_v) for f_v in self.f])

    def _embedding_PCA(self, var_percent):
        self.pca = PCA(self.embedding_size)
        self.f_raw = self.f
        self.pca.fit(self.f_raw)
        n_components = np.argmax(np.cumsum(self.pca.explained_variance_ratio_) > var_percent)
        self.f = self.pca.transform(self.f_raw)[:, :n_components]
        self.embedding_size_raw = self.embedding_size
        self.embedding_size = n_components

    def initialize_parameters(self):
        self._init_b()
        self._init_n_m_Z()
        self._init_lamb()
        self._init_c()
        self._init_pi()
        self._init_embedding_aux_params()

    def _init_b(self):
        self.b = np.random.binomial(1, self.topic_sparsity, (self.n_topics, self.n_words))
        self.b_sum_ax1 = np.sum(self.b, axis=1)

    def _init_n_m_Z(self):
        self.n = np.zeros((self.n_topics, self.n_words))
        self.m = np.zeros((self.n_documents, self.n_topics))
        self.Z = []
        for document_index, document in enumerate(self.corpus):
            Z_document = []
            for word_occurrence_tuple in document:
                word_index = word_occurrence_tuple[0]
                count = word_occurrence_tuple[1]
                for _ in range(count):
                    nonzero_b = self.b[:, word_index].nonzero()[0]
                    if len(nonzero_b) == 0:
                        topic_assignment = WEIFTM.NO_TOPIC
                    else:
                        topic_assignment = np.random.choice(nonzero_b)
                        self.n[topic_assignment, word_index] += 1
                        self.m[document_index, topic_assignment] += 1
                    Z_document.append([word_index, topic_assignment])
            self.Z.append(Z_document)

    def _init_lamb(self):
        sig_I_lamb = self.sig_0**2 * np.eye(self.embedding_size)
        self.lamb = np.random.multivariate_normal(np.zeros(self.embedding_size), sig_I_lamb, size=self.n_topics)
        self.sig_I_lamb_inv = self.sig_0**-2 * np.eye(self.embedding_size)

    def _init_c(self):
        sig_I_c = self.sig_0**2 * np.eye(self.n_topics)
        self.c = np.random.multivariate_normal(np.zeros(self.n_topics), sig_I_c).reshape((-1, 1))

    def _init_pi(self):
        self.pi = np.matmul(self.lamb, self.f.T) + self.c

    def _init_embedding_aux_params(self):
        self.pg = PyPolyaGamma()
        self.gamma = np.empty((self.n_topics, self.n_words))
        self.gamma_sum_ax1 = np.zeros(self.n_topics)
        self.SIGMA_inv = np.empty((self.n_topics, self.embedding_size, self.embedding_size))
        self.b_cgam = np.empty((self.n_topics, self.n_words))
        self.b_cgam_sum_ax1 = np.zeros(self.n_topics)
        self.MU = np.empty((self.n_topics, self.embedding_size))

        for k in range(self.n_topics):
            for word_index in range(self.n_words):
                self.gamma[k, word_index] = self.pg.pgdraw(1, self.pi[k, word_index])
                self.gamma_sum_ax1[k] += self.gamma[k, word_index]
            self.SIGMA_inv[k] = np.matmul(self.f_outer.T, self.gamma[k]) + self.sig_I_lamb_inv
            self.b_cgam[k] = self.b[k] - .5 - self.c[k] * self.gamma[k]
            self.b_cgam_sum_ax1[k] = np.sum(self.b_cgam[k])

        self.b_cgam_f = np.matmul(self.b_cgam, self.f)
        for k in range(self.n_topics):
            SIGMA_k = np.linalg.inv(self.SIGMA_inv[k])
            self.MU[k] = np.matmul(SIGMA_k, self.b_cgam_f[k])

    def train(self, iters=10):
        for i in range(iters):
            start_time = time.time()
            self._gibbs_sample()
            print("gibbs", time.time() - start_time)

            # start_time = time.time()
            self.log_likelihoods.append(self._compute_total_log_likelihood())
            # print("log_likelihood", time.time() - start_time)

            self.accuracies.append(self.get_classification_accuracy())
        return self.log_likelihoods, self.accuracies

    def _gibbs_sample(self):
        # gibbs_iter_time = time.time()
        for document_index, Z_document in enumerate(self.Z):
            document_length = len(Z_document)
            for token_index, Z_token_pair in enumerate(Z_document):
                # print("gibbs iter", time.time() - gibbs_iter_time)
                # gibbs_iter_time = time.time()
                # print(token_index, "/", document_length, document_index, "/", self.n_documents)
                word_index = Z_token_pair[0]
                topic_assignment = Z_token_pair[1]
                if topic_assignment != WEIFTM.NO_TOPIC:
                    self.n[topic_assignment, word_index] -= 1
                    self.m[document_index, topic_assignment] -= 1

                # start_time = time.time()
                self._sample_b(word_index)
                # print("sample_b", time.time() - start_time)

                # start_time = time.time()
                topic_assignment = self._sample_z(document_index, word_index)
                # print("sample_z", time.time() - start_time)

                Z_token_pair[1] = topic_assignment
                if topic_assignment != WEIFTM.NO_TOPIC:
                    self.n[topic_assignment, word_index] += 1
                    self.m[document_index, topic_assignment] += 1

                # start_time = time.time()
                self._sample_embeddings(word_index)
                # print("sample_embeddings", time.time() - start_time)

    def _sample_b(self, word_index):
        b_not_v = self.b_sum_ax1 - self.b[:, word_index]
        b_not_v[b_not_v == 0] += self.delta_0
        b_not_v_beta = b_not_v * self.beta_0
        num_a = b_not_v_beta + np.sum(self.n, axis=1)
        num_b = self.beta_0
        num = beta_function(num_a, num_b)
        denom = beta_function(b_not_v_beta, self.beta_0)
        activation = sigmoid(self.pi[:, word_index])
        p_1 = num * activation / denom
        p_0 = 1 - activation
        p = p_1 / (p_1 + p_0)

        self.b_sum_ax1 -= self.b[:, word_index]
        self.b[:, word_index] |= np.random.binomial(1, p)
        self.b_sum_ax1 += self.b[:, word_index]

    def _sample_z(self, document_index, word_index):
        if self.b[:, word_index].sum() == 0:
            topic_assignment = WEIFTM.NO_TOPIC
        else:
            p = (self.alpha_0 + self.m[document_index]) \
                * (self.n[:, word_index].flatten() + self.beta_0) \
                / (self.n[:, word_index] + self.beta_0).sum() \
                * self.b[:, word_index]
            p /= p.sum()
            topic_assignment = np.random.multinomial(1, p).argmax()
        return topic_assignment

    def _sample_embeddings(self, word_index):
        for k in range(self.n_topics):
            # sample gamma
            old_gamma_k_word_index = self.gamma[k, word_index]
            self.gamma[k, word_index] = self.pg.pgdraw(1, self.pi[k, word_index])
            self.gamma_sum_ax1[k] += self.gamma[k, word_index] - old_gamma_k_word_index

            # sample lamb
            self.SIGMA_inv[k] += (self.gamma[k, word_index] - old_gamma_k_word_index) * self.f_outer[word_index]
            SIGMA_k = np.linalg.inv(self.SIGMA_inv[k])
            old_b_cgam_k_word_index = self.b_cgam[k, word_index]
            self.b_cgam[k, word_index] = self.b[k, word_index] - .5 - self.c[k] * self.gamma[k, word_index]
            self.b_cgam_sum_ax1[k] += self.b_cgam[k, word_index] - old_b_cgam_k_word_index
            self.b_cgam_f[k] = self.b_cgam[k, word_index] * self.f[word_index]
            self.MU[k] = np.matmul(SIGMA_k, self.b_cgam_f[k])
            self.lamb[k] = np.random.multivariate_normal(self.MU[k], SIGMA_k)

            # sample c
            sig_k = (self.gamma_sum_ax1[k] + self.sig_0**-2)**-1
            mu_k = sig_k * self.b_cgam_sum_ax1[k]
            self.c[k] = np.random.normal(mu_k, sig_k)

        # update pi
        self.pi = np.matmul(self.lamb, self.f.T) + self.c

    def dirichlet_pdf_log(self, x, alpha):
        return np.sum(np.log(np.power(x, alpha - 1))) \
            - np.sum(np.log(gamma_function(alpha))) \
            + np.log(gamma_function(np.sum(alpha)))

    def _compute_total_log_likelihood(self):
        log_likelihood = 0

        theta = self.get_theta()
        log_theta = np.log(theta)
        phi = self.get_phi()
        log_phi = np.log(phi)
        ALPHA = self.alpha_0 * np.ones(self.n_topics)

        for document_index in range(self.n_documents):
            # theta
            # log_likelihood += np.log(dirichlet.pdf(theta[document_index], ALPHA))
            log_likelihood += self.dirichlet_pdf_log(theta[document_index], ALPHA)

            for token_index in range(len(self.Z[document_index])):
                word_index, topic_index = self.Z[document_index][token_index]
                if topic_index != WEIFTM.NO_TOPIC:
                    # w
                    log_likelihood += log_phi[topic_index, word_index]
                    # z
                    log_likelihood += log_theta[document_index, topic_index]

        log_likelihood += np.sum(np.log(bernoulli.pmf(self.b, sigmoid(self.pi))))

        for k in range(self.n_topics):
            # phi
            b_k_nonzero = self.b[k].nonzero()[0]
            BETA = self.beta_0 * np.ones(b_k_nonzero.shape[0])
            # log_likelihood += np.log(dirichlet.pdf(phi[k][b_k_nonzero], BETA))
            log_likelihood += self.dirichlet_pdf_log(phi[k][b_k_nonzero], BETA)

            # c
            log_likelihood += np.log(norm.pdf(self.c[k], 0, self.sig_0))

            for l in range(self.embedding_size):
                # lamb
                log_likelihood += np.log(norm.pdf(self.lamb[k, l], 0, self.sig_0))

        return log_likelihood

    def get_phi(self):
        n_b = (self.n + self.beta_0) * self.b
        return n_b / n_b.sum(axis=1).reshape(-1, 1)

    def get_theta(self):
        return (self.m + self.alpha_0) / (self.m + self.alpha_0).sum(axis=1).reshape(-1, 1)

    def print_phi(self, n_words):
        phi = self.get_phi()
        for topic_index, topic in enumerate(phi):
            labelled_probabilities = [(self.dictionary[word_index], prob)
                                      for word_index, prob in enumerate(topic)]
            sorted_probabilities = sorted(labelled_probabilities, key=lambda x: x[1], reverse=True)[:n_words]
            print('Topic {}:'.format(topic_index), sorted_probabilities)

    def print_theta(self):
        theta = self.get_theta()
        for document_index, document in enumerate(theta):
            print('Document {}:'.format(document_index),
                  '; Label {}'.format(self.labels[document_index]), document)

    def get_classification_accuracy(self):
        theta = self.get_theta()
        predictions = [distribution.argmax() for distribution in theta]
        prediction_set = set(predictions)
        label_set = set(self.labels.values())
        accuracies = []
        if self.n_topics >= len(label_set):
            for tup in itertools.permutations(prediction_set, len(label_set)):
                count = 0.
                for index in self.labels:
                    if tup[self.labels[index]] == predictions[index]:
                        count += 1.
                accuracies.append(count / len(predictions))
        else:
            for tup in itertools.permutations(label_set, self.n_topics):
                count = 0.
                for index in self.labels:
                    if self.labels[index] == tup[predictions[index]]:
                        count += 1.
                accuracies.append(count / len(predictions))
        return max(accuracies)

    def plot(self, values, ylabel, path):
        title = path.strip(os.path.sep).strip('.csv').split(os.path.sep)[-1]
        plt.title(title)
        plt.xlabel('epoch')
        plt.ylabel(ylabel)
        plt.plot(values)
        plt.show()

    def __getstate__(self):
        state = self.__dict__.copy()
        state.pop("pg")
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)

    def save(self, path):
        pickle.dump(self, open(path, "wb"))

    @staticmethod
    def load(path):
        return pickle.load(open(path, "rb"))
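# --- Hedged usage sketch for WEIFTM (paths and sizes below are hypothetical placeholders) ---
if __name__ == "__main__":
    model = WEIFTM(n_topics=5)
    documents = model.get_documents_from_csv("data/tweets.csv")            # hypothetical CSV
    vocabulary = model.get_embedding_vocabulary("data/glove.6B.50d.txt")   # hypothetical embedding file
    model.load_corpus(documents, vocabulary)
    model.load_embeddings(50, "data/glove.6B.50d.txt", "data/tweets.csv")
    model.initialize_parameters()
    log_likelihoods, accuracies = model.train(iters=10)
    model.print_phi(n_words=10)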
def s_blk(g_num, b_mu_ll, q_mu, b_v, q_v, b_mat_mu, q_arr, b_mu_lk, b_mat_v, ob, g_ij, z_i):
    # sample b_lk and q
    n_lk, n_lk1, m_l, m_l1 = get_nlk(ob, g_num, g_ij, z_i)
    for l in range(g_num):
        for k in range(l, g_num):
            samplenum = 100
            if l == k:
                b = np.zeros((samplenum, 2))
                b[0, 0] = b_mu_ll
                b[0, 1] = q_mu
                mu = np.array([b_mu_ll, q_mu])
                var = np.array(([b_v, 0], [0, q_v]))
                pg = PyPolyaGamma(seed=0)
                omegas = np.ones(2)
                x = np.array(([1, 0], [1, 1]))
                k_arr = np.array([n_lk1[l, l] - n_lk[l, l] / 2, m_l1[l] - m_l[l] / 2])
                for t in range(1, samplenum):
                    omegas[0] = pg.pgdraw(n_lk[l, l], b[t - 1, 0])
                    omegas[1] = pg.pgdraw(m_l[l], np.sum(b[t - 1, :]))
                    omega = np.array(([omegas[0], 0], [0, omegas[1]]))
                    v = inv(np.dot(np.dot(np.transpose(x), omega), x) + inv(var))
                    m = np.dot(v, np.dot(np.transpose(x), np.transpose(k_arr)) + np.dot(inv(var), mu))
                    s = npr.multivariate_normal(m, v)
                    b[t, 0] = np.copy(s[0])
                    b[t, 1] = np.copy(s[1])
                b_mat_mu[l, l] = np.sum(b[50:samplenum, 0]) / (samplenum - 50)
                q_arr[l] = np.sum(b[50:samplenum, 1]) / (samplenum - 50)
            else:
                b = np.zeros((samplenum, 2))
                b[0, 0] = b_mu_lk
                b[0, 1] = b_mu_lk
                mu = np.array([b_mu_lk, b_mu_lk])
                var = np.copy(b_mat_v[:, :, l, k])
                pg = PyPolyaGamma(seed=0)
                omegas = np.ones(2)
                k_arr = np.array([n_lk1[l, k] - n_lk[l, k] / 2, n_lk1[k, l] - n_lk[k, l] / 2])
                x = np.array(([1, 0], [0, 1]))
                for t in range(1, samplenum):
                    omegas[0] = pg.pgdraw(n_lk[l, k], b[t - 1, 0])
                    omegas[1] = pg.pgdraw(n_lk[k, l], b[t - 1, 1])
                    omega = np.array(([omegas[0], 0], [0, omegas[1]]))
                    v = inv(np.dot(np.dot(np.transpose(x), omega), x) + inv(var))
                    m = np.dot(v, np.dot(np.transpose(x), np.transpose(k_arr)) + np.dot(inv(var), mu))
                    s = npr.multivariate_normal(m, v)
                    b[t, 0] = np.copy(s[0])
                    b[t, 1] = np.copy(s[1])
                b_mat_mu[l, k] = np.sum(b[50:samplenum, 0]) / (samplenum - 50)
                b_mat_mu[k, l] = np.sum(b[50:samplenum, 1]) / (samplenum - 50)