def gen_alias_table(self, MH_max, perplexity=False):
    """Build the alias table for a word subset and return its sample buffer.

    The columns of ``self.theta`` selected by a word mask are divided by
    ``self.norm_const`` to give ``phi``; a column-normalised copy of ``phi``
    is handed to ``util_funcs.gen_alias_table`` together with the
    pre-allocated ``table_h`` / ``table_l`` / ``table_p`` work arrays.

    Args:
        MH_max: forwarded unchanged to ``util_funcs.gen_alias_table``.
        perplexity: when true, the mask and ``w_sample`` come from
            ``self.test_doc`` (slots 2 and 3) and no instance state is
            updated here.  Otherwise they come from ``self.current_set``
            (slots 2 and 1), and ``self.time_bak``, ``self.w4_cnt`` and
            ``self.batch_map_4w`` are refreshed as well.

    Returns:
        ``(samples, phi)``: the object created by ``util_funcs.gen_obj`` and
        the (not column-normalised) ``phi`` matrix.
    """
    # here the phi is [:, 4w]
    if perplexity:
        word_set, w_sample_slot = self.test_doc, 3
        word_mask = word_set[2]
        phi = self.theta[:, word_mask] / self.norm_const
    else:
        word_set, w_sample_slot = self.current_set, 1
        word_mask = word_set[2]

        started = time.time()
        selected = self.theta[:, word_mask]
        # Elapsed slicing time minus a size-proportional correction term.
        self.time_bak += (time.time() - started
                          - 1.5 * self.apprx * (selected.shape[0] * selected.shape[1]) / 1e9)
        phi = selected / self.norm_const
        selected = None  # drop the temporary slice before collecting
        collect()

        # Dispose of the sample object from the previous call, if there was one.
        if self.w4_cnt is not None:
            util_funcs.kill_obj(self.w4_cnt, self.samples)
        self.w4_cnt = phi.shape[1]

    # samples has shape (w, 1e3 * I)
    samples = util_funcs.gen_obj(phi.shape[1])
    if not perplexity:
        util_funcs.gen_batch_map(word_mask, self.batch_map_4w, self.W)

    util_funcs.gen_alias_table(
        table_h=self.table_h,
        table_l=self.table_l,
        table_p=self.table_p,
        phi=phi / np.sum(phi, 0),
        batch_mask=word_mask,
        w_sample=word_set[w_sample_slot],
        samples=samples,
        iter_per_update=self.samples_per_update,
        MH_max=MH_max,
    )
    return samples, phi
def gen_alias_table(self, MH_max, theta, norm_const, perplexity=False):
    """Generate the alias table used for fast per-token sampling.

    ``phi`` is ``theta`` restricted to the columns selected by a word mask,
    divided by ``norm_const``.  A column-normalised copy of ``phi`` is passed
    to ``util_funcs.gen_alias_table`` along with the pre-allocated
    ``table_h`` / ``table_l`` / ``table_p`` work arrays held on ``self``.

    Args:
        MH_max: forwarded unchanged to ``util_funcs.gen_alias_table``.
        theta: matrix whose columns are indexed by the word mask.
        norm_const: divisor applied to the selected ``theta`` columns.
        perplexity: when true, the mask and ``w_sample`` come from
            ``self.test_doc`` (slots 2 and 3) and no instance state is
            updated here.  Otherwise they come from ``self.current_set``
            (slots 2 and 1), and ``self.time_bak``, ``self.w4_cnt`` and
            ``self.batch_map_4w`` are refreshed as well.

    Returns:
        ``(samples, phi)``: the object created by ``util_funcs.gen_obj`` and
        the (not column-normalised) ``phi`` matrix.
    """
    # here the phi is [:, 4w]
    if perplexity:
        word_set, w_sample_slot = self.test_doc, 3
        word_mask = word_set[2]
        phi = theta[:, word_mask] / norm_const
    else:
        word_set, w_sample_slot = self.current_set, 1
        word_mask = word_set[2]

        started = time.time()
        selected = theta[:, word_mask]
        # Elapsed slicing time minus a size-proportional correction term.
        self.time_bak += (time.time() - started
                          - 1.5 * self.apprx * (selected.shape[0] * selected.shape[1]) / 1e9)
        phi = selected / norm_const
        selected = None  # drop the temporary slice before collecting
        collect()

        # Dispose of the sample object from the previous call, if there was one.
        if self.w4_cnt is not None:
            util_funcs.kill_obj(self.w4_cnt, self.samples)
        self.w4_cnt = phi.shape[1]

    # samples has shape (w, 1e3 * I)
    samples = util_funcs.gen_obj(phi.shape[1])
    if not perplexity:
        util_funcs.gen_batch_map(word_mask, self.batch_map_4w, self.W)

    util_funcs.gen_alias_table(
        table_h=self.table_h,
        table_l=self.table_l,
        table_p=self.table_p,
        phi=phi / np.sum(phi, 0),
        batch_mask=word_mask,
        w_sample=word_set[w_sample_slot],
        samples=samples,
        iter_per_update=self.samples_per_update,
        MH_max=MH_max,
    )
    return samples, phi
def get_perp_just_in_time(self, iter, MH_max):
    """Compute held-out perplexity from the current ``nkw`` / ``nk`` counts.

    ``self.test_doc`` is indexed as: ``[0]`` documents used for sampling,
    ``[1]`` held-out documents that are scored, ``[2]`` a word mask and
    ``[3]`` the ``w_sample`` argument of ``util_funcs.gen_alias_table``.

    Args:
        iter: passed as ``iter_per_update`` to the alias-table builder and as
            the iteration count to the sampler; ``iter // 2`` is used as the
            burn-in.  (The name shadows the builtin but is kept so keyword
            callers keep working.)
        MH_max: forwarded unchanged to the ``util_funcs`` helpers.

    Returns:
        ``exp(-log_avg_probs / num)`` where ``log_avg_probs`` sums the log
        probability of every unmasked held-out token and ``num`` is the total
        number of held-out tokens.
    """
    # ------------------------------ parameters ------------------------------
    # Only words that are both in the test mask and in self.mask are used.
    phi_mask = np.logical_and(self.test_doc[2], self.mask)
    phi = np.float32(self.nkw[:, phi_mask]) / np.float32(self.nk[:, np.newaxis])

    samples = util_funcs.gen_obj(phi.shape[1])
    table_h = np.zeros(self.K, dtype=np.int32)
    table_l = np.zeros(self.K, dtype=np.int32)
    table_p = np.zeros(self.K, dtype=np.float32)
    batch_map = np.zeros(self.V, dtype=np.int32)
    util_funcs.gen_batch_map(phi_mask, batch_map, self.V)

    # ------------------------------- sampling -------------------------------
    util_funcs.gen_alias_table(
        table_h=table_h,
        table_l=table_l,
        table_p=table_p,
        phi=phi / np.sum(phi, 0),
        batch_mask=phi_mask,
        w_sample=self.test_doc[3],
        samples=samples,
        iter_per_update=iter,
        MH_max=MH_max,
    )

    batch_N = sum(len(doc) for doc in self.test_doc[0])
    batch_D = len(self.test_doc[0])
    w_cnt = phi.shape[1]
    # One slot per document; list repetition avoids the Python-2-only xrange.
    z = [None] * batch_D
    Adk = np.zeros((batch_D, self.K), dtype=np.int32)
    Adk_mean = np.zeros(Adk.shape, dtype=np.float32)
    burn_in = iter // 2
    rand_kkk = np.random.randint(self.K, size=batch_N)

    # Return value is not used; presumably Adk / Adk_mean are filled in place
    # -- TODO confirm against util_funcs.
    util_funcs.sample_z_par_alias_per(
        batch_D, self.test_doc[0], z, w_cnt, self.K, iter, burn_in,
        self.alpha, self.alpha_bar, self.beta, self.beta_bar,
        Adk, Adk_mean, batch_map, phi, samples, MH_max, rand_kkk,
        phi_mask, True)

    # ------------------------------ perplexity ------------------------------
    # Smooth with alpha, then normalise each document row to sum to one.
    Adk_mean += self.alpha
    Adk_mean /= np.sum(Adk_mean, 1)[:, np.newaxis]

    log_avg_probs = 0
    for d, held_out in enumerate(self.test_doc[1]):
        for w in held_out:
            if not self.mask[w]:
                continue  # word excluded by self.mask contributes nothing
            log_avg_probs += np.log(np.dot(Adk_mean[d, :], phi[:, batch_map[w]]))

    # NOTE(review): num counts every held-out token, including those skipped
    # by the mask check above -- confirm this denominator is intended.
    num = sum(len(doc) for doc in self.test_doc[1])

    util_funcs.kill_obj(phi.shape[1], samples)
    return np.exp(-log_avg_probs / num)
def get_perp_just_in_time(self, iter, MH_max):
    """Compute held-out perplexity from the current ``nkw`` / ``nk`` counts.

    ``self.test_doc`` is indexed as: ``[0]`` documents used for sampling,
    ``[1]`` held-out documents that are scored, ``[2]`` a word mask and
    ``[3]`` the ``w_sample`` argument of ``util_funcs.gen_alias_table``.

    Args:
        iter: passed as ``iter_per_update`` to the alias-table builder and as
            the iteration count to the sampler; ``iter // 2`` is used as the
            burn-in.  (The name shadows the builtin but is kept so keyword
            callers keep working.)
        MH_max: forwarded unchanged to the ``util_funcs`` helpers.

    Returns:
        ``exp(-log_avg_probs / num)`` where ``log_avg_probs`` sums the log
        probability of every unmasked held-out token and ``num`` is the total
        number of held-out tokens.
    """
    # ------------------------------ parameters ------------------------------
    # Only words that are both in the test mask and in self.mask are used.
    phi_mask = np.logical_and(self.test_doc[2], self.mask)
    topic_totals = np.float32(self.nk[:, np.newaxis])
    phi = np.float32(self.nkw[:, phi_mask]) / topic_totals

    samples = util_funcs.gen_obj(phi.shape[1])
    table_h = np.zeros(self.K, dtype=np.int32)
    table_l = np.zeros(self.K, dtype=np.int32)
    table_p = np.zeros(self.K, dtype=np.float32)
    batch_map = np.zeros(self.V, dtype=np.int32)
    util_funcs.gen_batch_map(phi_mask, batch_map, self.V)

    # ------------------------------- sampling -------------------------------
    util_funcs.gen_alias_table(
        table_h=table_h,
        table_l=table_l,
        table_p=table_p,
        phi=phi / np.sum(phi, 0),
        batch_mask=phi_mask,
        w_sample=self.test_doc[3],
        samples=samples,
        iter_per_update=iter,
        MH_max=MH_max,
    )

    train_docs = self.test_doc[0]
    batch_N = sum(len(doc) for doc in train_docs)
    batch_D = len(train_docs)
    w_cnt = phi.shape[1]
    # One slot per document; list repetition avoids the Python-2-only xrange.
    z = [None] * batch_D
    Adk = np.zeros((batch_D, self.K), dtype=np.int32)
    Adk_mean = np.zeros(Adk.shape, dtype=np.float32)
    burn_in = iter // 2
    rand_kkk = np.random.randint(self.K, size=batch_N)

    # Return value is not used; presumably Adk / Adk_mean are filled in place
    # -- TODO confirm against util_funcs.
    util_funcs.sample_z_par_alias_per(
        batch_D, self.test_doc[0], z, w_cnt, self.K, iter, burn_in,
        self.alpha, self.alpha_bar, self.beta, self.beta_bar,
        Adk, Adk_mean, batch_map, phi, samples, MH_max, rand_kkk,
        phi_mask, True)

    # ------------------------------ perplexity ------------------------------
    # Smooth with alpha, then normalise each document row to sum to one.
    Adk_mean += self.alpha
    Adk_mean /= np.sum(Adk_mean, 1)[:, np.newaxis]

    log_avg_probs = 0
    for d, held_out in enumerate(self.test_doc[1]):
        for w in held_out:
            if self.mask[w]:
                token_prob = np.dot(Adk_mean[d, :], phi[:, batch_map[w]])
                log_avg_probs += np.log(token_prob)

    # NOTE(review): num counts every held-out token, including those skipped
    # by the mask check above -- confirm this denominator is intended.
    num = sum(len(doc) for doc in self.test_doc[1])

    util_funcs.kill_obj(phi.shape[1], samples)
    return np.exp(-log_avg_probs / num)
def get_perp_just_in_time(self, MH_max):
    """Compute held-out perplexity using the model's current ``theta``.

    The form of ``self.test_doc`` is::

        [ [[w], [..], ...], [[test_w], [..], ..], mask[], map[] ]

    Slot 0 is passed to ``self.sample_counts``; slot 1 holds the held-out
    tokens that are scored; slot 2 is the word mask used to build
    ``self.batch_map``.

    Args:
        MH_max: forwarded unchanged to ``self.gen_alias_table`` and
            ``self.sample_counts``.

    Returns:
        ``exp(-log_avg_probs / num)`` where ``log_avg_probs`` sums the log
        probability of every held-out token and ``num`` is the number of
        held-out tokens.
    """
    samples, phi = self.gen_alias_table(MH_max, perplexity=True)
    util_funcs.gen_batch_map(self.test_doc[2], self.batch_map, self.W)

    Adk_mean = self.sample_counts(
        self.test_doc[0], phi, len(self.test_doc[0]),
        self.samples_per_update, samples, phi.shape[1],
        self.batch_map, self.batch_map, MH_max, perplexity=True)
    collect()

    # Smooth with alpha, then normalise each document row to sum to one.
    Adk_mean += self.alpha
    Adk_mean /= np.sum(Adk_mean, 1)[:, np.newaxis]

    # enumerate replaces the Python-2-only xrange index loop.
    log_avg_probs = 0
    for d, held_out in enumerate(self.test_doc[1]):
        for w in held_out:
            # batch_map translates the word id into a column of phi.
            log_avg_probs += np.log(np.dot(Adk_mean[d, :], phi[:, self.batch_map[w]]))

    num = sum(len(doc) for doc in self.test_doc[1])

    # Dispose of the sample object created by gen_alias_table.
    util_funcs.kill_obj(phi.shape[1], samples)
    return np.exp(-log_avg_probs / num)
def get_perp_just_in_time(self, MH_max, theta=None, norm_const=None):
    """Compute held-out perplexity for a given (or the current) ``theta``.

    Note: the form of ``self.test_doc`` is assumed to be::

        [ [[w], [..], ...], [[test_w], [..], ..], mask[], map[] ]

    Slot 0 is passed to ``self.sample_counts``; slot 1 holds the held-out
    tokens that are scored; slot 2 is the word mask used to build
    ``self.batch_map``.

    Args:
        MH_max: forwarded unchanged to ``self.gen_alias_table`` and
            ``self.sample_counts``.
        theta: matrix to evaluate; defaults to ``self.theta`` when ``None``.
        norm_const: divisor for ``theta``; defaults to ``self.norm_const``
            when ``None``.

    Returns:
        ``exp(-log_avg_probs / num)`` where ``log_avg_probs`` sums the log
        probability of every held-out token and ``num`` is the number of
        held-out tokens.
    """
    if theta is None:
        theta = self.theta
    if norm_const is None:
        norm_const = self.norm_const

    samples, phi = self.gen_alias_table(MH_max, theta, norm_const, perplexity=True)
    util_funcs.gen_batch_map(self.test_doc[2], self.batch_map, self.W)

    Adk_mean = self.sample_counts(
        self.test_doc[0], phi, len(self.test_doc[0]),
        self.samples_per_update, samples, phi.shape[1],
        self.batch_map, self.batch_map, MH_max, perplexity=True)
    collect()

    # Smooth with alpha, then normalise each document row to sum to one.
    Adk_mean += self.alpha
    Adk_mean /= np.sum(Adk_mean, 1)[:, np.newaxis]

    # enumerate replaces the Python-2-only xrange index loop.
    log_avg_probs = 0
    for d, held_out in enumerate(self.test_doc[1]):
        for w in held_out:
            # batch_map translates the word id into a column of phi.
            log_avg_probs += np.log(np.dot(Adk_mean[d, :], phi[:, self.batch_map[w]]))

    num = sum(len(doc) for doc in self.test_doc[1])

    # Dispose of the sample object created by gen_alias_table.
    util_funcs.kill_obj(phi.shape[1], samples)
    return np.exp(-log_avg_probs / num)