Example #1
 def gen_alias_table(self, MH_max, perplexity=False):
     # here phi is the [:, 4w] slice of theta (columns for the words in the current window)
     if perplexity:
         phi = self.theta[:, self.test_doc[2]] / self.norm_const
         # samples has shape (w, 1e3 * I)
         samples = util_funcs.gen_obj(phi.shape[1])
         util_funcs.gen_alias_table(table_h=self.table_h, table_l=self.table_l, table_p=self.table_p,
                                    phi=phi / np.sum(phi, 0), batch_mask=self.test_doc[2], w_sample=self.test_doc[3],
                                    samples=samples, iter_per_update=self.samples_per_update, MH_max=MH_max)
     else:
         cul_time = time.time()
         tmp = self.theta[:, self.current_set[2]]
         self.time_bak += time.time() - cul_time - 1.5 * self.apprx * (tmp.shape[0] * tmp.shape[1]) / 1e9
         phi = tmp / self.norm_const
         tmp = None
         collect()
         if self.w4_cnt is not None:
             util_funcs.kill_obj(self.w4_cnt, self.samples)
         self.w4_cnt = phi.shape[1]
         # samples has shape (w, 1e3 * I)
         samples = util_funcs.gen_obj(phi.shape[1])
         util_funcs.gen_batch_map(self.current_set[2], self.batch_map_4w, self.W)
         util_funcs.gen_alias_table(table_h=self.table_h, table_l=self.table_l, table_p=self.table_p,
                                    phi=phi / np.sum(phi, 0), batch_mask=self.current_set[2],
                                    w_sample=self.current_set[1], samples=samples,
                                    iter_per_update=self.samples_per_update, MH_max=MH_max)
     return samples, phi
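
The table_h / table_l / table_p buffers handed to util_funcs.gen_alias_table are, by all appearances, the arrays of Walker's alias method, which is what makes each of the 1e3 * I cached topic samples an O(1) draw. Below is a minimal pure-Python sketch of that construction, assuming table_p holds the within-bucket acceptance thresholds and a single alias array stands in for the high/low pairing (build_alias and alias_draw are hypothetical names, not the Cython helper's API):

    import numpy as np

    def build_alias(p):
        """Walker/Vose alias tables for a normalized distribution p over K topics."""
        K = len(p)
        prob = np.zeros(K)                    # acceptance threshold per bucket (cf. table_p)
        alias = np.zeros(K, dtype=np.int32)   # paired 'tall' topic per bucket
        scaled = np.asarray(p, dtype=float) * K
        small = [k for k in range(K) if scaled[k] < 1.0]
        large = [k for k in range(K) if scaled[k] >= 1.0]
        while small and large:
            s, l = small.pop(), large.pop()
            prob[s] = scaled[s]
            alias[s] = l
            scaled[l] -= 1.0 - scaled[s]      # donate mass from l to fill bucket s
            (small if scaled[l] < 1.0 else large).append(l)
        for k in small + large:               # leftovers equal 1.0 up to rounding
            prob[k] = 1.0
        return prob, alias

    def alias_draw(prob, alias, rng=np.random):
        """O(1) sample: pick a bucket uniformly, then keep it or take its alias."""
        k = rng.randint(len(prob))
        return k if rng.rand() < prob[k] else alias[k]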
Example #2
 def gen_alias_table(self, MH_max, theta, norm_const, perplexity=False):
     """
         generate the alias table for fast per-token sampling
     """
     # here phi is the [:, 4w] slice of theta (columns for the words in the current window)
     if perplexity:
         phi = theta[:, self.test_doc[2]] / norm_const
         # samples has shape (w, 1e3 * I)
         samples = util_funcs.gen_obj(phi.shape[1])
         util_funcs.gen_alias_table(table_h=self.table_h, table_l=self.table_l, table_p=self.table_p,
                                    phi=phi / np.sum(phi, 0), batch_mask=self.test_doc[2], w_sample=self.test_doc[3],
                                    samples=samples, iter_per_update=self.samples_per_update, MH_max=MH_max)
     else:
         cul_time = time.time()
         tmp = theta[:, self.current_set[2]]
         self.time_bak += time.time() - cul_time - 1.5 * self.apprx * (tmp.shape[0] * tmp.shape[1]) / 1e9
         phi = tmp / norm_const
         tmp = None
         collect()
         if self.w4_cnt is not None:
             util_funcs.kill_obj(self.w4_cnt, self.samples)
         self.w4_cnt = phi.shape[1]
         # samples has shape (w, 1e3 * I)
         samples = util_funcs.gen_obj(phi.shape[1])
         util_funcs.gen_batch_map(self.current_set[2], self.batch_map_4w, self.W)
         util_funcs.gen_alias_table(table_h=self.table_h, table_l=self.table_l, table_p=self.table_p,
                                    phi=phi / np.sum(phi, 0), batch_mask=self.current_set[2],
                                    w_sample=self.current_set[1], samples=samples,
                                    iter_per_update=self.samples_per_update, MH_max=MH_max)
     return samples, phi
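
util_funcs.gen_batch_map, called just before the alias-table build, appears to turn the boolean vocabulary mask into a lookup from global word id to compact column index, so that phi[:, batch_map[w]] addresses the masked [:, 4w] matrix directly. A numpy sketch of the presumed semantics (this is an inference from how batch_map_4w is used elsewhere in the examples, not the Cython source):

    import numpy as np

    def gen_batch_map(batch_mask, batch_map, W):
        """For each word id w < W with batch_mask[w] set, record its column index
        in the masked phi; entries for unmasked words are left untouched."""
        batch_map[batch_mask] = np.arange(np.count_nonzero(batch_mask), dtype=batch_map.dtype)

The column order matches theta[:, batch_mask] because numpy boolean indexing preserves ascending word-id order.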
Example #3
    def get_perp_just_in_time(self, iter, MH_max):
        # *************************************** parameters ************************************************
        phi_mask = np.logical_and(self.test_doc[2], self.mask)
        phi = np.float32(self.nkw[:, phi_mask]) / np.float32(
            self.nk[:, np.newaxis])

        samples = util_funcs.gen_obj(phi.shape[1])
        table_h = np.zeros(self.K, dtype=np.int32)
        table_l = np.zeros(self.K, dtype=np.int32)
        table_p = np.zeros(self.K, dtype=np.float32)
        batch_map = np.zeros(self.V, dtype=np.int32)
        util_funcs.gen_batch_map(phi_mask, batch_map, self.V)

        # *************************************** sampling ************************************************
        util_funcs.gen_alias_table(table_h=table_h,
                                   table_l=table_l,
                                   table_p=table_p,
                                   phi=phi / np.sum(phi, 0),
                                   batch_mask=phi_mask,
                                   w_sample=self.test_doc[3],
                                   samples=samples,
                                   iter_per_update=iter,
                                   MH_max=MH_max)

        batch_N = sum(len(doc) for doc in self.test_doc[0])
        batch_D = len(self.test_doc[0])
        w_cnt = phi.shape[1]
        z = [None for _ in xrange(batch_D)]
        Adk = np.zeros((batch_D, self.K), dtype=np.int32)
        Adk_mean = np.zeros(Adk.shape, dtype=np.float32)
        burn_in = iter // 2
        rand_kkk = np.random.randint(self.K, size=batch_N)

        util_funcs.sample_z_par_alias_per(batch_D, self.test_doc[0], z, w_cnt,
                                          self.K, iter, burn_in, self.alpha,
                                          self.alpha_bar, self.beta,
                                          self.beta_bar, Adk, Adk_mean,
                                          batch_map, phi, samples, MH_max,
                                          rand_kkk, phi_mask, True)
        # *************************************** perplexity ************************************************
        Adk_mean += self.alpha
        Adk_mean /= np.sum(Adk_mean, 1)[:, np.newaxis]

        doc_len = len(self.test_doc[1])
        log_avg_probs = 0

        for d in xrange(doc_len):
            for w in self.test_doc[1][d]:
                if not self.mask[w]:
                    continue
                log_avg_probs += np.log(
                    np.dot(Adk_mean[d, :], phi[:, batch_map[w]]))

        num = sum([len(d) for d in self.test_doc[1]])
        util_funcs.kill_obj(phi.shape[1], samples)
        return np.exp(-log_avg_probs / num)
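
The returned value is the standard held-out perplexity, exp(-(1/N) * sum over (d, w) of log(theta_d . phi_w)): Adk_mean estimates each test document's topic weights from its first half (test_doc[0]), and the log-likelihood is scored on the held-out second half (test_doc[1]). A self-contained sketch of that final scoring step, with hypothetical argument names:

    import numpy as np

    def heldout_perplexity(doc_topic, phi, heldout_docs):
        """doc_topic: (D, K) normalized doc-topic weights; phi: (K, w) topic-word
        probabilities; heldout_docs: per-document lists of column indices into phi."""
        log_prob, n_tokens = 0.0, 0
        for d, doc in enumerate(heldout_docs):
            for w in doc:
                log_prob += np.log(np.dot(doc_topic[d], phi[:, w]))
                n_tokens += 1
        return np.exp(-log_prob / n_tokens)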
Example #4
    def update(self, MH_max, LWsampler=False, g_theta=None, rec=None):
        # `not`, rather than bitwise `~`: ~ on a Python bool yields an int that is always truthy
        train_cts, phi = self.next_batch(MH_max, shift_dir=not LWsampler); collect()

        batch_mask = self.current_set[4][self.batch_loc[1]]
        if LWsampler: rec[:] = rec + batch_mask
        util_funcs.gen_batch_map(batch_mask, self.batch_map, self.W)

        cul_time = time.time()
        batch_theta = self.theta[:, batch_mask]
        self.time_bak += time.time() - cul_time - 1.5 * self.apprx * (batch_theta.shape[0]*batch_theta.shape[1]) / 1e9

        # restrict phi to [:, w], the distinct words appearing in this minibatch
        if self.batch_loc[1] != 0:
            phi = batch_theta / self.norm_const
        else:
            small_mask = np.zeros(phi.shape[1], dtype=bool)
            mask_cnt = 0
            for i in xrange(batch_mask.shape[0]):
                if self.current_set[2][i]:
                    small_mask[mask_cnt] = batch_mask[i]
                    mask_cnt += 1
            phi = phi[:, small_mask]

        w_cnt = batch_theta.shape[1]

        Adk_mean, nkw_avg = self.sample_counts(train_cts, phi, self.batch_size, self.samples_per_update, self.samples,
                                               w_cnt, self.batch_map, self.batch_map_4w, MH_max, Adk=self.ndk,
                                               Adk_mean=self.ndk_avg); collect()
        # ******************************* update theta *********************************************
        (a, b, c) = self.step_size_params
        eps_t = (a + self.update_ct / b) ** (-c)

        grad = self.beta - batch_theta + (self.D / self.batch_size) * (
            nkw_avg - np.sum(Adk_mean, 0)[:, np.newaxis] * phi); collect()
        if LWsampler:
            cul_time = time.time()
            g_theta_batch = g_theta[:, batch_mask]; collect()
            self.time_bak += time.time() - cul_time - 1.5 * self.apprx * (g_theta_batch.shape[0]*g_theta_batch.shape[1]) / 1e9
            grad += - 2 * (batch_theta - g_theta_batch) / self.H ** 2; collect()
        stale = np.sum(batch_theta, 1)[:, np.newaxis]
        batch_theta[:, :] = np.abs(batch_theta + eps_t * grad +
                                  np.random.randn(batch_theta.shape[0], batch_theta.shape[1])
                                  * (2 * eps_t) ** .5 * batch_theta ** .5); collect()
        self.norm_const += np.sum(batch_theta, 1)[:, np.newaxis] - stale

        cul_time = time.time()
        self.theta[:, batch_mask] = batch_theta
        self.time_bak += time.time() - cul_time - 1.5 * self.apprx * (batch_theta.shape[0]*batch_theta.shape[1]) / 1e9

        self.update_ct += 1
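
The theta update is one step of stochastic-gradient Langevin dynamics: a polynomially decaying step size eps_t = (a + t/b)^(-c), the minibatch-rescaled gradient, and Gaussian noise with variance 2 * eps_t * theta, with np.abs acting as a mirroring trick that keeps theta positive. A minimal sketch of that step in isolation, assuming grad has already been computed as in the code above:

    import numpy as np

    def sgld_step(theta, grad, t, a, b, c, rng=np.random):
        """One mirrored SGLD step on a positive (K, w) parameter block."""
        eps_t = (a + t / float(b)) ** (-c)
        noise = rng.randn(*theta.shape) * np.sqrt(2.0 * eps_t) * np.sqrt(theta)
        return np.abs(theta + eps_t * grad + noise)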
Example #5
    def get_perp_just_in_time(self, iter, MH_max):
        # *************************************** parameters ************************************************
        phi_mask = np.logical_and(self.test_doc[2], self.mask)
        phi = np.float32(self.nkw[:, phi_mask]) / np.float32(self.nk[:, np.newaxis])

        samples = util_funcs.gen_obj(phi.shape[1])
        table_h = np.zeros(self.K, dtype=np.int32)
        table_l = np.zeros(self.K, dtype=np.int32)
        table_p = np.zeros(self.K, dtype=np.float32)
        batch_map = np.zeros(self.V, dtype=np.int32)
        util_funcs.gen_batch_map(phi_mask, batch_map, self.V)

        # *************************************** sampling ************************************************
        util_funcs.gen_alias_table(table_h=table_h, table_l=table_l, table_p=table_p, phi=phi/np.sum(phi, 0),
                                   batch_mask=phi_mask, w_sample=self.test_doc[3],
                                   samples=samples, iter_per_update=iter, MH_max=MH_max)

        batch_N = sum(len(doc) for doc in self.test_doc[0])
        batch_D = len(self.test_doc[0])
        w_cnt = phi.shape[1]
        z = [None for _ in xrange(batch_D)]
        Adk = np.zeros((batch_D, self.K), dtype=np.int32)
        Adk_mean = np.zeros(Adk.shape, dtype=np.float32)
        burn_in = iter // 2
        rand_kkk = np.random.randint(self.K, size=batch_N)

        util_funcs.sample_z_par_alias_per(batch_D, self.test_doc[0], z, w_cnt, self.K, iter, burn_in, self.alpha,
                                          self.alpha_bar, self.beta, self.beta_bar, Adk, Adk_mean, batch_map, phi,
                                          samples, MH_max, rand_kkk, phi_mask, True)
        # *************************************** perplexity ************************************************
        Adk_mean += self.alpha
        Adk_mean /= np.sum(Adk_mean, 1)[:, np.newaxis]

        doc_len = len(self.test_doc[1])
        log_avg_probs = 0

        for d in xrange(doc_len):
            for w in self.test_doc[1][d]:
                if not self.mask[w]:
                    continue
                log_avg_probs += np.log(np.dot(Adk_mean[d, :], phi[:, batch_map[w]]))

        num = sum([len(d) for d in self.test_doc[1]])
        util_funcs.kill_obj(phi.shape[1], samples)
        return np.exp(- log_avg_probs / num)
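
MH_max bounds the number of Metropolis-Hastings steps used per token to correct for the staleness of the alias table: proposals come cheaply from the cached phi that built the table, and are accepted against the current phi. A hedged sketch of one such accept/reject test in the LightLDA word-proposal style (the Cython kernel's exact proposal mix is not shown here; this is the textbook form):

    import numpy as np

    def mh_accept(k_old, k_new, ndk, alpha, phi_w, phi_stale_w, rng=np.random):
        """Accept or reject topic proposal k_new (drawn from the stale alias table)
        for one token of word w; ndk are the document's current topic counts."""
        ratio = ((ndk[k_new] + alpha) * phi_w[k_new] * phi_stale_w[k_old]) / \
                ((ndk[k_old] + alpha) * phi_w[k_old] * phi_stale_w[k_new])
        return k_new if rng.rand() < min(1.0, ratio) else k_old

Chaining MH_max such steps per token lets the sampler keep O(1) stale proposals while still targeting the current conditional.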
Example #6
    def get_perp_just_in_time(self, MH_max):
        """ the form of test_doc is: [ [[w], [..], ...], [[test_w], [..], ..], mask[], map[] ]"""
        samples, phi = self.gen_alias_table(MH_max, perplexity=True)
        util_funcs.gen_batch_map(self.test_doc[2], self.batch_map, self.W)
        Adk_mean = self.sample_counts(self.test_doc[0], phi, len(self.test_doc[0]), self.samples_per_update, samples,
                                      phi.shape[1], self.batch_map, self.batch_map, MH_max, perplexity=True); collect()
        Adk_mean += self.alpha
        Adk_mean /= np.sum(Adk_mean, 1)[:, np.newaxis]

        doc_len = len(self.test_doc[1])
        log_avg_probs = 0

        for d in xrange(doc_len):
            for w in self.test_doc[1][d]:
                log_avg_probs += np.log(np.dot(Adk_mean[d, :], phi[:, self.batch_map[w]]))

        num = sum([len(d) for d in self.test_doc[1]])
        util_funcs.kill_obj(phi.shape[1], samples)
        return np.exp(- log_avg_probs / num)
Example #7
    def get_perp_just_in_time(self, MH_max, theta=None, norm_const=None):
        """
            note:
                assume the form of test_doc is: [ [[w], [..], ...], [[test_w], [..], ..], mask[], map[] ]
        """
        theta = self.theta if theta is None else theta
        norm_const = self.norm_const if norm_const is None else norm_const
        samples, phi = self.gen_alias_table(MH_max, theta, norm_const, perplexity=True)
        util_funcs.gen_batch_map(self.test_doc[2], self.batch_map, self.W)
        Adk_mean = self.sample_counts(self.test_doc[0], phi, len(self.test_doc[0]), self.samples_per_update, samples,
                                      phi.shape[1], self.batch_map, self.batch_map, MH_max, perplexity=True); collect()
        Adk_mean += self.alpha
        Adk_mean /= np.sum(Adk_mean, 1)[:, np.newaxis]

        doc_len = len(self.test_doc[1])
        log_avg_probs = 0

        for d in xrange(doc_len):
            for w in self.test_doc[1][d]:
                log_avg_probs += np.log(np.dot(Adk_mean[d, :], phi[:, self.batch_map[w]]))

        num = sum([len(d) for d in self.test_doc[1]])
        util_funcs.kill_obj(phi.shape[1], samples)
        return np.exp(- log_avg_probs / num)
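
This variant takes theta and norm_const explicitly so that, in distributed (LWsampler) mode, perplexity can be scored against the global theta instead of the worker's local copy. Since norm_const is maintained as the per-topic row sum of theta (see the update method below), a plausible call would be the following (hypothetical usage, not taken from the repo; sampler and g_theta come from the surrounding context):

    g_norm_const = np.sum(g_theta, 1)[:, np.newaxis]  # (K, 1) row sums of the global theta
    perp = sampler.get_perp_just_in_time(MH_max, theta=g_theta, norm_const=g_norm_const)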
Example #8
    def update(self, MH_max, LWsampler=False, g_theta=None, rec=None):
        """
            update the model with a minibatch

            input:
                MH_max: MH steps for stale alias table correction (please refer to AliasLDA and LightLDA paper for detail)
                LWsampler: are we running in distributed mode or not
                g_theta: the global theta (refer to as theta in my paper)
                rec: the mask indicates which columns are modified, which is used in distributed mode (for 10708 prj:
                as I said it's a tirck for loading part of the mat, you can pretend that you are loading the whole mat
                and ignore the details)
        """

        # `not`, rather than bitwise `~`: ~ on a Python bool yields an int that is always truthy
        train_cts, phi = self.next_batch(MH_max, shift_dir=not LWsampler); collect()

        batch_mask = self.current_set[4][self.batch_loc[1]]
        if LWsampler: rec[:] = rec + batch_mask
        # generate the map from the binary mask to compact column indices (ignore for the 10708 project)
        util_funcs.gen_batch_map(batch_mask, self.batch_map, self.W)

        cul_time = time.time()
        batch_theta = self.theta[:, batch_mask]
        self.time_bak += time.time() - cul_time - 1.5 * self.apprx * (batch_theta.shape[0]*batch_theta.shape[1]) / 1e9

        # restrict phi to [:, w], where w is the number of distinct words in the minibatch (usually w << W)
        if self.batch_loc[1] != 0:
            phi = batch_theta / self.norm_const
        else:
            small_mask = np.zeros(phi.shape[1], dtype=bool)
            mask_cnt = 0
            for i in xrange(batch_mask.shape[0]):
                if self.current_set[2][i]:
                    small_mask[mask_cnt] = batch_mask[i]
                    mask_cnt += 1
            phi = phi[:, small_mask]

        w_cnt = batch_theta.shape[1]

        # compute the expectation term in SGLD
        Adk_mean, nkw_avg = self.sample_counts(train_cts, phi, self.batch_size, self.samples_per_update, self.samples,
                                               w_cnt, self.batch_map, self.batch_map_4w, MH_max, Adk=self.ndk,
                                               Adk_mean=self.ndk_avg)

        # ******************************* gradient descent on theta (T in paper) ***************************************
        (a, b, c) = self.step_size_params
        eps_t = (a + self.update_ct / b) ** (-c)

        grad = self.beta - batch_theta + (self.D / self.batch_size) * (
            nkw_avg - np.sum(Adk_mean, 0)[:, np.newaxis] * phi)

        # in distributed mode, add the gradient from the Weierstrass kernel
        if LWsampler:
            cul_time = time.time()
            g_theta_batch = g_theta[:, batch_mask]; collect()
            self.time_bak += time.time() - cul_time - 1.5 * self.apprx * (self.K*w_cnt) / 1e9
            grad += - 2 * (batch_theta - g_theta_batch) / self.H ** 2

        stale = np.sum(batch_theta, 1)[:, np.newaxis]
        batch_theta[:, :] = np.abs(batch_theta + eps_t * grad + np.random.randn(self.K, w_cnt)*(2*eps_t)**.5 * batch_theta**.5)
        self.norm_const += np.sum(batch_theta, 1)[:, np.newaxis] - stale

        cul_time = time.time()
        self.theta[:, batch_mask] = batch_theta
        self.time_bak += time.time() - cul_time - 1.5 * self.apprx * (self.K*w_cnt) / 1e9

        self.update_ct += 1
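
The extra distributed-mode term is the gradient of a Gaussian (Weierstrass) kernel centred on the global theta, i.e. of -||theta - g_theta||^2 / H^2, which pulls each worker's local copy toward the shared one with bandwidth H. A one-line restatement of that term (the kernel form is inferred from the docstring and the code above):

    def weierstrass_grad(theta_batch, g_theta_batch, H):
        """Gradient of -||theta - g_theta||**2 / H**2 with respect to theta."""
        return -2.0 * (theta_batch - g_theta_batch) / H ** 2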