def train(self, n_iterations=100, verbose=1, **kwargs):
    """
    Takes an optional argument, `n_iterations`, and updates the model
    `n_iterations` times.

    :param n_iterations: Number of iterations. Default is 100.
    :type n_iterations: int, optional

    :param verbose: If 1, the current iteration count is printed to
        notify the user. Default is 1.
    :type verbose: int, optional

    :param kwargs: For compatibility with calls to LdaCgsMulti.
    :type kwargs: optional
    """
    # Assumes module-level imports of numpy as np, time, the progressbar
    # widgets (ProgressBar, Percentage, Bar), and the compiled cgs_update
    # sampler used below.

    # Restore the saved RNG state; the raw state tuple is also passed
    # directly to cgs_update below.
    random_state = np.random.RandomState(self.seed)
    random_state.set_state(self._mtrand_state)

    start = time.time()
    t = start
    if verbose > 0:
        print('Begin LDA training for {0} iterations'.format(n_iterations))

    # Training loop
    stop = self.iteration + n_iterations
    pbar = ProgressBar(widgets=[Percentage(), Bar()],
                       maxval=n_iterations).start()

    for itr in range(self.iteration, stop):
        results = cgs_update(self.iteration, self.corpus, self.word_top,
                             self.inv_top_sums, self.top_doc, self.Z,
                             self.indices,
                             self._mtrand_state[0], self._mtrand_state[1],
                             self._mtrand_state[2], self._mtrand_state[3],
                             self._mtrand_state[4])

        lp = results[4]
        self.log_probs.append((self.iteration, lp))

        # Track per-iteration wall time so the final iteration can be
        # reported even when verbose < 2.
        itr_time = np.around(time.time() - t, decimals=1)
        t = time.time()

        if verbose > 1 or itr == stop - 1:
            print('\nIteration {0} complete: log_prob={1}, time={2}'.format(
                self.iteration, lp, itr_time))

        if verbose == 1:
            pbar.update(self.iteration - (stop - n_iterations))
            time.sleep(0.01)

        self.iteration += 1
        self._mtrand_state = results[5:]

    pbar.finish()

    if verbose > 1:
        print('-' * 60, '\n\nWalltime per iteration: {0} seconds'.format(
            np.around((t - start) / n_iterations, decimals=2)))
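# Hedged usage sketch (not part of the original module): how `train` is
# typically called on the sequential sampler.  The class name `LdaCgsSeq`,
# its import path, and the constructor keywords below are assumptions drawn
# from the surrounding API, not confirmed by this file; the corpus object is
# taken as a parameter to avoid guessing its constructor.
def _example_train_usage(corpus):
    """Illustrative only: fit a model and inspect its log probabilities."""
    from vsm.model.ldacgsseq import LdaCgsSeq   # assumed import path

    m = LdaCgsSeq(corpus=corpus, context_type='document', K=20, seed=42)
    m.train(n_iterations=200, verbose=1)        # verbose=1 shows a progress bar
    return m.log_probs                          # list of (iteration, log_prob)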
def update(args):
    """
    Worker routine for LdaCgsMulti: runs one Gibbs sweep over a slice of
    the corpus and returns the local updates for the parent to merge.
    """
    # Python 3 has no tuple-unpacking parameters, so unpack the single
    # mapped argument here.
    (docs, doc_indices, mtrand_state) = args

    start, stop = docs[0][0], docs[-1][1]

    # Views into the module-level shared buffers set up by the parent.
    corpus = np.frombuffer(_corpus, dtype='i')[start:stop]
    Z = np.frombuffer(_Z, dtype='i')[start:stop].copy()

    gbl_word_top = np.frombuffer(_word_top, dtype='d')
    gbl_word_top = gbl_word_top.reshape(_V.value, _K.value)
    loc_word_top = gbl_word_top.copy()
    inv_top_sums = np.frombuffer(_inv_top_sums, dtype='d').copy()

    top_doc = np.frombuffer(_top_doc, dtype='d')
    top_doc = top_doc.reshape(_K.value, top_doc.size // _K.value)
    top_doc = top_doc[:, doc_indices[0]:doc_indices[1]].copy()

    log_p = 0
    # log_wk and log_kc are computed here but not used below.
    log_wk = np.log(gbl_word_top * inv_top_sums[np.newaxis, :])
    log_kc = np.log(top_doc / top_doc.sum(0)[np.newaxis, :])

    # Document boundaries relative to this worker's corpus slice.
    indices = np.array([(j - start) for (i, j) in docs], dtype='i')

    results = cgs_update(_iteration.value, corpus, loc_word_top,
                         inv_top_sums, top_doc, Z, indices,
                         mtrand_state[0], mtrand_state[1], mtrand_state[2],
                         mtrand_state[3], mtrand_state[4])

    (loc_word_top, inv_top_sums, top_doc, Z, log_p,
     mtrand_str, mtrand_keys, mtrand_pos,
     mtrand_has_gauss, mtrand_cached_gaussian) = results

    # Return only the delta against the global word-topic counts so the
    # parent can sum the contributions from all workers.
    loc_word_top -= gbl_word_top

    return (Z, top_doc, loc_word_top, log_p,
            mtrand_str, mtrand_keys, mtrand_pos,
            mtrand_has_gauss, mtrand_cached_gaussian)
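# Hedged dispatch sketch (not part of the original module): `update` is
# written to be the target of a multiprocessing map.  Each worker receives a
# single (docs, doc_indices, mtrand_state) tuple and reads the corpus-wide
# arrays from module-level shared buffers (_corpus, _Z, _word_top,
# _inv_top_sums, _top_doc, _K, _V, _iteration).  The pool setup and chunk
# names below are assumptions about how LdaCgsMulti drives this function,
# not code taken from it.
def _example_dispatch(doc_partitions, doc_index_partitions, mtrand_states,
                      n_proc=2):
    """Illustrative only: fan one Gibbs sweep out over worker processes."""
    import multiprocessing as mp

    with mp.Pool(processes=n_proc) as pool:
        # Each zipped element matches update's single tuple argument:
        # (docs, doc_indices, mtrand_state).
        results = pool.map(update,
                           zip(doc_partitions, doc_index_partitions,
                               mtrand_states))

    # Each result carries the worker's local Z, its top_doc slice, the
    # word_top delta, a log probability, and the worker's RNG state, to be
    # merged back into the global counts by the parent process.
    return results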