def setUp(self):
    num_topics = 3
    vocab_size = 14
    alpha = np.array([0.1, 0.2, 0.3])
    eta = np.array(range(1, vocab_size + 1)) / 100.0
    # The final two arguments are the Gibbs burn-in and lag, both disabled here.
    self.model = lda.LdaModel(corpus, num_topics, alpha, eta, 0, 0)
    self.model.stats = stats
def main():
    # Create an image with one row per iteration to visualize results
    topic_img = np.zeros((NUM_ITER + 2, NUM_TOPICS * VOCAB_SIZE))

    print 'Generating corpus'
    corpus, beta = generate_corpus()
    # Add real topics to the image
    add_to_img(topic_img, NUM_ITER + 1, beta, beta)

    alpha_guess = 0.5
    eta_guess = 0.5
    print 'Initializing model'
    model = lda.LdaModel(corpus, NUM_TOPICS, alpha_guess, eta_guess, GIBBS_BURN, GIBBS_LAG)
    add_to_img(topic_img, 0, beta, model.beta())

    # Do E-M iterations
    last_lik = model.log_likelihood_wz()
    likelihoods = [(0, last_lik)]
    for i in range(NUM_ITER):
        print 'Iteration %d' % i

        model.e_step()
        lik = model.log_likelihood_wz()
        print ' E-step ML: %f (%f)' % (lik, lik - last_lik)
        last_lik = lik

        model._m_alpha()
        lik = model.log_likelihood_wz()
        print ' M-step(alpha) ML: %f (%f)' % (lik, lik - last_lik)
        last_lik = lik

        model._m_eta()
        add_to_img(topic_img, i + 1, beta, model.beta())
        lik = model.log_likelihood_wz()
        print ' M-step(eta) ML: %f (%f)' % (lik, lik - last_lik)
        last_lik = lik

        likelihoods.append((i + 1, lik))

    save_likelihoods(likelihoods)
    save_topics(topic_img)
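# The driver above depends on a generate_corpus() helper that is not shown in
# this file. The sketch below is a hypothetical stand-in, assuming the standard
# LDA generative process; NUM_DOCS, DOC_LEN, TRUE_ALPHA, and TRUE_ETA are
# illustrative placeholders rather than values from the original project, and
# the corpus is represented as a list of word-id lists, which may differ from
# what the real LdaModel expects.
NUM_DOCS = 100      # assumed corpus size
DOC_LEN = 50        # assumed document length
TRUE_ALPHA = 0.1    # assumed document-topic concentration
TRUE_ETA = 0.1      # assumed topic-word concentration

def generate_corpus():
    # Draw one word distribution per topic; the rows form the true beta matrix.
    beta = np.random.dirichlet(np.ones(VOCAB_SIZE) * TRUE_ETA, NUM_TOPICS)
    corpus = []
    for _ in range(NUM_DOCS):
        # Per-document topic mixture.
        theta = np.random.dirichlet(np.ones(NUM_TOPICS) * TRUE_ALPHA)
        # Sample a topic for each word slot, then a word from that topic.
        topics = np.random.choice(NUM_TOPICS, size=DOC_LEN, p=theta)
        words = [np.random.choice(VOCAB_SIZE, p=beta[z]) for z in topics]
        corpus.append(words)
    return corpus, beta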
def main():
    print 'Generating corpus'
    corpus = generate_corpus()

    print 'Initializing Model'
    model = lda.LdaModel(corpus, num_topics, alpha, 0, 0)

    print 'Sampling'
    for i in range(ITERATIONS):
        model._gibbs_sample(model.stats)
        print 'Iteration %d complete' % (i + 1)
        save_beta(model.beta(), i)
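# save_beta() is defined elsewhere in the project. A minimal sketch, assuming
# it simply dumps the current topic-word estimates to disk after each sweep
# (the real helper may render an image or plot instead); the filename pattern
# is hypothetical.
def save_beta(beta, iteration):
    # beta is a (num_topics, vocab_size) matrix of per-topic word probabilities.
    np.savetxt('beta_iter_%03d.txt' % iteration, beta, fmt='%.6f')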
def setUp(self):
    num_topics = 3
    vocab_size = 14
    self.model = lda.LdaModel(corpus, num_topics, stub_alpha, stub_eta)
    self.model.stats = stats
def test_init_default(self):
    model = lda.LdaModel(corpus, self.num_topics)
    alpha = np.ones(self.num_topics) * 0.1
    eta = np.ones(self.vocab_size) * 0.1
    nptest.assert_array_equal(model.alpha, alpha)
    nptest.assert_array_equal(model.eta, eta)
def test_init_vector(self):
    alpha = np.ones(self.num_topics) * 0.2
    eta = np.ones(self.vocab_size) * 0.3
    model = lda.LdaModel(corpus, self.num_topics, alpha, eta)
    nptest.assert_array_equal(model.alpha, alpha)
    nptest.assert_array_equal(model.eta, eta)
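# A possible companion test, assuming the constructor also accepts scalar
# hyperparameters and broadcasts them to full vectors (the training script
# passes scalar alpha_guess/eta_guess). This test is not part of the original
# suite, so the broadcasting behaviour it checks is an assumption.
def test_init_scalar(self):
    alpha = 0.2
    eta = 0.3
    model = lda.LdaModel(corpus, self.num_topics, alpha, eta)
    nptest.assert_array_equal(model.alpha, np.ones(self.num_topics) * alpha)
    nptest.assert_array_equal(model.eta, np.ones(self.vocab_size) * eta)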
# -*- coding:utf-8 -*-
import lda
import simplejson as sj
import os
import time

ISOTIMEFORMAT = '%Y-%m-%d %X'
print 'start at: ' + time.strftime(ISOTIMEFORMAT, time.localtime())

p = lda.LdaModel()
p.readFile('YOUR DATA DIRECTORY')
print 'file reading finished...'
p.DocumentInitiation()
print 'initialization finished...'
p.lda_inference()
print 'lda inference finished...'

print 'end at: ' + time.strftime(ISOTIMEFORMAT, time.localtime())