def setUp(self):
    """Create the MultiChainGibbsSampler fixture used by the tests.

    Loads a Model from '../testdata/lda_model' and a Vocabulary from
    '../testdata/vocabulary.dat', then stores the sampler built from them
    on ``self.multi_chain_gibbs_sampler``.
    """
    # NOTE(review): 20 is presumably the topic count -- confirm against Model.
    lda_model = Model(20)
    lda_model.load('../testdata/lda_model')

    vocab = Vocabulary()
    vocab.load('../testdata/vocabulary.dat')

    # NOTE(review): the meaning of the trailing 10, 10, 5 arguments is not
    # visible from here -- confirm against MultiChainGibbsSampler.
    self.multi_chain_gibbs_sampler = MultiChainGibbsSampler(
        lda_model, vocab, 10, 10, 5)
class MultiChainGibbsSamplerTest(unittest.TestCase):
    """Tests MultiChainGibbsSampler.infer_topics against on-disk fixtures."""

    def setUp(self):
        """Build the sampler under test from the model/vocabulary fixtures."""
        # NOTE(review): the fixture paths are relative, so they presumably
        # resolve against the current working directory -- confirm how the
        # test runner is invoked.
        model = Model(20)
        model.load('../testdata/lda_model')
        vocabulary = Vocabulary()
        vocabulary.load('../testdata/vocabulary.dat')
        # NOTE(review): the meaning of the 10, 10, 5 arguments is not visible
        # from here -- confirm against MultiChainGibbsSampler.
        self.multi_chain_gibbs_sampler = MultiChainGibbsSampler(
            model, vocabulary, 10, 10, 5)

    def test_infer_topics(self):
        """Check the size and selected entries of the inferred distribution."""
        sampler = self.multi_chain_gibbs_sampler

        # An empty document yields an empty result.
        doc_topic_dist = sampler.infer_topics([])
        self.assertEqual(0, len(doc_topic_dist))

        doc_topic_dist = sampler.infer_topics(['apple', 'ipad'])
        # print(...) with a single argument behaves the same on Python 2 and
        # Python 3, unlike the print statement.
        print(doc_topic_dist)
        self.assertEqual(5, len(doc_topic_dist))
        for topic, expected in ((0, 0.05), (1, 0.32), (3, 0.14)):
            # assertIn reports the container contents on failure.
            self.assertIn(topic, doc_topic_dist)
            # Floats are compared with a tolerance rather than bit-for-bit.
            self.assertAlmostEqual(expected, doc_topic_dist[topic])

        doc_tokens = ['apple', 'ipad', 'apple', 'null', 'nokia', 'macbook']
        doc_topic_dist = sampler.infer_topics(doc_tokens)
        print(doc_topic_dist)
        self.assertEqual(6, len(doc_topic_dist))
class MultiChainGibbsSamplerTest(unittest.TestCase):
    """Tests MultiChainGibbsSampler.infer_topics against on-disk fixtures."""

    def setUp(self):
        """Build the sampler under test from the model/vocabulary fixtures."""
        # NOTE(review): the fixture paths are relative, so they presumably
        # resolve against the current working directory -- confirm how the
        # test runner is invoked.
        model = Model(20)
        model.load('../testdata/lda_model')
        vocabulary = Vocabulary()
        vocabulary.load('../testdata/vocabulary.dat')
        # NOTE(review): the meaning of the 10, 10, 5 arguments is not visible
        # from here -- confirm against MultiChainGibbsSampler.
        self.multi_chain_gibbs_sampler = MultiChainGibbsSampler(
            model, vocabulary, 10, 10, 5)

    def test_infer_topics(self):
        """Check the size and selected entries of the inferred distribution."""
        sampler = self.multi_chain_gibbs_sampler

        # An empty document yields an empty result.
        doc_topic_dist = sampler.infer_topics([])
        self.assertEqual(0, len(doc_topic_dist))

        doc_topic_dist = sampler.infer_topics(['apple', 'ipad'])
        # print(...) with a single argument behaves the same on Python 2 and
        # Python 3, unlike the print statement.
        print(doc_topic_dist)
        self.assertEqual(5, len(doc_topic_dist))
        for topic, expected in ((0, 0.05), (1, 0.32), (3, 0.14)):
            # assertIn reports the container contents on failure.
            self.assertIn(topic, doc_topic_dist)
            # Floats are compared with a tolerance rather than bit-for-bit.
            self.assertAlmostEqual(expected, doc_topic_dist[topic])

        doc_tokens = ['apple', 'ipad', 'apple', 'null', 'nokia', 'macbook']
        doc_topic_dist = sampler.infer_topics(doc_tokens)
        print(doc_topic_dist)
        self.assertEqual(6, len(doc_topic_dist))
def setUp(self):
    """Create the MultiChainGibbsSampler fixture used by the tests.

    Loads a Model from '../testdata/lda_model' and a Vocabulary from
    '../testdata/vocabulary.dat', then stores the sampler built from them
    on ``self.multi_chain_gibbs_sampler``.
    """
    # NOTE(review): 20 is presumably the topic count -- confirm against Model.
    lda_model = Model(20)
    lda_model.load('../testdata/lda_model')

    vocab = Vocabulary()
    vocab.load('../testdata/vocabulary.dat')

    # NOTE(review): the meaning of the trailing 10, 10, 5 arguments is not
    # visible from here -- confirm against MultiChainGibbsSampler.
    self.multi_chain_gibbs_sampler = MultiChainGibbsSampler(
        lda_model, vocab, 10, 10, 5)