def setUp(self):
    """Create the MultiChainGibbsSampler fixture used by the tests.

    Loads a Model from '../testdata/lda_model' and a Vocabulary from
    '../testdata/vocabulary.dat' (both relative to the working directory)
    and stores the sampler on ``self.multi_chain_gibbs_sampler``.
    """
    lda_model = Model(20)
    lda_model.load('../testdata/lda_model')
    vocab = Vocabulary()
    vocab.load('../testdata/vocabulary.dat')
    # NOTE(review): the trailing positional arguments are presumably
    # num_markov_chains=10, total_iterations=10, burn_in_iterations=5 --
    # confirm against the MultiChainGibbsSampler constructor.
    self.multi_chain_gibbs_sampler = MultiChainGibbsSampler(
        lda_model, vocab, 10, 10, 5)
Esempio n. 2
0
 def setUp(self):
     """Create the SparseLDAGibbsSampler fixture used by the tests.

     Loads a Model from '../testdata/lda_model' and a Vocabulary from
     '../testdata/vocabulary.dat' (both relative to the working directory)
     and stores the sampler on ``self.sparselda_gibbs_sampler``.
     """
     lda_model = Model(20)
     lda_model.load('../testdata/lda_model')
     vocab = Vocabulary()
     vocab.load('../testdata/vocabulary.dat')
     # NOTE(review): 10 and 5 are presumably the total and burn-in
     # iteration counts -- confirm against SparseLDAGibbsSampler.
     self.sparselda_gibbs_sampler = SparseLDAGibbsSampler(
         lda_model, vocab, 10, 5)
Esempio n. 3
0
class ModelEvaluatorTest(unittest.TestCase):
    """Regression test for ``model_evaluator.compute_loglikelihood``."""

    def setUp(self):
        """Load the model and vocabulary fixtures from ``../testdata``.

        The paths are relative to the current working directory.
        """
        self.model = Model(20)
        self.model.load('../testdata/lda_model')
        self.vocabulary = Vocabulary()
        self.vocabulary.load('../testdata/vocabulary.dat')

    def test_compute_loglikelihood(self):
        # Checks the log-likelihood of two copies of one document, parsed
        # with a fixed random seed, against a previously recorded value.
        doc_tokens = [
            # exist in vocabulary and model
            'macbook',
            'ipad',
            # only exist in vocabulary
            'mac os x',
            'chrome',
            # inexistent
            'nokia',
            'null',
        ]
        document = Document(self.model.num_topics)
        # Seed the generator so the parse is reproducible between runs.
        rand = random.Random()
        rand.seed(0)
        document.parse_from_tokens(doc_tokens, rand, self.vocabulary,
                                   self.model)
        documents = [document, document]
        # The result is a computed float: compare approximately instead of
        # bit-for-bit so last-digit rounding differences between platforms
        # or math libraries do not fail the test.
        self.assertAlmostEqual(
            -14.113955684239654,
            model_evaluator.compute_loglikelihood(self.model, self.vocabulary,
                                                  documents))
 def setUp(self):
     """Prepare ``self.multi_chain_gibbs_sampler`` for each test.

     The model is read from '../testdata/lda_model' and the vocabulary from
     '../testdata/vocabulary.dat', both relative to the working directory.
     """
     loaded_model = Model(20)
     loaded_model.load('../testdata/lda_model')
     loaded_vocabulary = Vocabulary()
     loaded_vocabulary.load('../testdata/vocabulary.dat')
     # NOTE(review): presumably 10 Markov chains, 10 total iterations and
     # 5 burn-in iterations -- confirm against MultiChainGibbsSampler.
     sampler = MultiChainGibbsSampler(
         loaded_model, loaded_vocabulary, 10, 10, 5)
     self.multi_chain_gibbs_sampler = sampler
 def setUp(self):
     """Prepare ``self.sparselda_gibbs_sampler`` for each test.

     The model is read from '../testdata/lda_model' and the vocabulary from
     '../testdata/vocabulary.dat', both relative to the working directory.
     """
     loaded_model = Model(20)
     loaded_model.load('../testdata/lda_model')
     loaded_vocabulary = Vocabulary()
     loaded_vocabulary.load('../testdata/vocabulary.dat')
     # NOTE(review): 10 and 5 are presumably total and burn-in iteration
     # counts -- confirm against SparseLDAGibbsSampler.
     sampler = SparseLDAGibbsSampler(loaded_model, loaded_vocabulary, 10, 5)
     self.sparselda_gibbs_sampler = sampler
Esempio n. 6
0
class TopicWordsStatTest(unittest.TestCase):
    """Smoke tests for TopicWordsStat: results are printed, not asserted."""

    def setUp(self):
        """Load the model and vocabulary fixtures and build TopicWordsStat."""
        model = Model(20)
        model.load("../testdata/lda_model")
        vocabulary = Vocabulary()
        vocabulary.load("../testdata/vocabulary.dat")

        self.model = model
        self.vocabulary = vocabulary
        self.topic_words_stat = TopicWordsStat(model, vocabulary)

    def test_save(self):
        # Calls save() with the output path and 0.8, then prints what it
        # returned.
        saved = self.topic_words_stat.save(
            "../testdata/topic_top_words.dat", 0.8)
        print(saved)

    def test_get_topic_top_words(self):
        # Prints the value returned by get_topic_top_words(0.8).
        top_words = self.topic_words_stat.get_topic_top_words(0.8)
        print(top_words)

    def test_compute_topic_word_distribution(self):
        # Prints the value returned by compute_topic_word_distribution().
        distribution = self.topic_words_stat.compute_topic_word_distribution()
        print(distribution)
Esempio n. 7
0
def main(args):
    """Infer and print a topic distribution for every document in a file.

    Loads the model from ``args.model_dir`` and the vocabulary from
    ``args.vocabulary``, builds a ``MultiChainGibbsSampler`` and then reads
    ``args.documents`` one line at a time. Each line is decoded as GBK,
    stripped and split on tabs into tokens; the decoded line and the result
    of ``infer_topics`` are printed.

    Args:
        args: object providing ``model_dir``, ``vocabulary``, ``documents``,
            ``num_markov_chains``, ``total_iterations`` and
            ``burn_in_iterations`` attributes (presumably a parsed
            command-line namespace -- confirm against the caller).
    """
    model = Model(0)
    model.load(args.model_dir)
    vocabulary = Vocabulary()
    vocabulary.load(args.vocabulary)
    multi_chain_gibbs_sampler = MultiChainGibbsSampler(
        model, vocabulary, args.num_markov_chains, args.total_iterations,
        args.burn_in_iterations)

    # The context manager closes the file even when decoding or inference
    # raises; iterating the file object streams lines instead of loading the
    # whole file with readlines().
    with open(args.documents, 'r') as fp:
        for doc_str in fp:
            doc_str = doc_str.decode('gbk')
            doc_tokens = doc_str.strip().split('\t')
            topic_dist = multi_chain_gibbs_sampler.infer_topics(doc_tokens)
            print(doc_str)
            print(topic_dist)
def main(args):
    """Print the inferred topic distribution for each line of a documents file.

    The model is loaded from ``args.model_dir`` and the vocabulary from
    ``args.vocabulary``. Every line of ``args.documents`` is decoded as GBK,
    stripped, split on tab characters and passed to
    ``MultiChainGibbsSampler.infer_topics``; the decoded line and the
    returned value are printed.

    Args:
        args: object providing ``model_dir``, ``vocabulary``, ``documents``,
            ``num_markov_chains``, ``total_iterations`` and
            ``burn_in_iterations`` attributes (presumably a parsed
            command-line namespace -- confirm against the caller).
    """
    model = Model(0)
    model.load(args.model_dir)
    vocabulary = Vocabulary()
    vocabulary.load(args.vocabulary)
    multi_chain_gibbs_sampler = MultiChainGibbsSampler(model, vocabulary,
                                                       args.num_markov_chains,
                                                       args.total_iterations,
                                                       args.burn_in_iterations)

    # Use a context manager so the file is closed even if a line fails to
    # decode or inference raises, and read lazily rather than via
    # readlines() so large inputs are not held in memory at once.
    with open(args.documents, 'r') as fp:
        for doc_str in fp:
            doc_str = doc_str.decode('gbk')
            doc_tokens = doc_str.strip().split('\t')
            topic_dist = multi_chain_gibbs_sampler.infer_topics(doc_tokens)
            print(doc_str)
            print(topic_dist)
Esempio n. 9
0
class TopicWordsStatTest(unittest.TestCase):
    """Exercises TopicWordsStat and prints its results without asserting."""

    def setUp(self):
        """Build a TopicWordsStat from the model and vocabulary fixtures."""
        lda_model = Model(20)
        lda_model.load('../testdata/lda_model')
        vocab = Vocabulary()
        vocab.load('../testdata/vocabulary.dat')

        self.model = lda_model
        self.vocabulary = vocab
        self.topic_words_stat = TopicWordsStat(lda_model, vocab)

    def test_save(self):
        # Invokes save() with the output path and 0.8 and prints its result.
        outcome = self.topic_words_stat.save(
            '../testdata/topic_top_words.dat', 0.8)
        print(outcome)

    def test_get_topic_top_words(self):
        # Prints whatever get_topic_top_words(0.8) returns.
        outcome = self.topic_words_stat.get_topic_top_words(0.8)
        print(outcome)

    def test_compute_topic_word_distribution(self):
        # Prints whatever compute_topic_word_distribution() returns.
        outcome = self.topic_words_stat.compute_topic_word_distribution()
        print(outcome)
Esempio n. 10
0
class ModelEvaluatorTest(unittest.TestCase):
    """Regression test for ``model_evaluator.compute_loglikelihood``."""

    def setUp(self):
        """Load the model and vocabulary fixtures from ``../testdata``.

        The paths are relative to the current working directory.
        """
        self.model = Model(20)
        self.model.load('../testdata/lda_model')
        self.vocabulary = Vocabulary()
        self.vocabulary.load('../testdata/vocabulary.dat')

    def test_compute_loglikelihood(self):
        # Scores two copies of one document, parsed with a fixed random
        # seed, and compares the log-likelihood with a recorded value.
        doc_tokens = ['macbook', 'ipad',  # exist in vocabulary and model
                      'mac os x', 'chrome',  # only exist in vocabulary
                      'nokia', 'null']  # inexistent
        document = Document(self.model.num_topics)
        # Seed the generator so the parse is reproducible between runs.
        rand = random.Random()
        rand.seed(0)
        document.parse_from_tokens(
            doc_tokens, rand, self.vocabulary, self.model)
        documents = [document, document]
        # The result is a computed float: compare approximately instead of
        # bit-for-bit so last-digit rounding differences between platforms
        # or math libraries do not fail the test.
        self.assertAlmostEqual(
            -14.113955684239654,
            model_evaluator.compute_loglikelihood(
                self.model, self.vocabulary, documents))