Beispiel #1
0
 def testPersistenceAfterProbabilityEstimationUsingTexts(self):
     """Check that a text-based 'c_v' model survives a save/load round trip.

     The reloaded model must still carry an accumulator and must report the
     same coherence as the model it was saved from.
     """
     fname = testfile()
     model = CoherenceModel(
         topics=self.topics1, texts=self.texts, dictionary=self.dictionary, coherence='c_v')
     # Estimate before saving; the assertions below check what the reloaded copy retains.
     model.estimate_probabilities()
     model.save(fname)
     model2 = CoherenceModel.load(fname)
     self.assertIsNotNone(model2._accumulator)
     # assertEqual reports both coherence values on failure, unlike assertTrue(a == b).
     self.assertEqual(model.get_coherence(), model2.get_coherence())
Beispiel #2
0
 def testAccumulatorCachingTopicSubsets(self):
     """Check that the accumulator compares equal after switching to topic subsets and back."""
     cm1 = CoherenceModel(
         topics=self.topics1,
         corpus=self.corpus,
         dictionary=self.dictionary,
         coherence='u_mass',
     )
     cm1.estimate_probabilities()
     cached = cm1._accumulator
     self.assertIsNotNone(cached)

     # Shrink every topic to its first two entries.
     truncated_topics = [topic[:2] for topic in self.topics1]
     cm1.topics = truncated_topics
     self.assertEqual(cached, cm1._accumulator)

     # Restore the full topics.
     cm1.topics = self.topics1
     self.assertEqual(cached, cm1._accumulator)
 def testAccumulatorCachingTopicSubsets(self):
     """Assigning two-entry topic prefixes, then the full topics, must leave the accumulator equal."""
     model_args = dict(corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass')
     cm1 = CoherenceModel(topics=self.topics1, **model_args)
     cm1.estimate_probabilities()

     accumulator = cm1._accumulator
     self.assertIsNotNone(accumulator)

     # First the two-entry prefixes, then the original topics again.
     for candidate_topics in ([t[:2] for t in self.topics1], self.topics1):
         cm1.topics = candidate_topics
         self.assertEqual(accumulator, cm1._accumulator)
 def testPersistenceAfterProbabilityEstimationUsingCorpus(self):
     """Check that a corpus-based 'u_mass' model survives a save/load round trip.

     The reloaded model must still carry an accumulator and must report the
     same coherence as the model it was saved from.
     """
     fname = get_tmpfile('gensim_similarities.tst.pkl')
     model = CoherenceModel(
         topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass'
     )
     # Estimate before saving; the assertions below check what the reloaded copy retains.
     model.estimate_probabilities()
     model.save(fname)
     model2 = CoherenceModel.load(fname)
     self.assertIsNotNone(model2._accumulator)
     # assertEqual reports both coherence values on failure, unlike assertTrue(a == b).
     self.assertEqual(model.get_coherence(), model2.get_coherence())
 def testPersistenceAfterProbabilityEstimationUsingTexts(self):
     """Save and reload a text-based 'c_v' model and compare it with the original.

     Passes when the loaded model has a non-None accumulator and its coherence
     equals the coherence of the model that was saved.
     """
     fname = testfile()
     model = CoherenceModel(
         topics=self.topics1, texts=self.texts, dictionary=self.dictionary, coherence='c_v'
     )
     model.estimate_probabilities()
     model.save(fname)

     model2 = CoherenceModel.load(fname)
     self.assertIsNotNone(model2._accumulator)
     # Use assertEqual so a mismatch shows both coherence values in the failure message.
     self.assertEqual(model.get_coherence(), model2.get_coherence())
Beispiel #6
0
 def testPersistenceAfterProbabilityEstimationUsingCorpus(self):
     """Save and reload a corpus-based 'u_mass' model and compare it with the original.

     Passes when the loaded model has a non-None accumulator and its coherence
     equals the coherence of the model that was saved.
     """
     fname = get_tmpfile('gensim_similarities.tst.pkl')
     model = CoherenceModel(
         topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass'
     )
     model.estimate_probabilities()
     model.save(fname)

     model2 = CoherenceModel.load(fname)
     self.assertIsNotNone(model2._accumulator)
     # Use assertEqual so a mismatch shows both coherence values in the failure message.
     self.assertEqual(model.get_coherence(), model2.get_coherence())
Beispiel #7
0
 def testAccumulatorCachingWithModelSetting(self):
     """Check that assigning a model replaces the topics and clears the accumulator."""
     cm1 = CoherenceModel(
         topics=self.topics1,
         corpus=self.corpus,
         dictionary=self.dictionary,
         coherence='u_mass',
     )
     cm1.estimate_probabilities()
     self.assertIsNotNone(cm1._accumulator)

     cm1.model = self.ldamodel

     # Build the expected topics by hand: argsort over each lambda row with
     # topn=cm1.topn and reverse=True.
     expected_topics = [
         argsort(lambda_row, topn=cm1.topn, reverse=True)
         for lambda_row in self.ldamodel.state.get_lambda()
     ]
     self.assertTrue(np.array_equal(expected_topics, cm1.topics))
     self.assertIsNone(cm1._accumulator)
 def testAccumulatorCachingWithModelSetting(self):
     """After setting `model`, topics must match the model's and the accumulator must be None."""
     shared_args = dict(corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass')
     cm1 = CoherenceModel(topics=self.topics1, **shared_args)
     cm1.estimate_probabilities()
     self.assertIsNotNone(cm1._accumulator)

     cm1.model = self.ldamodel

     # Reference topics computed directly from the LDA lambda matrix.
     lambda_rows = self.ldamodel.state.get_lambda()
     reference = [argsort(row, topn=cm1.topn, reverse=True) for row in lambda_rows]
     topics_match = np.array_equal(reference, cm1.topics)
     self.assertTrue(topics_match)
     self.assertIsNone(cm1._accumulator)
Beispiel #9
0
    def testAccumulatorCachingWithTopnSettingGivenModel(self):
        """Check topn changes on a model-backed CoherenceModel.

        Lowering topn must leave the accumulator and the stored `_topics`
        equal to their previous values while shortening the public topics;
        raising topn above the initial value must lengthen the public topics.
        """
        cm1 = CoherenceModel(
            model=self.ldamodel,
            corpus=self.corpus,
            dictionary=self.dictionary,
            topn=5,
            coherence='u_mass',
        )
        cm1.estimate_probabilities()
        self.assertIsNotNone(cm1._accumulator)

        cached_accumulator = cm1._accumulator
        stored_topics = cm1._topics

        cm1.topn = 3
        self.assertEqual(cached_accumulator, cm1._accumulator)
        self.assertEqual(3, len(cm1.topics[0]))
        self.assertEqual(stored_topics, cm1._topics)

        # With a model available, topn may grow past the initial value.
        cm1.topn = 6
        self.assertEqual(6, len(cm1.topics[0]))
    def testAccumulatorCachingWithTopnSettingGivenModel(self):
        """Shrink and then expand topn on a CoherenceModel that was built from a model."""
        model_args = dict(
            corpus=self.corpus, dictionary=self.dictionary, topn=5, coherence='u_mass'
        )
        cm1 = CoherenceModel(model=self.ldamodel, **model_args)
        cm1.estimate_probabilities()
        self.assertIsNotNone(cm1._accumulator)

        accumulator_before = cm1._accumulator
        topics_before = cm1._topics

        # Shrinking: accumulator and stored topics stay equal, public topics get shorter.
        cm1.topn = 3
        self.assertEqual(accumulator_before, cm1._accumulator)
        shrunk_length = len(cm1.topics[0])
        self.assertEqual(3, shrunk_length)
        self.assertEqual(topics_before, cm1._topics)

        # Expanding beyond the initial topn is expected to work given the model.
        cm1.topn = 6
        expanded_length = len(cm1.topics[0])
        self.assertEqual(6, expanded_length)
Beispiel #11
0
    def testAccumulatorCachingWithTopnSettingGivenTopics(self):
        """Check topn changes on a CoherenceModel built from explicit topics.

        Values below the initial topn of 5 must leave the accumulator and the
        stored `_topics` equal to their previous values; a value above it must
        raise ValueError.
        """
        cm1 = CoherenceModel(
            topics=self.topics1,
            corpus=self.corpus,
            dictionary=self.dictionary,
            topn=5,
            coherence='u_mass',
        )
        cm1.estimate_probabilities()
        self.assertIsNotNone(cm1._accumulator)

        accumulator = cm1._accumulator
        topics_before = cm1._topics

        # Stored topics should not have been truncated, so any topn below 5 should work:
        # first shrink to 3, then grow back to 4.
        for smaller_topn in (3, 4):
            cm1.topn = smaller_topn
            self.assertEqual(accumulator, cm1._accumulator)
            self.assertEqual(smaller_topn, len(cm1.topics[0]))
            self.assertEqual(topics_before, cm1._topics)

        # Without a model there is nothing to expand the topics from.
        with self.assertRaises(ValueError):
            cm1.topn = 6
    def testAccumulatorCachingWithTopnSettingGivenTopics(self):
        """Shrink topn on a topics-only CoherenceModel, then verify expansion is rejected."""
        settings = dict(
            corpus=self.corpus, dictionary=self.dictionary, topn=5, coherence='u_mass'
        )
        cm1 = CoherenceModel(topics=self.topics1, **settings)
        cm1.estimate_probabilities()
        self.assertIsNotNone(cm1._accumulator)

        original_accumulator = cm1._accumulator
        original_topics = cm1._topics

        cm1.topn = 3
        self.assertEqual(original_accumulator, cm1._accumulator)
        self.assertEqual(3, len(cm1.topics[0]))
        self.assertEqual(original_topics, cm1._topics)

        # Stored topics were not truncated, so a topn of 4 (still below 5) should work.
        cm1.topn = 4
        self.assertEqual(original_accumulator, cm1._accumulator)
        self.assertEqual(4, len(cm1.topics[0]))
        self.assertEqual(original_topics, cm1._topics)

        # Topics can't be expanded any further without a model.
        self.assertRaises(ValueError, setattr, cm1, 'topn', 6)