def testCompareCoherenceForModels(self):
        """Exercise ``CoherenceModel.for_models`` and ``compare_models`` together.

        The same LDA model is listed twice, so both aggregate coherence
        values must agree, and each aggregate must equal the mean of its
        per-topic coherences (all checked to 4 decimal places).
        """
        candidates = [self.ldamodel, self.ldamodel]
        cm = CoherenceModel.for_models(
            candidates,
            dictionary=self.dictionary,
            texts=self.texts,
            coherence='c_v',
        )
        self.assertIsNotNone(cm._accumulator)

        # Assigning each candidate as the active model must leave an
        # accumulator in place.
        for candidate in candidates:
            cm.model = candidate
            self.assertIsNotNone(cm._accumulator)

        first_result, second_result = cm.compare_models(candidates)
        per_topic_1, overall_1 = first_result
        per_topic_2, overall_2 = second_result

        self.assertAlmostEqual(np.mean(per_topic_1), overall_1, places=4)
        self.assertAlmostEqual(np.mean(per_topic_2), overall_2, places=4)
        self.assertAlmostEqual(overall_1, overall_2, places=4)
# Example #2
# 0
    def testCompareCoherenceForModels(self):
        """Compare coherence of two identical models built via ``for_models``.

        Verifies that the accumulator is populated, that each model's
        aggregate coherence matches the mean of its per-topic values, and
        that the two aggregates agree (4 decimal places throughout).
        """
        model_pair = [self.ldamodel, self.ldamodel]
        cm = CoherenceModel.for_models(
            model_pair,
            dictionary=self.dictionary,
            texts=self.texts,
            coherence='c_v',
        )
        self.assertIsNotNone(cm._accumulator)

        # The accumulator must still be present after each model is made
        # the active one.
        for current in model_pair:
            cm.model = current
            self.assertIsNotNone(cm._accumulator)

        result_a, result_b = cm.compare_models(model_pair)
        topic_scores_a, score_a = result_a
        topic_scores_b, score_b = result_b

        self.assertAlmostEqual(np.mean(topic_scores_a), score_a, places=4)
        self.assertAlmostEqual(np.mean(topic_scores_b), score_b, places=4)
        self.assertAlmostEqual(score_a, score_b, places=4)
#%%Topic Modeling
from sklearn.decomposition import TruncatedSVD
from gensim.models.coherencemodel import CoherenceModel
from nltk.tokenize import sent_tokenize, word_tokenize
# NOTE(review): word_tokenize is imported but never used in this cell.

# Fit a 20-component truncated SVD on X.
# X and data are not defined in this cell; presumably X is a document-term
# matrix built earlier in the notebook — TODO confirm.
svd_model = TruncatedSVD(n_components=20,
                         algorithm='randomized',
                         n_iter=100,
                         random_state=122)

svd_model.fit(X)

# Bare expression: the value is discarded unless the cell is run interactively.
len(svd_model.components_)

# Concatenate every entry of data['processed_data'] into one string and split
# it into sentences.
# NOTE(review): ''.join inserts no separator, so the last word of one entry is
# fused with the first word of the next; ' '.join was probably intended — confirm.
text = sent_tokenize(''.join(list(data['processed_data'].values)))

# NOTE(review): svd_model is a scikit-learn TruncatedSVD, while CoherenceModel
# appears to expect a gensim topic model (or explicit `topics`) plus a
# dictionary, and `texts` as tokenized documents (lists of tokens) rather than
# sentence strings. This call likely fails at construction — verify against the
# gensim CoherenceModel documentation before relying on it.
coherencemodel = CoherenceModel(model=svd_model, texts=text)

#%%
from gensim.test.utils import common_corpus, common_dictionary
from gensim.models.ldamodel import LdaModel
from gensim.models.coherencemodel import CoherenceModel

# Train two LDA models on the same corpus and dictionary; they differ only in
# the number of topics (3 vs. 5).
m1 = LdaModel(corpus=common_corpus, num_topics=3, id2word=common_dictionary)
m2 = LdaModel(corpus=common_corpus, num_topics=5, id2word=common_dictionary)

# Build a single u_mass CoherenceModel covering both trained models.
cm = CoherenceModel.for_models(
    models=[m1, m2],
    dictionary=common_dictionary,
    corpus=common_corpus,
    coherence='u_mass',
)