Esempio n. 1
0
    def LSAmodel(words, num_topics=5, num_words=5):
        """Train an LSI topic model on tokenized documents and evaluate it.

        Side effects: writes the bag-of-words corpus to ``corpus.pkl`` and the
        dictionary to ``dictionary.gensim`` in the current working directory.

        Args:
            words: Collection of tokenized documents. It is traversed more
                than once (dictionary construction, bag-of-words conversion
                and coherence scoring), so it must be re-iterable, e.g. a
                list of token lists.
            num_topics: Number of latent topics to train and to report.
            num_words: Number of words shown per reported topic.

        Returns:
            A ``(topics, val_perplexity, val_coherence)`` tuple: the result
            of ``print_topics``, the model's ``log_perplexity`` on the
            training corpus (``None`` when the model does not provide that
            method), and the ``c_v`` coherence score.
        """
        dictionary = corpora.Dictionary(words)
        # Bag-of-words (term/document frequency) representation of each document.
        corpus = [dictionary.doc2bow(doc) for doc in words]

        # Persist the corpus and dictionary; the context manager guarantees
        # the pickle file is flushed and closed even if dumping fails.
        with open('corpus.pkl', 'wb') as corpus_file:
            pickle.dump(corpus, corpus_file)
        dictionary.save('dictionary.gensim')

        # Train model
        lsimodel = LsiModel(corpus=corpus, num_topics=num_topics, id2word=dictionary)
        topics = lsimodel.print_topics(num_topics=num_topics, num_words=num_words)

        # Validation: perplexity.
        # NOTE(review): gensim documents ``log_perplexity`` on LdaModel; LsiModel
        # does not appear to implement it, in which case an unconditional call
        # raises AttributeError. Look it up defensively and report None when
        # the metric is unavailable -- confirm against the installed gensim.
        perplexity_fn = getattr(lsimodel, 'log_perplexity', None)
        val_perplexity = perplexity_fn(corpus) if callable(perplexity_fn) else None

        # Validation: c_v coherence score of the trained topics.
        coherence_lsimodel = CoherenceModel(
            model=lsimodel, texts=words, dictionary=dictionary, coherence='c_v'
        )
        val_coherence = coherence_lsimodel.get_coherence()

        return topics, val_perplexity, val_coherence