def LSAmodel(words, num_topics=5, num_words=5):
    """Train an LSI (LSA) topic model on tokenised documents and score it.

    Side effects: the bag-of-words corpus is pickled to ``corpus.pkl`` and
    the dictionary is saved to ``dictionary.gensim``, both relative to the
    current working directory. Existing files are overwritten.

    Args:
        words: Collection of documents, each a sequence of tokens. It is
            traversed more than once (dictionary build, bag-of-words
            conversion, coherence scoring), so it must be re-iterable
            (e.g. a list of lists), not a one-shot generator.
        num_topics: Number of latent topics to train and to report.
        num_words: Number of top words to include per reported topic.

    Returns:
        A ``(topics, val_perplexity, val_coherence)`` tuple where
        ``topics`` is the result of ``print_topics``, ``val_perplexity`` is
        the result of the model's ``log_perplexity`` on the training corpus
        (or ``None`` when the model does not provide that method), and
        ``val_coherence`` is the ``c_v`` coherence score.
    """
    dictionary = corpora.Dictionary(words)

    # Bag-of-words (term id, count) representation of every document.
    corpus = [dictionary.doc2bow(doc) for doc in words]

    # Persist the artefacts; the context manager guarantees the pickle file
    # is flushed and closed even if dumping raises.
    with open('corpus.pkl', 'wb') as corpus_file:
        pickle.dump(corpus, corpus_file)
    dictionary.save('dictionary.gensim')

    # Train the model.
    lsimodel = LsiModel(corpus=corpus, num_topics=num_topics,
                        id2word=dictionary)
    topics = lsimodel.print_topics(num_topics=num_topics,
                                   num_words=num_words)

    # Validation.
    # NOTE(review): log_perplexity is an LdaModel method; LsiModel does not
    # appear to provide it -- confirm against the installed gensim version.
    # Look it up defensively so a missing method yields None instead of an
    # AttributeError after the (expensive) training step.
    perplexity_fn = getattr(lsimodel, 'log_perplexity', None)
    val_perplexity = perplexity_fn(corpus) if callable(perplexity_fn) else None

    # Coherence score (c_v) computed against the original tokenised texts.
    coherence_lsimodel = CoherenceModel(model=lsimodel, texts=words,
                                        dictionary=dictionary,
                                        coherence='c_v')
    val_coherence = coherence_lsimodel.get_coherence()

    return topics, val_perplexity, val_coherence