# In[44]: inspect the topics produced by the LDA topic model.
# `topics_coherences` (built earlier) holds (topic, coherence) pairs;
# each topic is a list of (weight, term) tuples.
topics_with_wts = [item[0] for item in topics_coherences]
print('LDA Topics with Weights')
print('=' * 50)
for idx, topic in enumerate(topics_with_wts):
    print(f'Topic #{idx + 1}:')
    # show each term alongside its weight, rounded for readability
    print([(term, round(wt, 3)) for wt, term in topic])
    print()

# In[45]: fit a plain (single-core) LdaModel on the TF-IDF corpus
# and dump its top words per topic.
model = LdaModel(corpus=corpus_tfidf, id2word=dictionary, num_topics=10)
for idx, topic in model.print_topics():
    print(f'Topic: ({idx}) word: {topic}')

# In[46]: same topics again, terms only — weights dropped.
print('LDA Topics without Weights')
print('=' * 50)
for idx, topic in enumerate(topics_with_wts):
    print(f'Topic #{idx + 1}:')
    print([term for wt, term in topic])
    print()

# In[52]: build the interactive pyLDAvis visualization for the trained model.
LDA_viz = pyLDAvis.gensim.prepare(lda_model, corpus_tfidf, dictionary)
# Weight the bag-of-words corpus with TF-IDF and peek at the first document's
# (term_id, tfidf_weight) pairs.
corpus_tfidf = tfidf[bow_corpus]
from pprint import pprint
for doc in corpus_tfidf:
    pprint(doc)
    break

#########################
# LDA with BAG OF WORDS
#########################

# train the LDA model (10 topics, 2 passes over the corpus, 2 worker processes)
lda_model = gensim.models.LdaMulticore(bow_corpus, num_topics=10,
                                       id2word=dictionary,
                                       passes=2, workers=2)

# for each topic - explore the words occurring in that topic and their relative weight
for idx, topic in lda_model.print_topics(-1):
    print('Topic {}, \nWords: {}'.format(idx, topic))

# BUG FIX: gensim LDA models expose no sklearn-style `components_` attribute —
# the original `lda_model.components_` raises AttributeError. get_topics()
# returns the (num_topics, vocab_size) topic-term probability matrix.
word_topic = np.array(lda_model.get_topics())
word_topic = word_topic.transpose()  # -> (vocab_size, num_topics): one row per term

num_topics = 10
num_top_words = 10
# NOTE(review): assumes `vocab` (list of terms aligned with the dictionary ids)
# is defined earlier in the notebook — confirm.
vocab_array = np.asarray(vocab)
#fontsize_base = 70 / np.max(word_topic) # font size for word with largest share in corpus
fontsize_base = 10

# One subplot column per topic; the loop body appears to continue beyond this
# chunk (further plotting statements expected).
for t in range(num_topics):
    plt.subplot(1, num_topics, t + 1)  # plot numbering starts with 1