Example #1
0
# In[44]:

# Inspect the LDA output: element 0 of every topics_coherences entry is kept
# and iterated below as (weight, term) pairs.
topics_with_wts = [entry[0] for entry in topics_coherences]
print('LDA Topics with Weights', '=' * 50, sep='\n')
for idx, topic in enumerate(topics_with_wts):
    # Topics are numbered from 1 for display; weights are rounded to 3 places.
    print(f'Topic #{idx + 1}:')
    print([(word, round(weight, 3)) for weight, word in topic], end='\n\n')

# In[45]:

# Fit a 10-topic LDA model on the TF-IDF corpus, then print every
# (topic id, topic description) pair that print_topics() returns.
model = LdaModel(
    corpus=corpus_tfidf,
    id2word=dictionary,
    num_topics=10,
)
for idx, topic in model.print_topics():
    print(f'Topic: ({idx}) word: {topic}')

# In[46]:

# Show each topic as a bare list of its terms, dropping the weights.
print('LDA Topics without Weights', '=' * 50, sep='\n')
for idx, topic in enumerate(topics_with_wts):
    # Topics are numbered from 1 for display.
    print(f'Topic #{idx + 1}:')
    print([word for _weight, word in topic], end='\n\n')

# In[52]:

# Prepare the pyLDAvis visualisation data from the model, the TF-IDF corpus
# and the dictionary.
# NOTE(review): in file order, lda_model is only assigned further down and
# corpus_tfidf on the next line; this presumably relies on notebook state or
# on code outside this view -- confirm the intended execution order.
# NOTE(review): newer pyLDAvis releases expose this module as
# pyLDAvis.gensim_models -- confirm against the installed version.
LDA_viz = pyLDAvis.gensim.prepare(lda_model, corpus_tfidf, dictionary)
# Index `tfidf` with the bag-of-words corpus to obtain the weighted corpus
# (presumably `tfidf` is a gensim TfidfModel -- TODO confirm).
corpus_tfidf = tfidf[bow_corpus]

from pprint import pprint

# Peek at the first document of the TF-IDF corpus only; an empty corpus
# prints nothing.
try:
    doc = next(iter(corpus_tfidf))
except StopIteration:
    pass
else:
    pprint(doc)

#########################
# LDA with BAG OF WORDS
#########################
    
# Train a 10-topic LDA model on the bag-of-words corpus
# (2 passes over the corpus, 2 workers).
lda_model = gensim.models.LdaMulticore(
    bow_corpus,
    num_topics=10,
    id2word=dictionary,
    passes=2,
    workers=2,
)

# For each topic, show the words occurring in it and their relative weights.
for idx, topic in lda_model.print_topics(-1):
    print(f'Topic {idx}, \nWords: {topic}')



# Copy the model's component matrix into an array and transpose it.
# NOTE(review): components_ is read from lda_model here; presumably rows are
# topics and columns are vocabulary terms before the transpose -- TODO confirm.
word_topic = np.array(lda_model.components_).T
vocab_array = np.asarray(vocab)

# Plot configuration: number of topics and top words per topic.
num_topics = num_top_words = 10

# Fixed base font size. Alternative kept for reference (scales so the word
# with the largest share in the corpus gets the largest font):
#fontsize_base = 70 / np.max(word_topic)
fontsize_base = 10

for t in range(num_topics):
    plt.subplot(1, num_topics, t + 1)  # plot numbering starts with 1