# Demo script: estimate the optimal number of topics, fit an NMF topic model,
# persist it, and print a few diagnostics. Relies on `topic_model`,
# `Visualization`, and `utils` defined earlier in the file (not visible in
# this chunk).
#
# Fix: the original used Python 2 `print` statements (a SyntaxError on
# Python 3); converted to `print()` calls with identical output.

# Estimate the optimal number of topics with three stability/consistency metrics.
viz = Visualization(topic_model)
viz.plot_greene_metric(min_num_topics=10, max_num_topics=30, tao=10, step=1,
                       top_n_words=10)
viz.plot_arun_metric(min_num_topics=5, max_num_topics=30, iterations=10)
viz.plot_consensus_metric(min_num_topics=5, max_num_topics=30, iterations=10)

# Infer topics (15 chosen here; presumably guided by the plots above — TODO confirm).
print('Inferring topics...')
topic_model.infer_topics(num_topics=15)

# Save model on disk so it can be reloaded without re-training.
utils.save_topic_model(topic_model, 'output/NMF_15topics.pickle')

# Print results.
print('\nTopics:')
topic_model.print_topics(num_words=10)
print('\nTopic distribution for document 0:',
      topic_model.topic_distribution_for_document(0))
print('\nMost likely topic for document 0:',
      topic_model.most_likely_topic_for_document(0))
print('\nFrequency of topics:',
      topic_model.topics_frequency())
print('\nTop 10 most relevant words for topic 2:',
      topic_model.top_words(2, 10))
# Fit an NMF topic model and export every artefact the topic-browser UI reads:
# topic cloud, per-topic word/affiliation/frequency files, and per-document
# topic distributions. `corpus`, `num_topics`, and `utils` come from earlier
# in the file (not visible in this chunk).

# Infer topics.
topic_model = NonNegativeMatrixFactorization(corpus=corpus)
topic_model.infer_topics(num_topics=num_topics)
topic_model.print_topics(num_words=10)

# Clean the data directory so stale exports from a previous run don't linger.
if os.path.exists('static/data'):
    shutil.rmtree('static/data')
os.makedirs('static/data')

# Export topic cloud.
utils.save_topic_cloud(topic_model, 'static/data/topic_cloud.json')

# Export details about topics.
for topic_id in range(topic_model.nb_topics):
    utils.save_word_distribution(
        topic_model.top_words(topic_id, 20),
        'static/data/word_distribution' + str(topic_id) + '.tsv')
    utils.save_affiliation_repartition(
        topic_model.affiliation_repartition(topic_id),
        'static/data/affiliation_repartition' + str(topic_id) + '.tsv')
    # Topic frequency per year (2004-2015 inclusive; range end is exclusive).
    evolution = []
    for year in range(2004, 2016):
        evolution.append((year, topic_model.topic_frequency(topic_id, date=year)))
    utils.save_topic_evolution(
        evolution, 'static/data/frequency' + str(topic_id) + '.tsv')

# Export details about documents.
for doc_id in range(topic_model.corpus.size):
    utils.save_topic_distribution(
        topic_model.topic_distribution_for_document(doc_id),
        'static/data/topic_distribution_d' + str(doc_id) + '.tsv')

# Export details about words.
for word_id in range(len(topic_model.corpus.vocabulary)):
    # NOTE(review): the loop body is truncated in the source at this point
    # (the original line ends on the loop header). Restore the per-word
    # export here — presumably a utils.save_* call; verify against the
    # original file or the sibling script before shipping.
    pass
# Fit an NMF topic model on `corpus` and regenerate every export the topic
# browser reads: the topic cloud plus per-topic and per-document detail files.
# (`corpus`, `num_topics`, and `utils` are defined elsewhere in this file.)

topic_model = NonNegativeMatrixFactorization(corpus=corpus)
topic_model.infer_topics(num_topics=num_topics)
topic_model.print_topics(num_words=10)

# Start from an empty export directory so no stale files survive a re-run.
if os.path.exists('static/data'):
    shutil.rmtree('static/data')
os.makedirs('static/data')

# Topic cloud for the landing page.
utils.save_topic_cloud(topic_model, 'static/data/topic_cloud.json')

# Per-topic exports: top words, affiliation breakdown, yearly frequency.
for tid in range(topic_model.nb_topics):
    utils.save_word_distribution(
        topic_model.top_words(tid, 20),
        'static/data/word_distribution' + str(tid) + '.tsv')
    utils.save_affiliation_repartition(
        topic_model.affiliation_repartition(tid),
        'static/data/affiliation_repartition' + str(tid) + '.tsv')
    # (year, frequency) pairs for 2012-2015 inclusive.
    yearly = [(year, topic_model.topic_frequency(tid, date=year))
              for year in range(2012, 2016)]
    utils.save_topic_evolution(
        yearly, 'static/data/frequency' + str(tid) + '.tsv')

# Per-document exports: topic mixture for each document.
for did in range(topic_model.corpus.size):
    utils.save_topic_distribution(
        topic_model.topic_distribution_for_document(did),
        'static/data/topic_distribution_d' + str(did) + '.tsv')