Beispiel #1
0
# Estimate the optimal number of topics: draw the Greene, Arun and consensus
# metric plots over a range of candidate topic counts.
# NOTE(review): the Greene plot starts at 10 topics while the other two start
# at 5 -- confirm this difference is intentional.
viz = Visualization(topic_model)
viz.plot_greene_metric(min_num_topics=10,
                       max_num_topics=30,
                       tao=10, step=1,
                       top_n_words=10)
viz.plot_arun_metric(min_num_topics=5,
                     max_num_topics=30,
                     iterations=10)
viz.plot_consensus_metric(min_num_topics=5,
                        max_num_topics=30,
                        iterations=10)

# Infer topics
# Every print below is given exactly one pre-formatted string in parentheses.
# That form is valid for both the Python 2 print statement and the Python 3
# print function and produces the same text ("label value") in each, whereas
# the former `print 'label', value` form is a SyntaxError on Python 3.
print('Inferring topics...')
topic_model.infer_topics(num_topics=15)
# Save model on disk
utils.save_topic_model(topic_model, 'output/NMF_15topics.pickle')

# Print results
print('\nTopics:')
topic_model.print_topics(num_words=10)
# The value is wrapped in a 1-tuple so that a tuple/list result is rendered
# with str() as a whole instead of being unpacked as %-format arguments.
print('\nTopic distribution for document 0: %s'
      % (topic_model.topic_distribution_for_document(0),))
print('\nMost likely topic for document 0: %s'
      % (topic_model.most_likely_topic_for_document(0),))
print('\nFrequency of topics: %s'
      % (topic_model.topics_frequency(),))
print('\nTop 10 most relevant words for topic 2: %s'
      % (topic_model.top_words(2, 10),))
Beispiel #2
0
# Fit the non-negative matrix factorization model on the corpus and print
# the 10 top words of every inferred topic.
topic_model = NonNegativeMatrixFactorization(corpus=corpus)
topic_model.infer_topics(num_topics=num_topics)
topic_model.print_topics(num_words=10)

# Start from an empty export directory: remove any previous one, then
# create it again.
_export_dir = 'static/data'
if os.path.exists(_export_dir):
    shutil.rmtree(_export_dir)
os.makedirs(_export_dir)


def _tsv_path(prefix, index):
    """Return the path made of *prefix*, the stringified *index* and '.tsv'."""
    return prefix + str(index) + '.tsv'


# Export topic cloud
utils.save_topic_cloud(topic_model, 'static/data/topic_cloud.json')

# Export details about topics: one word-distribution file, one affiliation
# file and one per-year frequency file for each topic.
for topic_id in range(topic_model.nb_topics):
    top_words = topic_model.top_words(topic_id, 20)
    utils.save_word_distribution(
        top_words,
        _tsv_path('static/data/word_distribution', topic_id))

    repartition = topic_model.affiliation_repartition(topic_id)
    utils.save_affiliation_repartition(
        repartition,
        _tsv_path('static/data/affiliation_repartition', topic_id))

    # (year, frequency) pairs for the years 2004 through 2015.
    evolution = [(year, topic_model.topic_frequency(topic_id, date=year))
                 for year in range(2004, 2016)]
    utils.save_topic_evolution(
        evolution,
        _tsv_path('static/data/frequency', topic_id))

# Export details about documents: one topic-distribution file per document.
for doc_id in range(topic_model.corpus.size):
    distribution = topic_model.topic_distribution_for_document(doc_id)
    utils.save_topic_distribution(
        distribution,
        _tsv_path('static/data/topic_distribution_d', doc_id))

# Export details about words
for word_id in range(len(topic_model.corpus.vocabulary)):
Beispiel #3
0
# Build the non-negative matrix factorization model from the corpus, infer
# the requested number of topics and print the 10 top words of each.
topic_model = NonNegativeMatrixFactorization(corpus=corpus)
topic_model.infer_topics(num_topics=num_topics)
topic_model.print_topics(num_words=10)

# Reset the data directory so it only contains files from this run.
had_previous_export = os.path.exists('static/data')
if had_previous_export:
    shutil.rmtree('static/data')
os.makedirs('static/data')

# Export topic cloud
utils.save_topic_cloud(topic_model, 'static/data/topic_cloud.json')

# Export details about topics
years = range(2012, 2016)  # 2012 through 2015
for topic_id in range(topic_model.nb_topics):
    # Every per-topic file name ends with the topic id and the extension.
    suffix = str(topic_id) + '.tsv'

    words = topic_model.top_words(topic_id, 20)
    utils.save_word_distribution(
        words, 'static/data/word_distribution' + suffix)

    affiliations = topic_model.affiliation_repartition(topic_id)
    utils.save_affiliation_repartition(
        affiliations, 'static/data/affiliation_repartition' + suffix)

    # One (year, frequency) pair per year, in chronological order.
    evolution = [
        (year, topic_model.topic_frequency(topic_id, date=year))
        for year in years
    ]
    utils.save_topic_evolution(evolution, 'static/data/frequency' + suffix)

# Export details about documents
for doc_id in range(topic_model.corpus.size):
    doc_topics = topic_model.topic_distribution_for_document(doc_id)
    target = 'static/data/topic_distribution_d' + str(doc_id) + '.tsv'
    utils.save_topic_distribution(doc_topics, target)