Ejemplo n.º 1
0
def get_topics_from_partitions(G, words_by_part, num_words_per_topic=10):
    """Return the top-ranked words of every partition as a list of lists.

    For each index ``i`` in ``0 .. len(words_by_part) - 1`` the subgraph of
    ``G`` induced by ``words_by_part[i]`` is built and handed to
    ``graph_cluster.pagerank_top_k``; the result is converted with
    ``.tolist()`` and collected.

    Args:
        G: Graph object exposing ``subgraph(nodes)``.
        words_by_part: Container that can be indexed with the consecutive
            integers ``0 .. len(words_by_part) - 1``; each entry is the node
            collection passed to ``G.subgraph``.
        num_words_per_topic: Forwarded unchanged as the second argument of
            ``graph_cluster.pagerank_top_k`` (presumably the ``k`` of the
            top-k selection -- confirm against that helper).

    Returns:
        A list with one entry per partition, in index order. An empty
        ``words_by_part`` yields an empty list.
    """
    # Integer indexing (instead of iterating the container directly) is kept
    # on purpose: it gives the same result whether ``words_by_part`` is a
    # sequence or a mapping keyed by 0..n-1.
    # ``range`` replaces the Python-2-only ``xrange`` so the function also
    # runs on Python 3.
    return [
        graph_cluster.pagerank_top_k(
            G.subgraph(words_by_part[part_id]), num_words_per_topic
        ).tolist()
        for part_id in range(len(words_by_part))
    ]
Ejemplo n.º 2
0
def print_topics_from_partitions(G, words_by_part, num_words_per_topic=10):
    """Print a header and the comma-joined top words of every partition.

    For each index ``i`` in ``0 .. len(words_by_part) - 1`` a
    ``Topic i:`` header is printed, followed by the items returned by
    ``graph_cluster.pagerank_top_k`` for the subgraph of ``G`` induced by
    ``words_by_part[i]``, joined with ``', '``.

    Args:
        G: Graph object exposing ``subgraph(nodes)``.
        words_by_part: Container that can be indexed with the consecutive
            integers ``0 .. len(words_by_part) - 1``; each entry is the node
            collection passed to ``G.subgraph``.
        num_words_per_topic: Forwarded unchanged as the second argument of
            ``graph_cluster.pagerank_top_k``.

    Returns:
        None. Nothing is printed when ``words_by_part`` is empty.
    """
    # ``range`` and single-argument ``print(...)`` behave identically on
    # Python 2.7 and Python 3, unlike ``xrange`` and the print statement.
    for part_id in range(len(words_by_part)):
        print('\nTopic {}:\n----------'.format(part_id))
        topic_graph = G.subgraph(words_by_part[part_id])
        top_words = graph_cluster.pagerank_top_k(topic_graph, num_words_per_topic)
        # The ranked items are joined as strings, so they must be str-like.
        print(', '.join(top_words))
Ejemplo n.º 3
0
# gb.load_texts(texts)
# G = gb.create_graph(graphtype='occurence')
#
# partition = community.best_partition(G)
# words_by_part = get_words_by_partition(partition)
#
#
# for counter in xrange(0, len(words_by_part)):
# 	print '\nTopic {}:\n----------'.format(counter)
# 	H = G.subgraph(words_by_part[counter])
# 	print ', '.join(graph_cluster.pagerank_top_k(H, 10))

# -- example using non dictionary words

# Build a graph from the 'text' field of every article yielded by
# news.polished(). The builder is configured with the
# only_non_dictionary_words processor and stemming switched off.
gb = SimpleGraphBuilder(text_processing.only_non_dictionary_words, stem_words=False)
# Generator expression: the article texts are produced lazily for load_texts.
texts = (article['text'] for article in news.polished())
gb.load_texts(texts)
G = gb.create_graph()

# Partition the graph, then regroup the partition result per partition.
# NOTE(review): `community` is presumably the python-louvain package -- confirm.
partition = community.best_partition(G)
words_by_part = get_words_by_partition(partition)

# Print the top 10 PageRank-ranked items of each partition's subgraph.
# `range` and single-argument `print(...)` run unchanged on Python 2.7 and 3;
# the loop body is indented with spaces instead of tabs.
for counter in range(len(words_by_part)):
    print('\nTopic {}:\n----------'.format(counter))
    H = G.subgraph(words_by_part[counter])
    print(', '.join(graph_cluster.pagerank_top_k(H, 10)))




Ejemplo n.º 4
0
#
# gb = NounPhraseGraphBuilder(text_processing.clean_punctuation_and_stopwords)
# texts = (article['text'] for article in news.polished())
# gb.load_texts(texts)
# G = gb.create_graph(graphtype='occurence')
#
# partition = community.best_partition(G)
# words_by_part = get_words_by_partition(partition)
#
#
# for counter in xrange(0, len(words_by_part)):
# 	print '\nTopic {}:\n----------'.format(counter)
# 	H = G.subgraph(words_by_part[counter])
# 	print ', '.join(graph_cluster.pagerank_top_k(H, 10))

# -- example using non dictionary words

# Graph construction: feed the 'text' field of each article from
# news.polished() into a SimpleGraphBuilder that uses the
# only_non_dictionary_words processor with stemming disabled.
gb = SimpleGraphBuilder(text_processing.only_non_dictionary_words,
                        stem_words=False)
texts = (article['text'] for article in news.polished())  # lazy generator
gb.load_texts(texts)
G = gb.create_graph()

# Community detection on the graph, then grouping of the result by partition.
# NOTE(review): `community` is presumably the python-louvain package -- confirm.
partition = community.best_partition(G)
words_by_part = get_words_by_partition(partition)

# One "Topic N" section per partition, listing the 10 items returned by
# graph_cluster.pagerank_top_k for that partition's subgraph.
# `range` / `print(...)` replace the Python-2-only `xrange` / print statement
# and produce the same output on Python 2.7 and Python 3.
for counter in range(len(words_by_part)):
    print('\nTopic {}:\n----------'.format(counter))
    H = G.subgraph(words_by_part[counter])
    print(', '.join(graph_cluster.pagerank_top_k(H, 10)))