def get_topics_from_partitions(G, words_by_part, num_words_per_topic=10):
    """Return the top-ranked words of each graph partition as a topic.

    For every partition, the induced subgraph of ``G`` is extracted and
    its ``num_words_per_topic`` highest-PageRank nodes are collected as
    that partition's topic.

    :param G: word graph (networkx-style; must support ``subgraph``).
    :param words_by_part: sequence of node collections, one per partition.
    :param num_words_per_topic: number of top words to keep per topic.
    :return: list of word lists, one inner list per partition, in the
        same order as ``words_by_part``.
    """
    topics = []
    # Iterate partitions directly -- the index served no purpose here.
    for part in words_by_part:
        H = G.subgraph(part)
        # pagerank_top_k presumably returns an array-like; .tolist()
        # converts it to a plain list -- confirm against graph_cluster.
        topics.append(graph_cluster.pagerank_top_k(H, num_words_per_topic).tolist())
    return topics
def print_topics_from_partitions(G, words_by_part, num_words_per_topic=10): for counter in xrange(0, len(words_by_part)): print '\nTopic {}:\n----------'.format(counter) H = G.subgraph(words_by_part[counter]) print ', '.join(graph_cluster.pagerank_top_k(H, num_words_per_topic))
# -- example using non dictionary words
# Build a word-occurrence graph from the news corpus, keeping only
# non-dictionary words, then detect communities and print one topic
# per community. (A stale commented-out copy of this pipeline that
# used graphtype='occurence' was removed.)
gb = SimpleGraphBuilder(text_processing.only_non_dictionary_words, stem_words=False)
texts = (article['text'] for article in news.polished())
gb.load_texts(texts)
G = gb.create_graph()

partition = community.best_partition(G)
words_by_part = get_words_by_partition(partition)

# Reuse the shared helper instead of duplicating its loop inline.
print_topics_from_partitions(G, words_by_part, 10)
# -- example using non dictionary words
# NOTE(review): this block repeats the identical pipeline run that
# appears earlier in the file -- presumably a copy/paste leftover;
# confirm whether the second run is intentional. (A commented-out
# NounPhraseGraphBuilder example that preceded it was removed as
# dead code.)
gb = SimpleGraphBuilder(text_processing.only_non_dictionary_words, stem_words=False)
texts = (article['text'] for article in news.polished())
gb.load_texts(texts)
G = gb.create_graph()

partition = community.best_partition(G)
words_by_part = get_words_by_partition(partition)

# Reuse the shared helper instead of duplicating its loop inline.
print_topics_from_partitions(G, words_by_part, 10)