def get_topics_noun_phrases(num_news, draw=False, url='http://cnn.com'):
    """Build a noun-phrase graph from news texts and derive topics from it.

    The texts returned by ``get_news(url, num_news)`` are loaded into a
    ``NounPhraseGraphBuilder``, the resulting graph is partitioned with
    ``community.best_partition``, and the topics plus the partition's
    modularity are printed to stdout.

    Args:
        num_news: Forwarded to ``get_news`` (presumably the number of
            articles to fetch -- confirm against ``get_news``).
        draw: When true, draw the graph with a spring layout, colouring
            each node by its partition id, and call ``plt.show()``.
        url: Forwarded to ``get_news`` as the news source.

    Returns:
        A ``(G, topics)`` tuple: the graph produced by the builder and the
        result of ``get_topics_from_partitions(G, words_by_part, 10)``.
    """
    texts = get_news(url, num_news)

    gb = NounPhraseGraphBuilder(
        text_processing.clean_punctuation_and_stopwords)
    gb.load_texts(texts)
    G = gb.create_graph()
    # Single-argument print() emits the same text on Python 2 and 3.
    print("Graph built")

    partition = community.best_partition(G)
    words_by_part = get_words_by_partition(partition)

    # NOTE(review): the literal 10 is presumably the number of words shown
    # per topic -- confirm against print_topics_from_partitions.
    print_topics_from_partitions(G, words_by_part, 10)

    mod = community.modularity(partition, G)
    # Format into one string: print("modularity:", mod) would print a tuple
    # repr under the Python 2 print statement.
    print("modularity: %s" % mod)

    if draw:
        # One colour value per node, in G.nodes() order.
        values = [partition.get(node) for node in G.nodes()]
        nx.draw_spring(G, cmap=plt.get_cmap('jet'), node_color=values,
                       node_size=30, with_labels=False)
        plt.show()

    topics = get_topics_from_partitions(G, words_by_part, 10)
    return G, topics
def get_topics_noun_phrases_overlapping(num_news, k, url='http://cnn.com'):
    """Build a noun-phrase graph from news texts and derive overlapping topics.

    The texts returned by ``get_news(url, num_news)`` are loaded into a
    ``NounPhraseGraphBuilder``; the resulting graph is clustered with
    ``graph_cluster.get_overlap_clusters`` and the clusters are turned into
    topics.

    Args:
        num_news: Forwarded to ``get_news`` (presumably the number of
            articles to fetch -- confirm against ``get_news``).
        k: Forwarded to ``graph_cluster.get_overlap_clusters`` as its
            second argument.
        url: Forwarded to ``get_news`` as the news source.

    Returns:
        A ``(G, topics)`` tuple: the graph produced by the builder and the
        result of ``get_topics_from_partitions(G, words_by_part, 10)``.
    """
    texts = get_news(url, num_news)

    gb = NounPhraseGraphBuilder(
        text_processing.clean_punctuation_and_stopwords)
    gb.load_texts(texts)
    G = gb.create_graph()
    # Single-argument print() emits the same text on Python 2 and 3.
    print("Graph built")

    # NOTE(review): the meaning of the trailing 1 is not visible from here --
    # confirm against graph_cluster.get_overlap_clusters.
    words_by_part = graph_cluster.get_overlap_clusters(G, k, 1)

    # NOTE(review): the literal 10 is presumably the number of words kept
    # per topic -- confirm against get_topics_from_partitions.
    topics = get_topics_from_partitions(G, words_by_part, 10)
    return G, topics