Exemple #1
0
def get_topics_noun_phrases(num_news, draw=False, url='http://cnn.com'):

    texts = get_news(url, num_news)

    gb = NounPhraseGraphBuilder(text_processing.clean_punctuation_and_stopwords)
    gb.load_texts(texts)
    G = gb.create_graph()
    print "Graph built"

    partition = community.best_partition(G)
    words_by_part = get_words_by_partition(partition)

    print_topics_from_partitions(G, words_by_part, 10)

    mod = community.modularity(partition,G)
    print("modularity:", mod)

    #print_topics_from_partitions(G, words_by_part, 10)
    if draw:
        values = [partition.get(node) for node in G.nodes()]
        nx.draw_spring(G, cmap = plt.get_cmap('jet'), node_color = values, node_size=30, with_labels=False)
        plt.show()

    topics = get_topics_from_partitions(G, words_by_part, 10)

    return G, topics
Exemple #2
0
def get_topics_noun_phrases_overlapping(num_news, k, url='http://cnn.com'):

    texts = get_news(url, num_news)

    gb = NounPhraseGraphBuilder(text_processing.clean_punctuation_and_stopwords)
    gb.load_texts(texts)
    G = gb.create_graph()
    print "Graph built"

    words_by_part = graph_cluster.get_overlap_clusters(G, k, 1)

    #print_topics_from_partitions(G, words_by_part, 10)

    topics = get_topics_from_partitions(G, words_by_part, 10)

    return G, topics