Esempio n. 1
0
def textRank(tokenized_words, tag_word_dict, damping=0.85, tolerance=0.0001,
             max_iterations=5000):
    """Rank candidate words of a text with an iterative TextRank-style score.

    The candidate words returned by ``syntactic_filter(tag_word_dict)`` become
    the vertices of a graph.  Two candidates are linked (in both directions)
    whenever one occurs in a concordance context of the other.  Scores are
    then refined repeatedly with

        score(v) = (1 - damping) + damping * sum(score(u) / degree(u))

    over the neighbours ``u`` of ``v`` until the largest change of any vertex
    within one sweep is at most ``tolerance``, or ``max_iterations`` sweeps
    have been performed.

    Args:
        tokenized_words: Token sequence handed to ``nltk.Text``.
        tag_word_dict: Passed unchanged to ``syntactic_filter`` to select the
            candidate words (presumably a POS-tag -> words mapping; confirm
            against ``syntactic_filter``).
        damping: Damping factor of the update formula.
        tolerance: Convergence threshold on the largest per-vertex change
            observed during a single sweep.
        max_iterations: Upper bound on the number of sweeps.

    Returns:
        A list of ``(word, score)`` tuples sorted by descending score.
    """
    words_set = syntactic_filter(tag_word_dict)

    # Every candidate word starts as a vertex without edges.
    graph = Graph({w: [] for w in words_set})

    # Add the edges: link each candidate with the other candidates found in
    # its concordance contexts.
    words = nltk.Text(tokenized_words)
    doc = nltk.ConcordanceIndex(words)
    for w in words_set:
        for context in get_concordance(w, doc):
            # context[0] / context[1] are split on whitespace; presumably the
            # text to the left and right of the match -- confirm against
            # get_concordance.
            neighbours = context[0].split() + context[1].split()
            for other in neighbours:
                if other in words_set:
                    graph.add_edge((w, other))
                    graph.add_edge((other, w))

    # Run the text rank algorithm.
    for _ in range(max_iterations):
        # Largest score change seen in THIS sweep.  It must be reset every
        # sweep and track the maximum: stopping on the smallest change ever
        # seen would end the iteration as soon as a single vertex (e.g. an
        # isolated one) stabilises, long before the rest has converged.
        delta = 0.0

        for v in graph.vertices():
            old_rank = graph.text_rank(v)

            neighbour_sum = 0
            for v2 in graph.adjacency_list(v):
                neighbour_sum += graph.text_rank(v2) / graph.vertex_degree(v2)

            value = (1 - damping) + damping * neighbour_sum
            # Scores are updated in place, so later vertices in the same
            # sweep already see the new value.
            graph.set_text_rank(v, value)

            delta = max(delta, abs(value - old_rank))

        if delta <= tolerance:
            break

    text_rank_dict = {v: graph.text_rank(v) for v in graph.vertices()}

    return sorted(text_rank_dict.items(), key=operator.itemgetter(1),
                  reverse=True)