Exemple #1
0
#!/usr/bin/env python
# encoding: utf-8

import sys
import textrank

## Stage 1:
##  * perform statistical parsing/tagging on a document in JSON format
##
## INPUTS: <stage0>
## OUTPUT: JSON format `ParsedGraf(id, sha1, graf)`

if __name__ == "__main__":
    # The single command-line argument is the path of the JSON input file.
    source_path = sys.argv[1]
    documents = textrank.json_iter(source_path)

    # Print one pretty-printed record per parsed paragraph ("graf").
    for parsed_graf in textrank.parse_doc(documents):
        record = parsed_graf._asdict()
        print(textrank.pretty_print(record))
Exemple #2
0
#!/usr/bin/env python
# encoding: utf-8

import sys
import textrank

DEBUG = False  # True

## Stage 2:
##  * collect and normalize the key phrases from a parsed document
##
## INPUTS: <stage1>
## OUTPUT: JSON format `RankedLexeme(text, rank, ids, pos)`

if __name__ == "__main__":
    # The single command-line argument is the path of the parsed input file.
    parsed_path = sys.argv[1]

    rank_result = textrank.text_rank(parsed_path)
    graph, ranks = rank_result
    textrank.render_ranks(graph, ranks)

    # Output as JSON: one pretty-printed record per normalized key phrase.
    for lexeme in textrank.normalize_key_phrases(parsed_path, ranks):
        record = lexeme._asdict()
        print(textrank.pretty_print(record))
Exemple #3
0
#!/usr/bin/env python
# encoding: utf-8

import sys
import textrank

DEBUG = True  # False

## "It scrubs its unreadable characters out of its text stream...
## Then it generates a JSON doc again."

# Typographic characters and the plain ASCII text that replaces each of them.
_ASCII_REPLACEMENTS = str.maketrans({
    '“': '"',    # left curly double quote
    '”': '"',    # right curly double quote
    '…': '...',  # horizontal ellipsis
    '–': '-',    # dash
    "’": "'",    # curly apostrophe
    "`": "'",    # backtick
})


def scrub_line(line):
    """Return *line* with surrounding whitespace removed and typographic
    punctuation replaced by its ASCII equivalent.

    Args:
        line: one line of raw input text (may include the trailing newline).

    Returns:
        The stripped line with curly quotes, ellipses, dashes and backticks
        mapped to plain ASCII characters.
    """
    # None of the replacement strings contains a character that is itself a
    # key in the table, so a single translate pass gives the same result as
    # applying the substitutions one after another.
    return line.strip().translate(_ASCII_REPLACEMENTS)


if __name__ == "__main__":
    path = sys.argv[1]

    # Decode explicitly as UTF-8: the substitutions above match non-ASCII
    # characters, so relying on the platform's default encoding would make
    # them silently miss (or fail to decode) on non-UTF-8 locales.
    with open(path, 'r', encoding='utf-8') as f:
        lines = [scrub_line(line) for line in f]

    # Build a one-document JSON record; the scrubbed lines are joined with
    # single spaces into one text field.
    j = {}
    j["id"] = "777"
    j["text"] = " ".join(lines)

    print(textrank.pretty_print(j))
Exemple #4
0
#!/usr/bin/env python
# encoding: utf-8

import sys
import textrank

## Stage 1:
##  * perform statistical parsing/tagging on a document in JSON format (Stage 0)
##  * output is in JSON format

if __name__ == "__main__":
    # The single command-line argument is the path of the JSON input file.
    source_path = sys.argv[1]
    documents = textrank.json_iter(source_path)

    # Print each parsed paragraph ("graf") as pretty-printed output.
    for parsed_graf in textrank.parse_doc(documents):
        rendered = textrank.pretty_print(parsed_graf)
        print(rendered)
Exemple #5
0
#!/usr/bin/env python
# encoding: utf-8

import sys
import textrank

DEBUG = False  # True

## Stage 2:
##  * summarize key phrases from a parsed document in JSON format (Stage 1)
##  * output is in JSON format

if __name__ == "__main__":
    # The single command-line argument is the path of the parsed input file.
    parsed_path = sys.argv[1]
    graph, ranks, summary = textrank.text_rank(parsed_path)

    if DEBUG:
        # Debug-only extras: render the ranks, then list every normalized
        # key phrase as "<rank>\t<text>" followed by its ids.
        textrank.render_ranks(graph, ranks)

        for phrase in textrank.normalize_keyphrases(summary):
            scored_text = "%0.4f\t%s" % (phrase.rank, phrase.text)
            print(scored_text, phrase.ids)

    # Output as JSON: one pretty-printed record per item from get_kernel().
    for kernel_item in textrank.get_kernel(summary):
        record = kernel_item._asdict()
        print(textrank.pretty_print(record))