Beispiel #1
0
import sys
import summarisation

# Command-line entry point: summarise the file named by the first argument
# and print the formatted result.
if len(sys.argv) < 2:
    # quit() is injected by the site module for interactive sessions and
    # exits with status 0. sys.exit() with a message is always available,
    # writes the message to stderr and ends the process with a non-zero
    # status, so calling scripts can detect the misuse.
    sys.exit("usage: python read.py filename")

filename = sys.argv[1]

sentences = summarisation.summarise(filename, debug_output=False)

# Single-argument print(...) behaves the same under Python 2 and Python 3.
print(summarisation.outputspl(sentences))
# print sentences
Beispiel #2
0
            "cd stanford-corenlp && java -cp stanford-corenlp-3.2.0.jar:stanford-corenlp-3.2.0-models.jar:xom.jar:joda-time.jar:jollyday.jar -Xmx3g edu.stanford.nlp.pipeline.StanfordCoreNLP -file "
            + filename)

# Read the sentences (plus a second value kept as `coref`) from the XML file
# stored under stanford-corenlp/ for this input file.
xml_path = "stanford-corenlp/" + filename + ".xml"
sentences, coref = splitAndParse.splitSentencesAndParse(xml_path)

# Build the list of sentence indices and put it into random order in place.
order = list(range(len(sentences)))
random.shuffle(order)


def cutoff_words(order, sentences, wc):
    """Pick sentences in ``order`` until a budget of ``wc`` words is used up.

    Sentences are visited in the sequence given by ``order``. A sentence
    that would exceed the remaining budget is shortened with
    ``summarisation.cut_words`` and ends the selection. The chosen sentences
    are returned sorted by their index in ``sentences``, not in pick order.

    Args:
        order: iterable of indices into ``sentences``, in selection order.
        sentences: indexable collection of sentence objects accepted by
            ``summarisation.words`` and ``summarisation.cut_words``.
        wc: maximum total size, measured by ``summarisation.words``.

    Returns:
        List of the selected (possibly shortened) sentences, ordered by
        index. Empty when ``wc`` is not positive.
    """
    if wc <= 0:
        # No budget at all: never hand cut_words a zero or negative length.
        return []

    picked = []
    used = 0
    for k in order:
        sentence = sentences[k]
        swc = summarisation.words(sentence)
        if used + swc > wc:
            # Only part of this sentence fits; it exhausts the budget.
            picked.append((k, summarisation.cut_words(sentence, wc - used)))
            break
        picked.append((k, sentence))
        used += swc
        if used == wc:
            break

    # Sort on the index alone so sentence objects are never compared with
    # each other when indices tie (they may not be orderable).
    picked.sort(key=lambda pair: pair[0])
    return [sentence for _, sentence in picked]


# Select randomly ordered sentences within a budget of 200 (as counted by
# summarisation.words) and print them in formatted form.
selected = cutoff_words(order, sentences, 200)
print(summarisation.outputspl(selected))