Beispiel #1
0
def readDocument(path_sentences, path_trees, path_output, max_length):
    """Parse every sentence of a file with CKY and Viterbi.

    Sentences are read line by line from ``path_sentences`` together with
    the line at the same position in ``path_trees``.  Each sentence that is
    not too long is handed to ``CKY.run``; when that returns a truthy tree,
    the tree is written to ``path_output`` and the matching line from
    ``path_trees`` is written to ``GOLD_OUTPUT_<timestamp>.txt`` in the
    current working directory, so both output files stay aligned.

    Args:
        path_sentences: Path of the file holding one sentence per line,
            tokens separated by single spaces.
        path_trees: Path of the file holding one reference tree per line.
        path_output: Path of the file the produced trees are written to
            (truncated if it already exists).
        max_length: Sentences whose size (token count minus two) exceeds
            this value are skipped.
    """
    gold_path = "GOLD_OUTPUT_" + str(time.time()) + ".txt"

    # Context managers guarantee that all four files are closed, also when
    # parsing raises.  The inputs are only read, so plain 'r' is enough.
    with open(path_sentences, 'r') as file_sentences, \
            open(path_trees, 'r') as file_trees, \
            open(gold_path, 'w') as correct_trees, \
            open(path_output, 'w') as test_trees:
        sentence_parser = Parser()
        parse_information = sentence_parser.load_database(
            'parse_information_unknown.p')
        algorithm = CKY(parse_information)

        # Iterating the file stops cleanly at EOF, so no progress message is
        # printed for the empty string that readline() returns there.
        for i, sentence in enumerate(file_sentences):
            # Consume the reference tree even for skipped sentences to keep
            # both inputs in lockstep.
            correct_tree = file_trees.readline()
            words = sentence.split(' ')

            # The last two tokens are not counted and not parsed
            # (presumably sentence-final token and newline -- TODO confirm
            # against the input format).
            size = len(words) - 2
            if size > max_length:
                print("-- Skip sentence too long --")
                continue
            print("CKY starts parsing sentence: %s of size: %s" % (i, size))

            # Run CKY and Viterbi
            tree = algorithm.run(words[:size])
            if tree:
                test_trees.write(tree)
                correct_trees.write(correct_tree)