Esempio n. 1
0
                pi[i][j][X] = max_score
                bp[i][j][X] = best_rule
    parse = decode(bp, 0, n-1, start)
    return parse

def decode(bp, i, j, X):
    """Rebuild the bracketed parse string for span i..j rooted at symbol X.

    bp is indexed as bp[i][j][X] and holds backpointer strings:
      * for a one-word span (i == j) the entry is 'TAG~~word';
      * otherwise the entry is 'Y Z s', meaning X -> Y Z with Y covering
        i..s and Z covering s+1..j.

    Returns the tree as a string such as '( X ( Y w1 ) ( Z w2 ) )'.
    """
    entry = bp[i][j][X]

    if i == j:
        # Leaf: the label comes from the stored entry itself.
        tag, token = entry.split('~~')
        return ' '.join(['(', tag, token, ')'])

    left_label, right_label, cut = entry.split(' ')
    mid = int(cut)
    left_tree = decode(bp, i, mid, left_label)
    right_tree = decode(bp, mid + 1, j, right_label)
    return ' '.join(['(', X, left_tree, right_tree, ')'])
       
if __name__ == "__main__":
    #print "reading pcfg..."
    prob, pcfg_b, nonterms, start = hw3_utils.get_pcfg(sys.argv[1])
    hw3_utils.print_size(prob)
    hw3_utils.print_size(pcfg_b)

    #print "reading data..."
    sentences = hw3_utils.get_sentences(sys.argv[2])

    #print "parsing using cky..."
    parsefile = open("candidate_parses_test.out", "w")
    total_diff = 0
    for sentence in sentences:
        if True:#len(sentence) <= 10:
            start_time = time()
            print sentences.index(sentence), ": size=", len(sentence)
            parse = run(sentence, nonterms, start, prob, pcfg_b, None) 
            parsefile.write(parse + '\n')
Esempio n. 2
0
                                    bp[i][j][X] = Y + ' ' + Z + ' ' + str(s)

    parse = decode(bp, 0, n-1, start)
    return parse

def decode(bp, i, j, X):
    """Follow the backpointers in bp and return the parse of span i..j as text.

    bp[i][j][X] is either 'Y Z s' (binary rule X -> Y Z, left child ends at
    position s) or, when the span is a single word, 'TAG~~word'.
    The result uses space-separated brackets: '( X <left> <right> )'.
    """
    rule = bp[i][j][X]

    if i != j:
        # Binary rule: recurse into both halves of the span.
        left_sym, right_sym, split_str = rule.split(' ')
        k = int(split_str)
        children = (decode(bp, i, k, left_sym),
                    decode(bp, k + 1, j, right_sym))
        return ' '.join(('(', X) + children + (')',))

    # Single-word span: emit the stored tag and word.
    preterminal, token = rule.split('~~')
    return ' '.join(('(', preterminal, token, ')'))
       
if __name__ == "__main__":
    #print "reading pcfg..."
    pcfg_u, inv, leaves, start = hw3_utils.get_pcfg(sys.argv[1])

    #print "reading data..."
    sentences = hw3_utils.get_sentences(sys.argv[2])

    #print "parsing using cky..."
    parsefile = open("candidate_parses_test.out", "w")
    total_diff = 0
    for sentence in sentences:
        if True:#len(sentence) <= 10:
            start_time = time()
            print sentences.index(sentence), ": size=", len(sentence)
            parse = run(sentence, leaves, start, inv, pcfg_u, None) 
            parsefile.write(parse + '\n')
            end_time = time()
            diff = end_time - start_time
        print parse_tags, "parser"

        if agree(parse_tags, tags):
            return k, tags, parse  # converges in the kth iteration
        
        y = compute_indicators(tags, tagset)
        z = compute_indicators(parse_tags, tagset)
        k += 1
        step_size = 1.0/k
        update(y, z, u, step_size)

    return -1, tags, parse # does not converge

if __name__ == "__main__":
    # Script entry point. Reads four command-line arguments:
    #   sys.argv[1] -> passed to hw3_utils.get_pcfg
    #   sys.argv[2], sys.argv[3] -> passed to hw3_utils.get_hmm_tagset
    #   sys.argv[4] -> passed to hw3_utils.get_sentences
    # NOTE(review): the meaning of each value returned by get_pcfg is not
    # visible here; confirm against hw3_utils.
    pcfg_u, pcfg, preterms, start = hw3_utils.get_pcfg(sys.argv[1])

    # Paths of the HMM transition and emission files.
    trans_file = sys.argv[2]
    em_file = sys.argv[3]

    hmm, tagset = hw3_utils.get_hmm_tagset(trans_file, em_file)

    # Load the input sentences.
    sentences = hw3_utils.get_sentences(sys.argv[4])

    # Output files, opened for writing (any existing content is truncated).
    # Judging by the names, one receives parses and the other POS tags.
    # NOTE(review): no close() is visible in this part of the script.
    parsefile = open("candidate_parses_test2.out", "w")
    posfile = open("candidate_postags_test2.out", "w")
    
    # Visual separator on stdout.
    print "--------------------------------------------------------------------"