pi[i][j][X] = max_score bp[i][j][X] = best_rule parse = decode(bp, 0, n-1, start) return parse def decode(bp, i, j, X): if i == j: nonterm, word = bp[i][j][X].split('~~') return '( ' + nonterm + ' ' + word + ' )' Y, Z, split_pos = bp[i][j][X].split(' ') s = int(split_pos) return '( ' + X + ' ' + decode(bp, i, s, Y) + ' ' + decode(bp, s+1, j, Z) + ' )' if __name__ == "__main__": #print "reading pcfg..." prob, pcfg_b, nonterms, start = hw3_utils.get_pcfg(sys.argv[1]) hw3_utils.print_size(prob) hw3_utils.print_size(pcfg_b) #print "reading data..." sentences = hw3_utils.get_sentences(sys.argv[2]) #print "parsing using cky..." parsefile = open("candidate_parses_test.out", "w") total_diff = 0 for sentence in sentences: if True:#len(sentence) <= 10: start_time = time() print sentences.index(sentence), ": size=", len(sentence) parse = run(sentence, nonterms, start, prob, pcfg_b, None) parsefile.write(parse + '\n')
bp[i][j][X] = Y + ' ' + Z + ' ' + str(s) parse = decode(bp, 0, n-1, start) return parse def decode(bp, i, j, X): if i == j: nonterm, word = bp[i][j][X].split('~~') return '( ' + nonterm + ' ' + word + ' )' Y, Z, split_pos = bp[i][j][X].split(' ') s = int(split_pos) return '( ' + X + ' ' + decode(bp, i, s, Y) + ' ' + decode(bp, s+1, j, Z) + ' )' if __name__ == "__main__": #print "reading pcfg..." pcfg_u, inv, leaves, start = hw3_utils.get_pcfg(sys.argv[1]) #print "reading data..." sentences = hw3_utils.get_sentences(sys.argv[2]) #print "parsing using cky..." parsefile = open("candidate_parses_test.out", "w") total_diff = 0 for sentence in sentences: if True:#len(sentence) <= 10: start_time = time() print sentences.index(sentence), ": size=", len(sentence) parse = run(sentence, leaves, start, inv, pcfg_u, None) parsefile.write(parse + '\n') end_time = time() diff = end_time - start_time
print parse_tags, "parser" if agree(parse_tags, tags): return k, tags, parse # converges in the kth iteration y = compute_indicators(tags, tagset) z = compute_indicators(parse_tags, tagset) k += 1 step_size = 1.0/k update(y, z, u, step_size) return -1, tags, parse # does not converge if __name__ == "__main__": #print "reading pcfg..." pcfg_u, pcfg, preterms, start = hw3_utils.get_pcfg(sys.argv[1]) # print "reading hmm..." trans_file = sys.argv[2] em_file = sys.argv[3] hmm, tagset = hw3_utils.get_hmm_tagset(trans_file, em_file) #print "reading data..." sentences = hw3_utils.get_sentences(sys.argv[4]) #print "parsing using cky..." parsefile = open("candidate_parses_test2.out", "w") posfile = open("candidate_postags_test2.out", "w") print "--------------------------------------------------------------------"