def read_tree(text, out_dict, label):
    """Parse ``text`` into a tree and simplify it with the Collins rules.

    The text is wrapped in a file-like object and handed to
    ``ptb.read_tree``.  The parsed tree is homogenised in place, given the
    label 'ROOT' if its own label is blank, and then passed through
    ``ptb.apply_collins_rules``.

    Args:
        text: string holding the tree to parse.
        out_dict: passed through unchanged to ``mprint``.
        label: name of the tree being read; only used in the diagnostic
            message (e.g. "Empty <label> tree").

    Returns:
        The tree returned by ``ptb.apply_collins_rules``, or None when
        either ``ptb.read_tree`` or ``ptb.apply_collins_rules`` yields None.
    """
    # NOTE(review): the original was collapsed onto a single line; this
    # layout assumes all three mprint calls belong to the "tree is None"
    # branch -- confirm against the upstream file.
    parsed = ptb.read_tree(StringIO(text))
    if parsed is None:
        return None

    ptb.homogenise_tree(parsed)
    if not parsed.label.strip():
        # A blank label on the top node is replaced by an explicit root label.
        parsed.label = 'ROOT'

    simplified = ptb.apply_collins_rules(parsed)
    if simplified is not None:
        return simplified

    # Nothing survived simplification: report it, then show the tree before
    # and after the rules were applied (the latter prints as 'None').
    mprint("Empty {} tree".format(label), out_dict, ['out', 'err'])
    mprint(repr(parsed), out_dict, ['out', 'err'])
    mprint(repr(simplified), out_dict, ['out', 'err'])
    return simplified
import ptb

# Read the gold file (sys.argv[1]) and the test file (sys.argv[2]) in
# lockstep, one line from each per iteration, and print the test tree with
# the errors missing from it highlighted.
# The `with` blocks make sure both files are closed, including when an
# exception escapes the loop.
with open(sys.argv[1]) as gold_in:
    with open(sys.argv[2]) as test_in:
        while True:
            gold_text = gold_in.readline()
            test_text = test_in.readline()
            # readline() returns '' only at end of file; stop as soon as
            # either input runs out.
            if gold_text == '' or test_text == '':
                break

            gold_text = gold_text.strip()
            test_text = test_text.strip()
            # Skip the pair when either line is blank.
            if not gold_text or not test_text:
                continue

            tree = ptb.PTB_Tree()
            tree.set_by_text(gold_text)
            tree = ptb.apply_collins_rules(tree)
            # apply_collins_rules may return None; skip the pair rather
            # than hand None to Error_Tree.set_by_ptb.
            if tree is None:
                continue
            gold_tree = error_tree.Error_Tree()
            gold_tree.set_by_ptb(tree)

            tree = ptb.PTB_Tree()
            tree.set_by_text(test_text)
            tree = ptb.apply_collins_rules(tree)
            if tree is None:
                continue
            test_tree = error_tree.Error_Tree()
            test_tree.set_by_ptb(tree)

            # get_errors returns a sequence; only its first element is
            # used here.
            error_set = get_errors(gold_tree, test_tree)[0]
            missing = get_missing_errors(error_set, test_tree)
            # Parenthesised single-argument print behaves the same under
            # the Python 2 print statement and the Python 3 function.
            print(test_tree.colour_repr(missing=missing))
test_text = test_in.readline() if gold_text == '' or test_text == '': break gold_text = gold_text.strip() test_text = test_text.strip() if len(gold_text) == 0 or len(test_text) == 0: continue if VERBOSE: print gold_text tree = ptb.PTB_Tree() tree.set_by_text(gold_text) if VERBOSE: print tree simple_tree = ptb.apply_collins_rules(tree) if VERBOSE: print simple_tree if simple_tree is None: continue gold_tree = error_tree.Error_Tree() gold_tree.set_by_ptb(simple_tree, tree) if VERBOSE: print gold_tree if VERBOSE: print test_text tree = ptb.PTB_Tree() tree.set_by_text(test_text) if VERBOSE: print tree