def read_tree(text, out_dict, label):
    """Read one tree from ``text`` and return its Collins-rule version.

    ``text`` is wrapped in a ``StringIO`` and handed to
    ``treebanks.ptb_read_tree``.  When that call yields ``None`` this
    function returns ``None`` immediately and writes nothing.

    Otherwise the tree is passed to ``treebanks.homogenise_tree`` (its
    return value is not used here), a root label that is empty or only
    whitespace is replaced with ``'ROOT'``, and the tree is given to
    ``treebanks.apply_collins_rules``.

    If ``apply_collins_rules`` returns ``None``, three lines are printed to
    each of ``out_dict['out']`` and ``out_dict['err']``: the message
    ``"Empty <label> tree"``, the tree object that was passed to
    ``apply_collins_rules``, and the ``None`` result itself.

    Args:
        text: String containing the tree to read.
        out_dict: Mapping that provides writable streams under the keys
            ``'out'`` and ``'err'``.
        label: Text substituted into the "Empty {} tree" diagnostic
            (presumably something like "gold" or "test" -- confirm
            against callers).

    Returns:
        Whatever ``treebanks.apply_collins_rules`` returned, or ``None``
        when no tree could be read from ``text``.
    """
    parsed = treebanks.ptb_read_tree(StringIO(text))
    if parsed is None:
        return None

    # Return value deliberately ignored, exactly as the call was written
    # before.  NOTE(review): confirm homogenise_tree works in place.
    treebanks.homogenise_tree(parsed)

    # Give the root a usable label when it is blank.
    if not parsed.label.strip():
        parsed.label = 'ROOT'

    pruned = treebanks.apply_collins_rules(parsed)
    if pruned is not None:
        return pruned

    # Nothing came back from the Collins rules: report on both streams.
    # Both streams are looked up before anything is written.
    streams = (out_dict['out'], out_dict['err'])
    for stream in streams:
        print >> stream, "Empty {} tree".format(label)
        print >> stream, parsed
        print >> stream, pruned
    return pruned
break mprint("Sentence %d:" % sent_no, out, 'all') gold_text = gold_text.strip() test_text = test_text.strip() if len(gold_text) == 0: mprint("No gold tree", out, 'all') continue elif len(test_text) == 0: mprint("Not parsed", out, 'all') continue gold_complete_tree = pstree.tree_from_text(gold_text) treebanks.ptb_cleaning(gold_complete_tree) gold_tree = treebanks.apply_collins_rules(gold_complete_tree, False) if gold_tree is None: mprint("Empty gold tree", out, 'all') mprint(gold_complete_tree.__repr__(), out, 'all') mprint(gold_tree.__repr__(), out, 'all') continue if '()' in test_text: mprint("() test tree", out, 'all') continue test_complete_tree = pstree.tree_from_text(test_text) treebanks.ptb_cleaning(test_complete_tree) test_tree = treebanks.apply_collins_rules(test_complete_tree, False) if test_tree is None: mprint("Empty test tree", out, 'all') mprint(test_complete_tree.__repr__(), out, 'all')
# Apply edits if 'h' in edits: tree = treebanks.homogenise_tree(tree) if gold_tree is not None: gold_tree = treebanks.homogenise_tree(gold_tree) if 't' in edits: treebanks.remove_traces(tree) if gold_tree is not None: treebanks.remove_traces(gold_tree) if 'f' in edits: treebanks.remove_function_tags(tree) if gold_tree is not None: treebanks.remove_function_tags(gold_tree) if 'c' in edits: treebanks.apply_collins_rules(tree) if gold_tree is not None: treebanks.apply_collins_rules(gold_tree) if 'u' in edits: # This must be after all other deletion to work properly treebanks.remove_trivial_unaries(tree) if gold_tree is not None: treebanks.remove_trivial_unaries(gold_tree) # Print tree if out_format == 's': print render_tree.text_tree(tree, single_line=True) elif out_format == 'm': print render_tree.text_tree(tree, single_line=False) elif out_format == 'o': print render_tree.text_ontonotes(tree)