def compare(gold_text, test_text, out_dict, error_counts, language='english'):
    """Compare a gold tree and a test tree that are given in text form.

    Both texts are stripped and parsed with ``read_tree``.  The comparison is
    abandoned early, with a message sent through ``mprint``, when:

    * the gold text is empty ("No gold tree"),
    * the test text is empty ("Not parsed"),
    * either text fails to parse, i.e. ``read_tree`` returns ``None``
      ("Not parsed, but had output"), or
    * the two trees yield a different number of whitespace-separated words.

    Otherwise the coloured rendering of the test tree against the gold tree
    is written to the 'init_errors' target and the work is handed over to
    ``compare_trees``.

    Args:
        gold_text: Text of the reference tree.
        test_text: Text of the tree being evaluated.
        out_dict: Passed through to ``mprint`` and ``read_tree``; presumably
            maps target names such as 'out' and 'err' to output streams
            (TODO confirm against ``mprint``).
        error_counts: Passed through unchanged to ``compare_trees``.
        language: Passed through unchanged to ``compare_trees``.

    Returns:
        ``None`` on every early exit; otherwise whatever ``compare_trees``
        returns.
    """
    gold_text = gold_text.strip()
    test_text = test_text.strip()

    # The gold check comes first: when both texts are empty only the
    # "No gold tree" message is reported.
    if not gold_text:
        mprint("No gold tree", out_dict, ['out', 'err'])
        return None
    if not test_text:
        mprint("Not parsed", out_dict, ['out', 'err'])
        return None

    # Both texts are always parsed, even if the first parse fails, so that
    # read_tree is called once for each side.
    gold_tree = read_tree(gold_text, out_dict, 'gold')
    test_tree = read_tree(test_text, out_dict, 'test')
    if gold_tree is None or test_tree is None:
        mprint("Not parsed, but had output", out_dict,
               ['out', 'err', 'init_errors'])
        return None

    # The initial rendering is emitted before the length check, so it is
    # written even for sentence pairs that are rejected just below.
    rendered = render_tree.text_coloured_errors(test_tree, gold_tree).strip()
    mprint(rendered, out_dict, 'init_errors')

    gold_words = gold_tree.word_yield()
    test_words = test_tree.word_yield()
    if len(test_words.split()) != len(gold_words.split()):
        mprint("Sentence lengths do not match...", out_dict, ['out', 'err'])
        mprint("Gold: " + repr(gold_words), out_dict, ['out', 'err'])
        mprint("Test: " + repr(test_words), out_dict, ['out', 'err'])
        return None

    return compare_trees(gold_tree, test_tree, out_dict, error_counts,
                         language)
def compare_trees(gold_tree, test_tree, out_dict, error_counts,
                  language='english'):
    """Compare two parsed trees and report the path found between them.

    Reports the number of initial errors, runs ``greedy_search`` and reports
    its iteration statistics.  When the search yields a path, the test and
    gold trees are written to the 'test_trees' and 'gold_trees' targets, one
    summary line is written per path entry after the first, and, if the path
    has more than one entry, every entry is reported in detail and recorded
    in ``error_counts``.  When the search yields no path, "no path found" is
    reported instead.  An empty line is always written to 'out' and 'err' at
    the end.

    Args:
        gold_tree: Reference tree.
        test_tree: Tree being evaluated; must provide ``get_errors``.
        out_dict: Passed through to ``mprint``.
        error_counts: Mapping from classified error type to a list; the third
            element of each path entry is appended to the matching list.
            Missing keys are not created here, so this is presumably a
            ``defaultdict(list)`` (TODO confirm against the caller).
        language: Passed through to ``greedy_search``.

    Returns:
        ``None``; all results are delivered through ``mprint`` and
        ``error_counts``.
    """
    error_count = len(test_tree.get_errors(gold_tree))
    mprint("%d Initial errors" % error_count, out_dict, 'out')

    iters, path = greedy_search(gold_tree, test_tree, language)
    # iters fills both %d slots at once, so it has to be a 2-tuple.
    mprint("%d on fringe, %d iterations" % iters, out_dict, 'out')

    # NOTE(review): the block structure was reconstructed from a version of
    # this function that had lost its line breaks; "no path found" is taken
    # to be the path-is-None branch -- confirm.
    if path is None:
        mprint("no path found", out_dict, 'out')
    else:
        mprint(repr(test_tree), out_dict, 'test_trees')
        mprint(repr(gold_tree), out_dict, 'gold_trees')

        # Path entries are indexed as (tree, info, value): info is a mapping
        # holding 'classified_type' and value is what gets counted.
        for step in path[1:]:
            mprint(str(step[2]) + " Error:" + step[1]['classified_type'],
                   out_dict, 'out')

        if len(path) > 1:
            # Unlike the summary above, the detailed report also covers the
            # first path entry.
            for step in path:
                error_type = step[1]['classified_type']
                mprint("Step:" + error_type, out_dict, 'out')
                error_counts[error_type].append(step[2])
                mprint(repr(step[1]), out_dict, 'out')
                rendered = render_tree.text_coloured_errors(
                    step[0], gold=gold_tree).strip()
                mprint(rendered, out_dict, 'out')

    mprint("", out_dict, ['out', 'err'])