test_words = test_tree.word_yield() if len(test_words.split()) != len(gold_words.split()): mprint("Sentence lengths do not match...", out, 'all') mprint("Gold: " + gold_words.__repr__(), out, 'all') mprint("Test: " + test_words.__repr__(), out, 'all') mprint("After applying collins rules:", out, 'out') mprint( render_tree.text_coloured_errors(test_tree, gold_tree).strip(), out, 'out') match, gold, test, crossing, POS = parse_errors.counts_for_prf( test_tree, gold_tree) stats['out'][0] += match stats['out'][1] += gold stats['out'][2] += test p, r, f = nlp_eval.calc_prf(match, gold, test) mprint("Eval: %.2f %.2f %.2f" % (p * 100, r * 100, f * 100), out, 'out') # Work out the minimal span to show all errors gold_spans = set([(node.label, node.span[0], node.span[1]) for node in gold_tree.get_nodes()]) test_spans = set([(node.label, node.span[0], node.span[1]) for node in test_tree.get_nodes()]) diff = gold_spans.symmetric_difference(test_spans) width = [1e5, -1] for span in diff: if span[2] - span[1] == 1: continue if span[1] < width[0]: width[0] = span[1]
gold_words = gold_tree.word_yield() test_words = test_tree.word_yield() if len(test_words.split()) != len(gold_words.split()): mprint("Sentence lengths do not match...", out, 'all') mprint("Gold: " + gold_words.__repr__(), out, 'all') mprint("Test: " + test_words.__repr__(), out, 'all') match_strict, gold_strict, test_strict, _, _ = relaxed_parse_errors.counts_for_prf( test_tree, gold_tree) match_relaxed, gold_relaxed, test_relaxed, _, _ = relaxed_parse_errors.counts_for_prf( test_relaxed_tree, gold_relaxed_tree) stats['out_evalb'][0] += match_strict stats['out_evalb'][1] += gold_strict stats['out_evalb'][2] += test_strict p, r, f = nlp_eval.calc_prf(match_strict, gold_strict, test_strict) mprint( "Eval--Strict Evalb: %.2f %.2f %.2f" % (p * 100, r * 100, f * 100), out, 'out') stats['out_relaxed'][0] += match_relaxed stats['out_relaxed'][1] += gold_relaxed stats['out_relaxed'][2] += test_relaxed p, r, f = nlp_eval.calc_prf(match_relaxed, gold_relaxed, test_relaxed) mprint( "Eval--Relaxed Edit: %.2f %.2f %.2f" % (p * 100, r * 100, f * 100), out, 'out') match = stats['out_evalb'][0] gold = stats['out_evalb'][1] test = stats['out_evalb'][2] p, r, f = nlp_eval.calc_prf(match, gold, test)
def _load_scoring_tree(text):
    """Parse one line of tree text and normalise it for scoring.

    The text is parsed with ``pstree.tree_from_text`` (empty labels allowed),
    passed through ``treebanks.homogenise_tree`` and then cleaned with
    ``treebanks.ptb_cleaning``, whose return value is not used.

    NOTE: ``treebanks.apply_collins_rules`` is deliberately not applied; the
    call was disabled in the original implementation.
    """
    tree = pstree.tree_from_text(text, allow_empty_labels=True)
    tree = treebanks.homogenise_tree(tree)
    treebanks.ptb_cleaning(tree)
    return tree


def _print_prf(template, match, gold, test, *prefix):
    """Print precision/recall/F (as percentages) using ``template``.

    ``prefix`` values are substituted into ``template`` before the three
    scores computed by ``nlp_eval.calc_prf``.
    """
    p, r, f = nlp_eval.calc_prf(match, gold, test)
    print(template % (prefix + (p * 100, r * 100, f * 100)))


def compute_overall_score(gold_file, test_file):
    """Score every tree in ``test_file`` against the same line of ``gold_file``.

    Both files are read line by line. For each pair of non-blank lines the
    trees are loaded, scored with ``relaxed_parse_errors.counts_for_prf``
    ("Strict Evalb") and ``relaxed_parse_errors.relaxed_counts_for_prf``
    ("Relaxed Edit"), and the per-sentence precision / recall / F-score are
    printed. After all sentences, the scores computed from the accumulated
    counts are printed for both metrics.

    Args:
        gold_file: path to the file of gold trees, one per line.
        test_file: path to the file of test trees, one per line.

    Returns:
        None. All results are written to standard output.

    Raises:
        AssertionError: if the two files do not have the same number of lines.
    """
    # Context managers make sure both handles are closed, even on error.
    with open(gold_file) as gold_handle:
        gold_in = gold_handle.readlines()
    with open(test_file) as test_handle:
        test_in = test_handle.readlines()

    # Accumulated [match, gold, test] counts for each metric.
    stats = {'out_evalb': [0, 0, 0], 'out_relaxed': [0, 0, 0]}

    # Raised explicitly (rather than via ``assert``) so the check survives
    # ``python -O`` while callers still see the same exception type.
    if len(gold_in) != len(test_in):
        raise AssertionError(
            "gold and test files have different numbers of lines: %d vs %d"
            % (len(gold_in), len(test_in)))

    for i, (gold_line, test_line) in enumerate(zip(gold_in, test_in)):
        print("Sent: " + str(i))

        # readlines() never yields an empty string, so only lines that are
        # blank after stripping need to be skipped.
        gold_text = gold_line.strip()
        test_text = test_line.strip()
        if not gold_text or not test_text:
            continue

        gold_tree = _load_scoring_tree(gold_text)
        test_tree = _load_scoring_tree(test_text)

        gold_words = gold_tree.word_yield()
        test_words = test_tree.word_yield()
        if len(test_words.split()) != len(gold_words.split()):
            # Only a warning: the sentence is still scored below.
            print("Sentence lengths do not match in sentence..." + str(i))
            print("Gold: " + repr(gold_words))
            print("Test: " + repr(test_words))

        match_strict, gold_strict, test_strict, _, _ = \
            relaxed_parse_errors.counts_for_prf(test_tree, gold_tree)
        match_relaxed, gold_relaxed, test_relaxed, _, _ = \
            relaxed_parse_errors.relaxed_counts_for_prf(test_tree, gold_tree)

        stats['out_evalb'][0] += match_strict
        stats['out_evalb'][1] += gold_strict
        stats['out_evalb'][2] += test_strict
        _print_prf("Eval--Strict Evalb: %.2f %.2f %.2f",
                   match_strict, gold_strict, test_strict)

        stats['out_relaxed'][0] += match_relaxed
        stats['out_relaxed'][1] += gold_relaxed
        stats['out_relaxed'][2] += test_relaxed
        _print_prf("Eval--Relaxed Edit: %.2f %.2f %.2f",
                   match_relaxed, gold_relaxed, test_relaxed)

    match, gold, test = stats['out_evalb']
    _print_prf("Overall--Standard EVALB %s: %.2f %.2f %.2f",
               match, gold, test, 'out')

    match, gold, test = stats['out_relaxed']
    _print_prf("Overall--Relaxed EDIT %s: %.2f %.2f %.2f",
               match, gold, test, 'out')
continue gold_words = gold_tree.word_yield() test_words = test_tree.word_yield() if len(test_words.split()) != len(gold_words.split()): mprint("Sentence lengths do not match...", out, 'all') mprint("Gold: " + gold_words.__repr__(), out, 'all') mprint("Test: " + test_words.__repr__(), out, 'all') mprint("After applying collins rules:", out, 'out') mprint(render_tree.text_coloured_errors(test_tree, gold_tree).strip(), out, 'out') match, gold, test, crossing, POS = parse_errors.counts_for_prf(test_tree, gold_tree) stats['out'][0] += match stats['out'][1] += gold stats['out'][2] += test p, r, f = nlp_eval.calc_prf(match, gold, test) mprint("Eval: %.2f %.2f %.2f" % (p*100, r*100, f*100), out, 'out') # Work out the minimal span to show all errors gold_spans = set([(node.label, node.span[0], node.span[1]) for node in gold_tree.get_nodes()]) test_spans = set([(node.label, node.span[0], node.span[1]) for node in test_tree.get_nodes()]) diff = gold_spans.symmetric_difference(test_spans) width = [1e5, -1] for span in diff: if span[2] - span[1] == 1: continue if span[1] < width[0]: width[0] = span[1] if span[2] > width[1]: width[1] = span[2] mprint('\n\\scalebox{\\derivscale}{', out, 'tex')
node.label = pair[0] if len(options['equivalent_words'][1]) > 0: for tree in [gold_tree, test_tree]: for node in gold_tree: for pair in options['equivalent_words'][1]: if node.word in pair: node.word = pair[0] if options['remove_trivial_unaries'][1]: treebanks.remove_trivial_unaries(test_tree) treebanks.remove_trivial_unaries(gold_tree) # Score and report match, gcount, tcount, crossing, POS = parse_errors.counts_for_prf(test_tree, gold_tree, include_terminals=options['include_POS_in_score'][1]) POS = twords - POS p, r, f = nlp_eval.calc_prf(match, gcount, tcount) f *= 100 r *= 100 p *= 100 POS_acc = 100.0 * POS / twords print("{:4} {:4} {: >7.2f} {: >7.2f} {: >7.2f} {:5} {:6} {:4} {:7}" " {:7} {: >8.2f}".format(sent_id, gwords, p, r, f, match, gcount, tcount, crossing, POS, POS_acc)) scores.append((sent_id, gwords, p, r, f, match, gcount, tcount, crossing, POS, POS_acc)) sent_id -= 1 # Work out summary sents = float(sent_id) parsed = len(filter(lambda x: x[7] != 0, scores)) if not options["include_unparsed_in_score"][1]: scores = filter(lambda x: x[7] > 0, scores)