Example #1
0
        test_words = test_tree.word_yield()
        if len(test_words.split()) != len(gold_words.split()):
            mprint("Sentence lengths do not match...", out, 'all')
            mprint("Gold: " + gold_words.__repr__(), out, 'all')
            mprint("Test: " + test_words.__repr__(), out, 'all')

        mprint("After applying collins rules:", out, 'out')
        mprint(
            render_tree.text_coloured_errors(test_tree, gold_tree).strip(),
            out, 'out')
        match, gold, test, crossing, POS = parse_errors.counts_for_prf(
            test_tree, gold_tree)
        stats['out'][0] += match
        stats['out'][1] += gold
        stats['out'][2] += test
        p, r, f = nlp_eval.calc_prf(match, gold, test)
        mprint("Eval: %.2f  %.2f  %.2f" % (p * 100, r * 100, f * 100), out,
               'out')

        # Work out the minimal span to show all errors
        gold_spans = set([(node.label, node.span[0], node.span[1])
                          for node in gold_tree.get_nodes()])
        test_spans = set([(node.label, node.span[0], node.span[1])
                          for node in test_tree.get_nodes()])
        diff = gold_spans.symmetric_difference(test_spans)
        width = [1e5, -1]
        for span in diff:
            if span[2] - span[1] == 1:
                continue
            if span[1] < width[0]:
                width[0] = span[1]
        gold_words = gold_tree.word_yield()
        test_words = test_tree.word_yield()
        if len(test_words.split()) != len(gold_words.split()):
            mprint("Sentence lengths do not match...", out, 'all')
            mprint("Gold: " + gold_words.__repr__(), out, 'all')
            mprint("Test: " + test_words.__repr__(), out, 'all')

        match_strict, gold_strict, test_strict, _, _ = relaxed_parse_errors.counts_for_prf(
            test_tree, gold_tree)
        match_relaxed, gold_relaxed, test_relaxed, _, _ = relaxed_parse_errors.counts_for_prf(
            test_relaxed_tree, gold_relaxed_tree)
        stats['out_evalb'][0] += match_strict
        stats['out_evalb'][1] += gold_strict
        stats['out_evalb'][2] += test_strict
        p, r, f = nlp_eval.calc_prf(match_strict, gold_strict, test_strict)
        mprint(
            "Eval--Strict Evalb: %.2f  %.2f  %.2f" %
            (p * 100, r * 100, f * 100), out, 'out')
        stats['out_relaxed'][0] += match_relaxed
        stats['out_relaxed'][1] += gold_relaxed
        stats['out_relaxed'][2] += test_relaxed
        p, r, f = nlp_eval.calc_prf(match_relaxed, gold_relaxed, test_relaxed)
        mprint(
            "Eval--Relaxed Edit: %.2f  %.2f  %.2f" %
            (p * 100, r * 100, f * 100), out, 'out')

    match = stats['out_evalb'][0]
    gold = stats['out_evalb'][1]
    test = stats['out_evalb'][2]
    p, r, f = nlp_eval.calc_prf(match, gold, test)
Example #3
0
def _parse_and_clean_tree(text):
    """Build a tree from one line of tree text and prepare it for scoring.

    The text is parsed with ``pstree.tree_from_text`` (empty labels are
    allowed), passed through ``treebanks.homogenise_tree`` and finally handed
    to ``treebanks.ptb_cleaning``.  The return value of ``ptb_cleaning`` is
    not used, so it is relied on to modify the tree in place.  Collins rules
    are not applied.
    """
    tree = pstree.tree_from_text(text, allow_empty_labels=True)
    tree = treebanks.homogenise_tree(tree)
    treebanks.ptb_cleaning(tree)
    return tree


def _print_prf(label, match, gold, test):
    """Print ``label`` and the three ``nlp_eval.calc_prf`` values times 100."""
    p, r, f = nlp_eval.calc_prf(match, gold, test)
    print("%s: %.2f  %.2f  %.2f" % (label, p * 100, r * 100, f * 100))


def compute_overall_score(gold_file, test_file):
    """Score a file of test trees against a file of gold trees and print results.

    The two files are read line by line and paired by line number.  For every
    pair in which both lines are non-blank, the trees are parsed and cleaned,
    then scored twice: with ``relaxed_parse_errors.counts_for_prf`` (reported
    as "Strict Evalb") and with ``relaxed_parse_errors.relaxed_counts_for_prf``
    (reported as "Relaxed Edit").  Per-sentence scores are printed as they are
    computed, and the counts are summed to print two overall scores at the end.

    Args:
        gold_file: path of the file holding the gold trees, one per line.
        test_file: path of the file holding the test trees, one per line.

    Returns:
        None.  All results are written to standard output.

    Raises:
        AssertionError: if the two files do not have the same number of lines.
    """
    # Context managers make sure both handles are closed, even if reading fails.
    with open(gold_file) as gold_handle:
        gold_in = gold_handle.readlines()
    with open(test_file) as test_handle:
        test_in = test_handle.readlines()

    # Running [match, gold, test] totals for each scoring scheme.
    stats = {'out_evalb': [0, 0, 0], 'out_relaxed': [0, 0, 0]}

    assert len(gold_in) == len(test_in)

    for i, (gold_text, test_text) in enumerate(zip(gold_in, test_in)):
        print("Sent: " + str(i))

        # readlines() never yields an empty string, so the only "empty" case
        # to handle is a line that is blank once whitespace is stripped.
        gold_text = gold_text.strip()
        test_text = test_text.strip()
        if not gold_text or not test_text:
            continue

        gold_tree = _parse_and_clean_tree(gold_text)
        test_tree = _parse_and_clean_tree(test_text)

        gold_words = gold_tree.word_yield()
        test_words = test_tree.word_yield()
        if len(test_words.split()) != len(gold_words.split()):
            # Only a warning: the pair is still scored below.
            print("Sentence lengths do not match in sentence..." + str(i))
            print("Gold: " + repr(gold_words))
            print("Test: " + repr(test_words))

        match_strict, gold_strict, test_strict, _, _ = \
            relaxed_parse_errors.counts_for_prf(test_tree, gold_tree)
        match_relaxed, gold_relaxed, test_relaxed, _, _ = \
            relaxed_parse_errors.relaxed_counts_for_prf(test_tree, gold_tree)

        strict_totals = stats['out_evalb']
        strict_totals[0] += match_strict
        strict_totals[1] += gold_strict
        strict_totals[2] += test_strict
        _print_prf("Eval--Strict Evalb", match_strict, gold_strict,
                   test_strict)

        relaxed_totals = stats['out_relaxed']
        relaxed_totals[0] += match_relaxed
        relaxed_totals[1] += gold_relaxed
        relaxed_totals[2] += test_relaxed
        _print_prf("Eval--Relaxed Edit", match_relaxed, gold_relaxed,
                   test_relaxed)

    match, gold, test = stats['out_evalb']
    _print_prf("Overall--Standard EVALB %s" % 'out', match, gold, test)

    match, gold, test = stats['out_relaxed']
    _print_prf("Overall--Relaxed EDIT %s" % 'out', match, gold, test)
			continue

		gold_words = gold_tree.word_yield()
		test_words = test_tree.word_yield()
		if len(test_words.split()) != len(gold_words.split()):
			mprint("Sentence lengths do not match...", out, 'all')
			mprint("Gold: " + gold_words.__repr__(), out, 'all')
			mprint("Test: " + test_words.__repr__(), out, 'all')

		mprint("After applying collins rules:", out, 'out')
		mprint(render_tree.text_coloured_errors(test_tree, gold_tree).strip(), out, 'out')
		match, gold, test, crossing, POS = parse_errors.counts_for_prf(test_tree, gold_tree)
		stats['out'][0] += match
		stats['out'][1] += gold
		stats['out'][2] += test
		p, r, f = nlp_eval.calc_prf(match, gold, test)
		mprint("Eval: %.2f  %.2f  %.2f" % (p*100, r*100, f*100), out, 'out')

		# Work out the minimal span to show all errors
		gold_spans = set([(node.label, node.span[0], node.span[1]) for node in gold_tree.get_nodes()])
		test_spans = set([(node.label, node.span[0], node.span[1]) for node in test_tree.get_nodes()])
		diff = gold_spans.symmetric_difference(test_spans)
		width = [1e5, -1]
		for span in diff:
			if span[2] - span[1] == 1:
				continue
			if span[1] < width[0]:
				width[0] = span[1]
			if span[2] > width[1]:
				width[1] = span[2]
		mprint('\n\\scalebox{\\derivscale}{', out, 'tex')
Example #5
0
						node.label = pair[0]
	if len(options['equivalent_words'][1]) > 0:
		# Canonicalise words in BOTH trees: any word that appears in an
		# equivalence pair is replaced by the first member of that pair.
		# The inner loop must walk `tree` (the loop variable); iterating
		# gold_tree here would process gold twice and leave test_tree untouched.
		for tree in [gold_tree, test_tree]:
			for node in tree:
				for pair in options['equivalent_words'][1]:
					if node.word in pair:
						node.word = pair[0]
	if options['remove_trivial_unaries'][1]:
		treebanks.remove_trivial_unaries(test_tree)
		treebanks.remove_trivial_unaries(gold_tree)

	# Score and report
	match, gcount, tcount, crossing, POS = parse_errors.counts_for_prf(test_tree,
		gold_tree, include_terminals=options['include_POS_in_score'][1])
	POS = twords - POS
	p, r, f = nlp_eval.calc_prf(match, gcount, tcount)
	f *= 100
	r *= 100
	p *= 100
	POS_acc = 100.0 * POS / twords

	print("{:4} {:4} {: >7.2f} {: >7.2f} {: >7.2f} {:5} {:6} {:4} {:7}"
		" {:7} {: >8.2f}".format(sent_id, gwords, p, r, f, match, gcount, tcount, crossing, POS, POS_acc))
	scores.append((sent_id, gwords, p, r, f, match, gcount, tcount, crossing, POS, POS_acc))
sent_id -= 1

# Work out summary
sents = float(sent_id)
parsed = len(filter(lambda x: x[7] != 0, scores))
if not options["include_unparsed_in_score"][1]:
	scores = filter(lambda x: x[7] > 0, scores)