def read_tree(text, out_dict, label):
	"""Read a tree from ``text`` and return it after applying Collins rules.

	The text is wrapped in a ``StringIO`` and handed to
	``treebanks.ptb_read_tree``.  The tree is homogenised in place, a
	blank root label is replaced by ``'ROOT'``, and the tree is then
	passed to ``treebanks.apply_collins_rules``.

	Args:
		text: String containing the tree to read.
		out_dict: Mapping with writable streams under ``'out'`` and ``'err'``.
		label: Name inserted into the "Empty ... tree" diagnostic.

	Returns:
		The result of ``treebanks.apply_collins_rules``, or ``None`` when
		no tree could be read or the rules returned ``None``.
	"""
	parsed = treebanks.ptb_read_tree(StringIO(text))
	if parsed is None:
		return None

	treebanks.homogenise_tree(parsed)
	if len(parsed.label.strip()) == 0:
		parsed.label = 'ROOT'

	reduced = treebanks.apply_collins_rules(parsed)
	if reduced is not None:
		return reduced

	# Nothing survived the rules: report on both streams, showing the tree
	# as it was read followed by the (None) result.
	streams = (out_dict['out'], out_dict['err'])
	for stream in streams:
		print >> stream, "Empty {} tree".format(label)
		print >> stream, parsed
		print >> stream, reduced
	return reduced
예제 #2
0
def read_tree(text, out_dict, label):
    """Build a tree from ``text`` and run it through the Collins rules.

    Steps: read the tree with ``treebanks.ptb_read_tree`` from an in-memory
    file, homogenise it, default a blank root label to ``'ROOT'``, and call
    ``treebanks.apply_collins_rules``.

    Args:
        text: String holding the tree to read.
        out_dict: Mapping providing writable streams at keys ``'out'`` and
            ``'err'``; both receive the diagnostic when the result is empty.
        label: Word placed inside the "Empty ... tree" message.

    Returns:
        Whatever ``treebanks.apply_collins_rules`` returns, or ``None`` if
        ``treebanks.ptb_read_tree`` produced no tree.
    """
    source = StringIO(text)
    full_tree = treebanks.ptb_read_tree(source)
    if full_tree is None:
        return None

    treebanks.homogenise_tree(full_tree)
    stripped_label = full_tree.label.strip()
    if not stripped_label:
        full_tree.label = 'ROOT'

    result = treebanks.apply_collins_rules(full_tree)
    if result is None:
        # Look up both streams before writing anything.
        targets = [out_dict['out'], out_dict['err']]
        for target in targets:
            report = ("Empty {} tree".format(label), full_tree, result)
            for item in report:
                print >> target, item
    return result
예제 #3
0
    if out_format == 't':
        print tex_start
    for tree in treebanks.generate_trees(sys.stdin,
                                         return_empty=True,
                                         allow_empty_labels=True):
        gold_tree = None
        if gold_file is not None:
            gold_tree = gold_file.next()

        if tree is None:
            print
            continue

        # Apply edits
        if 'h' in edits:
            tree = treebanks.homogenise_tree(tree)
            if gold_tree is not None:
                gold_tree = treebanks.homogenise_tree(gold_tree)
        if 't' in edits:
            treebanks.remove_traces(tree)
            if gold_tree is not None:
                treebanks.remove_traces(gold_tree)
        if 'f' in edits:
            treebanks.remove_function_tags(tree)
            if gold_tree is not None:
                treebanks.remove_function_tags(gold_tree)
        if 'c' in edits:
            treebanks.apply_collins_rules(tree)
            if gold_tree is not None:
                treebanks.apply_collins_rules(gold_tree)
        if 'u' in edits:
        test_text = test_text.strip()

        gold_relaxed_text = flatten_edited_nodes(gold_text)
        test_relaxed_text = flatten_edited_nodes(test_text)
        print test_text
        print test_relaxed_text
        if len(gold_text) == 0:
            mprint("No gold tree", out, 'all')
            continue
        elif len(test_text) == 0:
            mprint("Not parsed", out, 'all')
            continue

        gold_complete_tree = pstree.tree_from_text(gold_text,
                                                   allow_empty_labels=True)
        gold_complete_tree = treebanks.homogenise_tree(gold_complete_tree)
        treebanks.ptb_cleaning(gold_complete_tree)
        gold_tree = treebanks.apply_collins_rules(gold_complete_tree, False)

        gold_relaxed_tree = pstree.tree_from_text(gold_relaxed_text,
                                                  allow_empty_labels=True)
        gold_relaxed_tree = treebanks.homogenise_tree(gold_relaxed_tree)
        treebanks.ptb_cleaning(gold_relaxed_tree)
        gold_relaxed_tree = treebanks.apply_collins_rules(
            gold_relaxed_tree, False)
        if gold_tree is None:
            mprint("Empty gold tree", out, 'all')
            mprint(gold_complete_tree.__repr__(), out, 'all')
            mprint(gold_tree.__repr__(), out, 'all')
            continue
		gold_file = treebanks.generate_trees(gold_file, allow_empty_labels=True)

	if out_format == 't':
		print tex_start
	for tree in treebanks.generate_trees(sys.stdin, return_empty=True, allow_empty_labels=True):
		gold_tree = None
		if gold_file is not None:
			gold_tree = gold_file.next()

		if tree is None:
			print
			continue

		# Apply edits
		if 'h' in edits:
			tree = treebanks.homogenise_tree(tree)
			if gold_tree is not None:
				gold_tree = treebanks.homogenise_tree(gold_tree)
		if 't' in edits:
			treebanks.remove_traces(tree)
			if gold_tree is not None:
				treebanks.remove_traces(gold_tree)
		if 'f' in edits:
			treebanks.remove_function_tags(tree)
			if gold_tree is not None:
				treebanks.remove_function_tags(gold_tree)
		if 'c' in edits:
			treebanks.apply_collins_rules(tree)
			if gold_tree is not None:
				treebanks.apply_collins_rules(gold_tree)
		if 'u' in edits:
			mprint("End of test input", out, 'err')
			break

		mprint("Sentence %d:" % sent_no, out, 'all')

		gold_text = gold_text.strip()
		test_text = test_text.strip()
		if len(gold_text) == 0:
			mprint("No gold tree", out, 'all')
			continue
		elif len(test_text) == 0:
			mprint("Not parsed", out, 'all')
			continue

		gold_complete_tree = pstree.tree_from_text(gold_text)
		tree = treebanks.homogenise_tree(gold_complete_tree, False)
		treebanks.remove_traces(tree)
		treebanks.remove_function_tags(tree)
		treebanks.ptb_cleaning(tree)
		treebanks.remove_trivial_unaries(tree)
		gold_tree = tree
		if gold_tree is None:
			mprint("Empty gold tree", out, 'all')
			mprint(gold_complete_tree.__repr__(), out, 'all')
			mprint(gold_tree.__repr__(), out, 'all')
			continue

		if '()' in test_text:
			mprint("() test tree", out, 'all')
			continue
		test_complete_tree = pstree.tree_from_text(test_text)
예제 #7
0
def compute_overall_score(gold_file, test_file):
    gold_in = open(gold_file).readlines()
    test_in = open(test_file).readlines()
    stats = {'out_evalb': [0, 0, 0], 'out_relaxed': [0, 0, 0]}

    assert len(gold_in) == len(test_in)

    for i in range(len(gold_in)):
        print "Sent: " + str(i)
        gold_text = gold_in[i]
        test_text = test_in[i]
        if gold_text == '' and test_text == '':
            break
        elif gold_text == '':
            break
        elif test_text == '':
            break

        gold_text = gold_text.strip()
        test_text = test_text.strip()
        if len(gold_text) == 0:
            continue
        elif len(test_text) == 0:
            continue

        gold_complete_tree = pstree.tree_from_text(gold_text,
                                                   allow_empty_labels=True)
        gold_complete_tree = treebanks.homogenise_tree(gold_complete_tree)
        treebanks.ptb_cleaning(gold_complete_tree)
        gold_tree = gold_complete_tree
        #gold_tree = treebanks.apply_collins_rules(gold_complete_tree, False)

        test_complete_tree = pstree.tree_from_text(test_text,
                                                   allow_empty_labels=True)
        test_complete_tree = treebanks.homogenise_tree(test_complete_tree)
        treebanks.ptb_cleaning(test_complete_tree)
        test_tree = test_complete_tree
        #test_tree = treebanks.apply_collins_rules(test_complete_tree, False)

        gold_words = gold_tree.word_yield()
        test_words = test_tree.word_yield()
        if len(test_words.split()) != len(gold_words.split()):
            print "Sentence lengths do not match in sentence..." + str(i)
            print "Gold: " + gold_words.__repr__()
            print "Test: " + test_words.__repr__()

        match_strict, gold_strict, test_strict, _, _ = relaxed_parse_errors.counts_for_prf(
            test_tree, gold_tree)
        match_relaxed, gold_relaxed, test_relaxed, _, _ = relaxed_parse_errors.relaxed_counts_for_prf(
            test_tree, gold_tree)
        stats['out_evalb'][0] += match_strict
        stats['out_evalb'][1] += gold_strict
        stats['out_evalb'][2] += test_strict
        p, r, f = nlp_eval.calc_prf(match_strict, gold_strict, test_strict)
        print "Eval--Strict Evalb: %.2f  %.2f  %.2f" % (p * 100, r * 100,
                                                        f * 100)

        stats['out_relaxed'][0] += match_relaxed
        stats['out_relaxed'][1] += gold_relaxed
        stats['out_relaxed'][2] += test_relaxed
        p, r, f = nlp_eval.calc_prf(match_relaxed, gold_relaxed, test_relaxed)
        print "Eval--Relaxed Edit: %.2f  %.2f  %.2f" % (p * 100, r * 100,
                                                        f * 100)

    match = stats['out_evalb'][0]
    gold = stats['out_evalb'][1]
    test = stats['out_evalb'][2]
    p, r, f = nlp_eval.calc_prf(match, gold, test)
    print "Overall--Standard EVALB %s: %.2f  %.2f  %.2f" % ('out', p * 100,
                                                            r * 100, f * 100)

    match = stats['out_relaxed'][0]
    gold = stats['out_relaxed'][1]
    test = stats['out_relaxed'][2]
    p, r, f = nlp_eval.calc_prf(match, gold, test)
    print "Overall--Relaxed EDIT %s: %.2f  %.2f  %.2f" % ('out', p * 100,
                                                          r * 100, f * 100)