def read_tree(text, out_dict, label):
	"""Read one tree from `text` and return it after Collins-rule processing.

	The text is wrapped in a StringIO and handed to treebanks.ptb_read_tree.
	When that yields None, None is returned straight away.  Otherwise the
	tree is passed to treebanks.homogenise_tree, a root label that is blank
	after stripping is replaced with 'ROOT', and the result of
	treebanks.apply_collins_rules is returned.

	If apply_collins_rules gives back None, an "Empty <label> tree" notice,
	the full tree object and the None result are printed to both
	out_dict['out'] and out_dict['err'] before None is returned.
	"""
	full_tree = treebanks.ptb_read_tree(StringIO(text))
	if full_tree is None:
		return None

	treebanks.homogenise_tree(full_tree)
	if not full_tree.label.strip():
		full_tree.label = 'ROOT'

	pruned = treebanks.apply_collins_rules(full_tree)
	if pruned is not None:
		return pruned

	# Nothing came back from the Collins rules: report on both streams.
	streams = [out_dict['out'], out_dict['err']]
	for stream in streams:
		print >> stream, "Empty {} tree".format(label)
		print >> stream, full_tree
		print >> stream, pruned
	return pruned
Ejemplo n.º 2
0
def read_tree(text, out_dict, label):
    """Read one tree from `text` and return it after Collins-rule processing.

    The text is wrapped in a StringIO and handed to treebanks.ptb_read_tree.
    When that yields None, None is returned straight away.  Otherwise the
    tree is passed to treebanks.homogenise_tree, a root label that is blank
    after stripping is replaced with 'ROOT', and the result of
    treebanks.apply_collins_rules is returned.

    If apply_collins_rules gives back None, an "Empty <label> tree" notice,
    the full tree object and the None result are printed to both
    out_dict['out'] and out_dict['err'] before None is returned.
    """
    full_tree = treebanks.ptb_read_tree(StringIO(text))
    if full_tree is None:
        return None

    treebanks.homogenise_tree(full_tree)
    if not full_tree.label.strip():
        full_tree.label = 'ROOT'

    pruned = treebanks.apply_collins_rules(full_tree)
    if pruned is not None:
        return pruned

    # Nothing came back from the Collins rules: report on both streams.
    streams = [out_dict['out'], out_dict['err']]
    for stream in streams:
        print >> stream, "Empty {} tree".format(label)
        print >> stream, full_tree
        print >> stream, pruned
    return pruned
Ejemplo n.º 3
0
            break

        # Announce which sentence the following messages refer to.
        mprint("Sentence %d:" % sent_no, out, 'all')

        # Skip this pair when either text is empty once whitespace is stripped.
        gold_text = gold_text.strip()
        test_text = test_text.strip()
        if len(gold_text) == 0:
            mprint("No gold tree", out, 'all')
            continue
        elif len(test_text) == 0:
            mprint("Not parsed", out, 'all')
            continue

        # Build the gold tree from text, clean it, then apply the Collins rules.
        # NOTE(review): the second argument (False) is presumably an in-place
        # flag -- confirm against treebanks.apply_collins_rules.
        gold_complete_tree = pstree.tree_from_text(gold_text)
        treebanks.ptb_cleaning(gold_complete_tree)
        gold_tree = treebanks.apply_collins_rules(gold_complete_tree, False)
        if gold_tree is None:
            # Report both the cleaned tree and the (None) result, then move on.
            mprint("Empty gold tree", out, 'all')
            mprint(gold_complete_tree.__repr__(), out, 'all')
            mprint(gold_tree.__repr__(), out, 'all')
            continue

        # A test text containing '()' is reported and skipped without parsing.
        if '()' in test_text:
            mprint("() test tree", out, 'all')
            continue
        # Same build / clean / Collins-rules sequence for the test tree.
        test_complete_tree = pstree.tree_from_text(test_text)
        treebanks.ptb_cleaning(test_complete_tree)
        test_tree = treebanks.apply_collins_rules(test_complete_tree, False)
        if test_tree is None:
            mprint("Empty test tree", out, 'all')
            mprint(test_complete_tree.__repr__(), out, 'all')
Ejemplo n.º 4
0
        # Apply edits: each letter found in `edits` selects one treebanks
        # transformation, run on `tree` and, when it is not None, on `gold_tree`.
        if 'h' in edits:
            # homogenise_tree's return value replaces the tree reference; the
            # remaining edits below are called without using their return value.
            tree = treebanks.homogenise_tree(tree)
            if gold_tree is not None:
                gold_tree = treebanks.homogenise_tree(gold_tree)
        if 't' in edits:
            treebanks.remove_traces(tree)
            if gold_tree is not None:
                treebanks.remove_traces(gold_tree)
        if 'f' in edits:
            treebanks.remove_function_tags(tree)
            if gold_tree is not None:
                treebanks.remove_function_tags(gold_tree)
        if 'c' in edits:
            treebanks.apply_collins_rules(tree)
            if gold_tree is not None:
                treebanks.apply_collins_rules(gold_tree)
        if 'u' in edits:
            # This must be after all other deletion to work properly
            treebanks.remove_trivial_unaries(tree)
            if gold_tree is not None:
                treebanks.remove_trivial_unaries(gold_tree)

        # Print tree in the requested format: 's' is single-line text, 'm' is
        # multi-line text, 'o' goes through render_tree.text_ontonotes.
        # Any other out_format value prints nothing here.
        if out_format == 's':
            print render_tree.text_tree(tree, single_line=True)
        elif out_format == 'm':
            print render_tree.text_tree(tree, single_line=False)
        elif out_format == 'o':
            print render_tree.text_ontonotes(tree)
		# Apply edits: each letter found in `edits` selects one treebanks
		# transformation, run on `tree` and, when it is not None, on `gold_tree`.
		if 'h' in edits:
			# homogenise_tree's return value replaces the tree reference; the
			# remaining edits below are called without using their return value.
			tree = treebanks.homogenise_tree(tree)
			if gold_tree is not None:
				gold_tree = treebanks.homogenise_tree(gold_tree)
		if 't' in edits:
			treebanks.remove_traces(tree)
			if gold_tree is not None:
				treebanks.remove_traces(gold_tree)
		if 'f' in edits:
			treebanks.remove_function_tags(tree)
			if gold_tree is not None:
				treebanks.remove_function_tags(gold_tree)
		if 'c' in edits:
			treebanks.apply_collins_rules(tree)
			if gold_tree is not None:
				treebanks.apply_collins_rules(gold_tree)
		if 'u' in edits:
			# This must be after all other deletion to work properly
			treebanks.remove_trivial_unaries(tree)
			if gold_tree is not None:
				treebanks.remove_trivial_unaries(gold_tree)

		# Print tree in the requested format: 's' is single-line text, 'm' is
		# multi-line text, 'o' goes through render_tree.text_ontonotes.
		# Any other out_format value prints nothing here.
		if out_format == 's':
			print render_tree.text_tree(tree, single_line=True)
		elif out_format == 'm':
			print render_tree.text_tree(tree, single_line=False)
		elif out_format == 'o':
			print render_tree.text_ontonotes(tree)
			break

		# Announce which sentence the following messages refer to.
		mprint("Sentence %d:" % sent_no, out, 'all')

		# Skip this pair when either text is empty once whitespace is stripped.
		gold_text = gold_text.strip()
		test_text = test_text.strip()
		if len(gold_text) == 0:
			mprint("No gold tree", out, 'all')
			continue
		elif len(test_text) == 0:
			mprint("Not parsed", out, 'all')
			continue

		# Build the gold tree from text, clean it, then apply the Collins rules.
		# NOTE(review): the second argument (False) is presumably an in-place
		# flag -- confirm against treebanks.apply_collins_rules.
		gold_complete_tree = pstree.tree_from_text(gold_text)
		treebanks.ptb_cleaning(gold_complete_tree)
		gold_tree = treebanks.apply_collins_rules(gold_complete_tree, False)
		if gold_tree is None:
			# Report both the cleaned tree and the (None) result, then move on.
			mprint("Empty gold tree", out, 'all')
			mprint(gold_complete_tree.__repr__(), out, 'all')
			mprint(gold_tree.__repr__(), out, 'all')
			continue

		# A test text containing '()' is reported and skipped without parsing.
		if '()' in test_text:
			mprint("() test tree", out, 'all')
			continue
		# Same build / clean / Collins-rules sequence for the test tree.
		test_complete_tree = pstree.tree_from_text(test_text)
		treebanks.ptb_cleaning(test_complete_tree)
		test_tree = treebanks.apply_collins_rules(test_complete_tree, False)
		if test_tree is None:
			mprint("Empty test tree", out, 'all')
			mprint(test_complete_tree.__repr__(), out, 'all')