Esempio n. 1
0
def missing_with_nothing_nearby(error, test_tree, to_group, clevel, left, right, ungrouped):
	'''Describe a missing bracket whose children were attached too high.

	Missing, with no crossing or other bracket error directly above, so
	something that should be in this bracket attached too high.

	Arguments:
	  error - the missing bracket error; its node supplies the label, span
	    and word yield used in the description
	  test_tree - tree handed to repair_tree and returned to the caller
	  to_group - list that receives every error explained by this group
	  clevel - node whose subtrees hold the children of the missing bracket
	  left, right - first and last index (inclusive) in clevel.subtrees that
	    the missing bracket should cover
	  ungrouped - errors that have not been assigned to a group yet

	Returns (group_fields, test_tree), or (None, test_tree) when left == right.
	'''
	if left == right:
		# This is actually a unary case
		return None, test_tree

	# Sort the labels of clevel's children into those before, inside and
	# after the missing bracket
	left_siblings = []
	children = []
	right_siblings = []
	for pos, subtree in enumerate(clevel.subtrees):
		if pos < left:
			left_siblings.append(subtree.label)
		elif pos > right:
			right_siblings.append(subtree.label)
		else:
			children.append(subtree.label)

	group_desc = 'attachment too_high %s_instead_of_%s' % (clevel.label, error.node.label)
	for label in children:
		group_desc += ' ' + label
	group_desc += ' ' + error.node.word_yield()

	group_fields = {}
	group_fields['type'] = 'missing'
	group_fields['parent'] = clevel.label
	group_fields['left siblings'] = ' '.join(left_siblings)
	group_fields['children'] = ' '.join(children)
	group_fields['right siblings'] = ' '.join(right_siblings)
	group_fields['children not first'] = ' '.join(children[1:])

	# Every missing bracket over the same span as this one is explained by the
	# same mistake, so group and repair them together
	group_fields['new spans'] = ''
	for merror in ungrouped:
		if merror.missing and error.node.span == merror.node.span:
			group_fields['new spans'] += ' ' + merror.node.label
			to_group.append(merror)
			repair_tree.repair_missing_node(merror, test_tree)

	group_fields['ID'] = 'mwnn1'
	group_desc += ' |mwnn1'
	group_fields['old desc'] = group_desc
	return group_fields, test_tree
Esempio n. 2
0
def unary_error(ungrouped, grouped, gold, test):
    '''Group errors that sit on a span present in both the test and gold tree.

    Only spans longer than one word are considered.  The errors on each such
    span are classified as one of:
      - missing unary production(s), when none of the errors is extra
      - extra unary production(s), when none of the errors is missing
      - a mislabelled node, when there is exactly one of each
    Any other combination is left in ungrouped.

    For every group created, the errors are removed from ungrouped, the test
    tree is repaired through repair_tree and the group is appended to grouped.

    Arguments:
      ungrouped - list of errors not yet assigned to a group (modified)
      grouped - list of groups found so far (extended)
      gold, test - trees providing get_spans(); test is the tree being repaired

    Returns (changed, test), where changed is True if a group was created.
    '''
    _, span_set = test.get_spans()
    _, gold_span_set = gold.get_spans()

    # span -> (entries at the span in test, entries at the span in gold,
    #          errors on the span)
    relevant_errors = {}
    for error in ungrouped:
        span = error.node.span
        if span[1] - span[0] > 1 and span in span_set and span in gold_span_set:
            if span not in relevant_errors:
                relevant_errors[span] = (len(span_set[span].values()),
                                         len(gold_span_set[span].values()), [])
            relevant_errors[span][2].append(error)

    changed = False
    for span in relevant_errors:
        test_count, gold_count, errors = relevant_errors[span]
        missing_errors = sum(1 for error in errors if error.missing)
        extra_errors = len(errors) - missing_errors

        if test_count > 0 and extra_errors == 0:
            # there is/are missing unary production(s) here
            group = error_group.Error_Group()
            current_labels = []
            for node_set_label in span_set[span]:
                for node in span_set[span][node_set_label]:
                    current_labels.append(node.label)
            current_labels.sort()
            missing_labels = sorted(error.node.label for error in errors)
            for error in errors:
                ungrouped.remove(error)
                group.errors.append(error)
                repair_tree.repair_missing_node(error, test)
            group.fields['type'] = 'unary'
            group.fields['subtype'] = 'missing'
            group.desc = 'unary miss %s over %s' % ('_'.join(missing_labels),
                                                    '_'.join(current_labels))
            group.fields['nodes'] = ' '.join(missing_labels)
            group.fields['old desc'] = group.desc
            grouped.append(group)
            changed = True
        elif gold_count > 0 and missing_errors == 0:
            # there is/are extra unary production(s) here
            current_labels = []
            for node_set_label in span_set[span]:
                for node in span_set[span][node_set_label]:
                    if not node.extra:
                        current_labels.append(node.label)
            current_labels.sort()
            extra_labels = sorted(error.node.label for error in errors)

            # only use it if there isn't a matching missing error directly
            # above.  The candidate must be a *missing* error: without that
            # filter the extra error matches itself in ungrouped (same label,
            # same start, and its parent always reaches at least as far as it
            # does), so a lone extra unary would never be grouped.
            skip = False
            if len(extra_labels) == 1:
                error = errors[0]
                for merror in ungrouped:
                    if not merror.missing:
                        continue
                    if merror.node.label != extra_labels[0]:
                        continue
                    if merror.node.span[0] == error.node.span[0]:
                        if error.node.parent.span[1] >= merror.node.span[1]:
                            skip = True
                            break
                    elif merror.node.span[1] == error.node.span[1]:
                        if error.node.parent.span[0] <= merror.node.span[0]:
                            skip = True
                            break
            if not skip:
                group = error_group.Error_Group()
                for error in errors:
                    ungrouped.remove(error)
                    group.errors.append(error)
                    repair_tree.repair_extra_node(error, test)
                group.fields['type'] = 'unary'
                group.fields['subtype'] = 'extra'
                group.fields['nodes'] = ' '.join(extra_labels)
                group.desc = 'unary extra %s over %s' % (
                    '_'.join(extra_labels), '_'.join(current_labels))
                group.fields['old desc'] = group.desc
                grouped.append(group)
                changed = True
        elif missing_errors == 1 and extra_errors == 1:
            # We have a mislabelled node
            extra, missing = errors[0], errors[1]
            if not extra.extra:
                extra, missing = missing, extra

            group = error_group.Error_Group()
            group.fields['type'] = 'wrong label, right span'
            if test_count == 1 and gold_count == 1:
                group.desc = 'diff %s should_be %s' % (extra.node.label,
                                                       missing.node.label)
            else:
                group.desc = 'unary diff %s should_be %s' % (
                    extra.node.label, missing.node.label)
            group.fields['old desc'] = group.desc
            group.errors.append(extra)
            ungrouped.remove(extra)
            group.errors.append(missing)
            ungrouped.remove(missing)
            repair_tree.repair_extra_missing_pair(missing, extra, test)
            grouped.append(group)
            changed = True
        else:
            # Most of the other cases are either just an incorrect node labelling, or less clear
            # TODO:  One case to consider is when there is a correct node with all
            # the missing nodes above and all the extra nodes below (or vice versa)
            pass

    return changed, test
Esempio n. 3
0
def extra_multicrossing_starting(error, test_tree, to_group, starting, ungrouped, ctree):
	'''Extra, then if there are crossing brackets that start here, and no
	crossing bracket that ends at the same spot, the other thing under this
	bracket has something that should have attached to it, but attached too high.
	Consider what would happen if it had attached here and see what other errors
	it fixes (ie this extra may now match with a missing bracket above)

	Arguments:
	  error - not used by this function
	  test_tree - tree that is searched and modified in place
	  to_group - list that receives the errors explained by this group
	  starting - dict of crossing missing errors; only the entry under its
	    first key is used
	  ungrouped - errors not yet assigned to a group
	  ctree - node whose right edge marks where the moved material begins

	Returns (group_fields, test_tree), or (None, test_tree) when no extra
	bracket ending at ctree's right edge pairs up (same label and same start)
	with a missing bracket ending where the longest crossing bracket ends.
	'''
	# find the longest crossing missing bracket that starts here
	# (list(...)[0] rather than keys()[0] so dict views work as well)
	start = list(starting)[0]
	cend = ctree.span[1]
	crossing_errors = starting[start]
	longest_error = None
	for merror in crossing_errors:
		if longest_error is None or longest_error.node.span[1] < merror.node.span[1]:
			longest_error = merror
	mspan = (cend, longest_error.node.span[1])

	# find the set of missing brackets that end where that one ends
	related_missing = []
	for merror in ungrouped:
		if merror.missing and merror.node.span[1] == longest_error.node.span[1]:
			related_missing.append((merror.node.span, merror))
	# NOTE(review): entries with equal spans fall back to comparing the error
	# objects themselves
	related_missing.sort()

	# find the set of extra brackets that end where this one ends
	related_extra = []
	for eerror in ungrouped:
		if eerror.extra:
			current_node = bracket_errors.get_extra_tree(eerror, test_tree)
			if current_node.span[1] == ctree.span[1]:
				related_extra.append((current_node.span, eerror))
	related_extra.sort()

	# find the lowest pairing (the last extra bracket, in sorted order, that
	# has a missing bracket with the same label and start)
	lowest = None
	for pair in related_extra:
		for mpair in related_missing:
			if mpair[1].node.label == pair[1].node.label:
				if mpair[1].node.span[0] == pair[1].node.span[0]:
					lowest = pair[1]
					break
	if lowest is None:
		return None, test_tree

	# find all the parts that start in the missing bracket to be here: walk
	# down from the root to the highest node starting at cend, take it, and
	# continue from where it ends
	moving = []
	while cend < mspan[1]:
		brac = test_tree
		done = False
		while not done:
			for subtree in brac.subtrees:
				if cend == subtree.span[0]:
					moving.append(subtree)
					done = True
					cend = subtree.span[1]
					break
				if subtree.span[0] < cend < subtree.span[1]:
					brac = subtree
					break

	# move them across
	group_fields = {}
	group_fields['type'] = 'attachment'
	group_fields['height'] = 'too high'
	group_fields['from parent'] = moving[0].parent.label
	addendum = []
	target = bracket_errors.get_extra_tree(lowest, test_tree)
	group_desc = 'attachment too_high %s_instead_of_%s' % (moving[0].parent.label, target.label)
	group_fields['to parent'] = target.label
	single_child_parents = []
	for node in moving:
		parent = node.parent
		parent.subtrees.remove(node)
		# if the parent now has only one child, look into whether it should be deleted
		if len(parent.subtrees) == 1:
			if parent.label == parent.subtrees[0].label:
				single_child_parents.append(parent)
		target.subtrees.append(node)
		node.parent = target
		addendum.append(node.label)
	group_fields['nodes moving'] = ' '.join(addendum)
	group_desc += ' ' + '_'.join(addendum)
	test_tree.update_span()

	# a parent left with a single extra child of the same label is redundant
	for parent in single_child_parents:
		if len(parent.subtrees) == 1:
			if parent.subtrees[0].extra and parent.label == parent.subtrees[0].label:
				eerror = bracket_errors.get_extra_error(ungrouped, parent.subtrees[0])
				repair_tree.repair_extra_node(eerror, test_tree)
				to_group.append(eerror)

	# attempt to repair the longest crossing error
	if target == ctree:
		if repair_tree.repair_missing_node(longest_error, test_tree, failure_expected=True):
			to_group.append(longest_error)
	group_desc += ' |emcs1'
	group_fields['ID'] = 'emcs1'
	group_fields['old desc'] = group_desc
	return group_fields, test_tree
Esempio n. 4
0
def extra_crossing_starting(error, test_tree, to_group, starting, ungrouped, ctree):
	'''Extra, then if there is a crossing bracket that starts here, and no
	crossing bracket that ends at the same spot, the other thing under this
	bracket has something that should have attached to it, but attached too high.
	Consider what would happen if it had attached here and see what other errors
	it fixes (ie this extra may now match with a missing bracket above)

	Arguments:
	  error - the extra bracket error; added to to_group, and its node's word
	    yield is included in the description
	  test_tree - tree that is searched and modified in place
	  to_group - list that receives the errors explained by this group
	  starting - dict of crossing missing errors; only the entry under its
	    first key is used
	  ungrouped - errors not yet assigned to a group
	  ctree - node for the extra bracket; material following its right edge
	    is moved under it (or under its last child, see below)

	Returns (group_fields, test_tree).
	'''
	# find the longest crossing missing bracket that starts here
	# (list(...)[0] rather than keys()[0] so dict views work as well)
	start = list(starting)[0]
	cend = ctree.span[1]
	crossing_errors = starting[start]
	longest_error = None
	text = error.node.word_yield()
	for merror in crossing_errors:
		if longest_error is None or longest_error.node.span[1] < merror.node.span[1]:
			longest_error = merror
	mspan = (cend, longest_error.node.span[1])

	# find all the parts that start in the missing bracket to be here: walk
	# down from the root to the highest node starting at cend, take it, and
	# continue from where it ends
	moving = []
	while cend < mspan[1]:
		brac = test_tree
		done = False
		while not done:
			for subtree in brac.subtrees:
				if cend == subtree.span[0]:
					moving.append(subtree)
					done = True
					cend = subtree.span[1]
					break
				if subtree.span[0] < cend < subtree.span[1]:
					brac = subtree
					break

	# move them across
	group_fields = {}
	group_fields['type'] = 'attachment'
	group_fields['height'] = 'too high'
	group_fields['from parent'] = moving[0].parent.label
	group_fields['to parent'] = longest_error.node.label
	group_desc = 'attachment too_high %s_instead_of_%s' % (moving[0].parent.label, longest_error.node.label)
	addendum = []

	# attach to ctree, unless its last child is an extra bracket with the
	# label and start of the longest crossing bracket, in which case that
	# child is the bracket being completed
	target = ctree
	last_child = ctree.subtrees[-1]
	if last_child.extra:
		if last_child.label == longest_error.node.label:
			if last_child.span[0] == longest_error.node.span[0]:
				target = last_child

	single_child_parents = []
	for node in moving:
		parent = node.parent
		parent.subtrees.remove(node)
		# if the parent now has only one child, look into whether it should be deleted
		if len(parent.subtrees) == 1:
			if parent.label == parent.subtrees[0].label:
				single_child_parents.append(parent)
		target.subtrees.append(node)
		node.parent = target
		addendum.append(node.label)
	group_desc += ' ' + '_'.join(addendum)
	group_fields['nodes moving'] = ' '.join(addendum)
	test_tree.update_span()

	# a parent left with a single extra child of the same label is redundant
	for parent in single_child_parents:
		if len(parent.subtrees) == 1:
			if parent.subtrees[0].extra and parent.label == parent.subtrees[0].label:
				eerror = bracket_errors.get_extra_error(ungrouped, parent.subtrees[0])
				repair_tree.repair_extra_node(eerror, test_tree)
				to_group.append(eerror)

	# attempt to repair the longest crossing error
	if target == ctree:
		if repair_tree.repair_missing_node(longest_error, test_tree, failure_expected=True):
			to_group.append(longest_error)
	if error not in to_group:
		to_group.append(error)
	target.extra = False
	group_desc += ' ' + text + ' |ecs1'
	group_fields['ID'] = 'ecs1'
	group_fields['old desc'] = group_desc
	return group_fields, test_tree
Esempio n. 5
0
def extra_crossing_ending(error, test_tree, to_group, ending, ungrouped, ctree):
	'''Extra, then if there is a crossing bracket that ends in the middle of
	here, the other thing under this bracket is attaching too low.  This could
	explain a bunch of other errors.  In particular, consider if the wrongly
	attached thing was collapsed to 0, what would that fix (note that the extra
	bracket may still be extra at this point, or may now be equivalent to a
	missing bracket).

	Two cases are handled:
	  ece2 - there is a single crossing bracket and a missing bracket with
	    the label and right edge of this extra bracket that starts where the
	    crossing bracket starts.  The material to the left is moved under the
	    extra bracket, which then stands in for the missing one.
	  ece1 - otherwise, everything in ctree after the end of the longest
	    crossing bracket is moved up to an ancestor of ctree.

	Arguments:
	  error - the extra bracket error
	  test_tree - tree that is searched and modified in place
	  to_group - list that receives the errors explained by this group
	  ending - dict of crossing missing errors; only the entry under its
	    first key is used
	  ungrouped - errors not yet assigned to a group
	  ctree - node for the extra bracket

	Returns (group_fields, test_tree).
	'''
	# work out what needs to move
	# (list(...)[0] rather than keys()[0] so dict views work as well)
	end = list(ending)[0]
	crossing_errors = ending[end]

	# Check the case of a matching missing bracket
	if len(ending[end]) == 1:
		for merror in ungrouped:
			if not (merror.missing and merror.node.label == error.node.label):
				continue
			if merror.node.span[1] != error.node.span[1]:
				continue
			if ending[end][0].node.span[0] != merror.node.span[0]:
				continue

			# the other things should be moving under here!
			# Collect them right to left, starting at the left edge of the
			# extra bracket and stopping at the start of the missing one
			moving = []
			target = bracket_errors.get_extra_tree(error, test_tree)
			mspan = merror.node.span
			cend = target.span[0]
			while cend > mspan[0]:
				brac = test_tree
				done = False
				while not done:
					for subtree in brac.subtrees:
						if cend == subtree.span[1] and subtree.span[0] >= mspan[0]:
							moving.append(subtree)
							done = True
							cend = subtree.span[0]
							break
						if subtree.span[0] < cend <= subtree.span[1]:
							brac = subtree
							break

			# move them across
			group_fields = {}
			group_fields['type'] = 'attachment'
			group_fields['height'] = 'incorrect'
			group_fields['from parents'] = ''
			for node in moving:
				group_fields['from parents'] += ' ' + node.parent.label
			addendum = []
			group_desc = 'attachment incorrect %s_instead_of_%s' % (moving[0].parent.label, target.label)
			group_fields['to parent'] = target.label
			single_child_parents = []
			for node in moving:
				parent = node.parent
				parent.subtrees.remove(node)
				# if the parent now has only one child, look into whether it should be deleted
				if len(parent.subtrees) == 1:
					if parent.label == parent.subtrees[0].label:
						single_child_parents.append(parent)
				# moving runs right to left, so inserting at the front keeps
				# the nodes in sentence order
				target.subtrees.insert(0, node)
				node.parent = target
				addendum.insert(0, node.label)
			group_fields['nodes moving'] = ' '.join(addendum)
			group_desc += ' ' + '_'.join(addendum)
			test_tree.update_span()

			# a parent left with a single extra child of the same label is
			# redundant
			for parent in single_child_parents:
				if len(parent.subtrees) == 1:
					if parent.subtrees[0].extra and parent.label == parent.subtrees[0].label:
						eerror = bracket_errors.get_extra_error(ungrouped, parent.subtrees[0])
						repair_tree.repair_extra_node(eerror, test_tree)
						to_group.append(eerror)

			target.extra = False
			if error not in to_group:
				to_group.append(error)
			to_group.append(merror)
			group_desc += ' |ece2'
			group_fields['ID'] = 'ece2'
			group_fields['old desc'] = group_desc
			test_tree.check_consistency()
			return group_fields, test_tree

	# work out where it is going to move to
	# first find the longest crossing error (the one starting furthest left)
	longest_error = None
	for merror in crossing_errors:
		if longest_error is None or merror.node.span[0] < longest_error.node.span[0]:
			longest_error = merror
	end = longest_error.node.span[1]

	# collect what lies between the end of that bracket and the end of ctree
	cend = end
	moving = []
	while cend < ctree.span[1]:
		brac = test_tree
		done = False
		while not done:
			for subtree in brac.subtrees:
				if cend == subtree.span[0] and subtree.span[0] <= ctree.span[1]:
					moving.append(subtree)
					done = True
					cend = subtree.span[1]
					break
				if subtree.span[0] <= cend < subtree.span[1]:
					brac = subtree
					break

	# then see how far up we can go to it: climb while the ancestor still
	# ends where ctree ends, stopping at the first one that starts at or
	# before the crossing bracket
	parent = ctree
	while parent.span[1] == ctree.span[1]:
		if parent.span[0] <= longest_error.node.span[0]:
			break
		parent = parent.parent

	# move the things up to this level
	group_fields = {}
	group_fields['type'] = 'attachment'
	group_fields['height'] = 'too low'
	group_fields['from parent'] = ctree.label
	group_fields['to parent'] = parent.label
	group_fields['nodes moving'] = []
	group_desc = 'attachment too_low %s_instead_of_%s' % (ctree.label, parent.label)
	for pos in range(len(parent.subtrees)):
		if parent.subtrees[pos].span[1] == ctree.span[1]:
			# insert the movers, in order, straight after this child
			for subtree in moving:
				subtree.parent.subtrees.remove(subtree)
				parent.subtrees.insert(pos + 1, subtree)
				pos += 1
				subtree.parent = parent
				group_desc += ' ' + subtree.label
				group_fields['nodes moving'].append(subtree.label)
			break
	group_fields['nodes moving'] = ' '.join(group_fields['nodes moving'])

	# if only one thing is left behind, and its parent is extra, fix that
	if len(ctree.subtrees) == 1:
		for pos in range(len(ctree.parent.subtrees)):
			if ctree.parent.subtrees[pos] == ctree:
				for subtree in ctree.subtrees[::-1]:
					ctree.parent.subtrees.insert(pos+1, subtree)
					subtree.parent = ctree.parent
				break
		ctree.parent.subtrees.remove(ctree)
		to_group.append(error)
	test_tree.update_span()

	# if possible, fix longest_error
	left, right = -1, -1
	for pos in range(len(parent.subtrees)):
		if longest_error.node.span[0] == parent.subtrees[pos].span[0]:
			left = pos
		if longest_error.node.span[1] == parent.subtrees[pos].span[1]:
			right = pos
	if -1 < left < right:
		repair_tree.repair_missing_node(longest_error, test_tree)
		to_group.append(longest_error)

	# other errors that are fixed as a side effect will be found by the cleanup stuff

	group_desc += ' |ece1'
	group_fields['ID'] = 'ece1'
	group_fields['old desc'] = group_desc
	return group_fields, test_tree
Esempio n. 6
0
def missing_with_matching_extra(error, test_tree, to_group, to_add, left, right, parent, ungrouped):
	'''Missing, then if there is an equivalent extra above it, then the next
	chunk of sentence is attaching too low.  This one attachment mistake could
	actually be causing a stack of errors, so we pull out the incorrectly
	attached bits and see what else is fixed.

	Three groups can be produced:
	  mwme1 - the missing bracket starts at the first child of parent: the
	    children after position right are moved up one level
	  mwme2 - the missing bracket ends at the last child of parent, and parent
	    is an NP made only of words directly under a non-extra NP
	  mwme3 - the missing bracket ends at the last child of parent and there
	    are no other errors within parent's span

	Arguments:
	  error - the missing bracket error
	  test_tree - tree that is modified in place
	  to_group - list that receives the errors explained by this group
	  to_add - not used by this function
	  left, right - first and last index in parent.subtrees covered by the
	    missing bracket
	  parent - node of the extra bracket above the missing one
	  ungrouped - errors not yet assigned to a group

	Returns (group_fields, test_tree), or (None, test_tree) if no case applies.
	'''
	if left == 0:
		# our missing bracket covers nodes starting on the left

		# take the rest out, and move them up to be beneath the next layer
		to_group.append(error)
		eerror = bracket_errors.get_extra_error(ungrouped, parent)
		if eerror is None:
			# single-argument print(...) behaves the same as the print
			# statement and also parses on Python 3
			print("Couldn't find match!")
			for terror in ungrouped:
				print(terror)
		else:
			to_group.append(eerror)
		parent.extra = False
		clevel = parent
		parent = parent.parent
		# (an earlier version kept climbing through extra ancestors here; now
		# the nodes always move to the immediate parent)
		prev = clevel

		# pull out the node(s) down the bottom on the right
		# move them up to the discovered level
		group_fields = {}
		group_fields['type'] = 'attachment'
		group_fields['height'] = 'too low'
		group_fields['from parent'] = clevel.label
		group_fields['from left siblings'] = ''
		for child in parent.subtrees:
			if child == prev:
				break
			group_fields['from left siblings'] += ' ' + child.label
		group_fields['to parent'] = parent.label
		group_desc = 'attachment too_low %s_instead_of_%s' % (clevel.label, parent.label)
		addendum = []
		for pos in range(len(parent.subtrees)):
			if clevel.span[1] <= parent.subtrees[pos].span[1]:
				if clevel.span[1] == parent.subtrees[pos].span[1]:
					pos = pos + 1
				# popping from the right and inserting at a fixed position
				# keeps the moved nodes in their original order
				while len(clevel.subtrees) > right + 1:
					node = clevel.subtrees.pop()
					parent.subtrees.insert(pos, node)
					node.parent = parent
					addendum.insert(0, node.label)
				break
		group_fields['nodes moving'] = ' '.join(addendum)
		group_desc += ' ' + '_'.join(addendum)
		test_tree.update_span()
		group_fields['ID'] = '|mwme1'
		group_desc += ' |mwme1'
		group_fields['old desc'] = group_desc
		test_tree.check_consistency()
		return group_fields, test_tree
	elif right == len(parent.subtrees) - 1:
		# our missing bracket is to the right

		# if the extra is an NP and everything under it is a word, NP internal structure
		if (parent.label == 'NP' and parent.parent is not None and
				parent.parent.label == 'NP' and not parent.parent.extra):
			all_words = True
			for subtree in parent.subtrees:
				if subtree.word is None:
					all_words = False
					break
			if all_words:
				group_fields = {}
				group_fields['type'] = 'NP structure'
				eerror = bracket_errors.get_extra_error(ungrouped, parent)
				for merror in ungrouped:
					if (merror.node.span[0] >= parent.span[0] and
							merror.node.span[1] <= parent.span[1] and
							merror.missing):
						to_group.append(merror)
						repair_tree.repair_missing_node(merror, test_tree)
				to_group.append(eerror)
				repair_tree.repair_extra_node(eerror, test_tree)
				test_tree.update_span()
				group_fields['ID'] = '|mwme2'
				group_fields['old desc'] = 'missing error NP structure |mwme2'
				return group_fields, test_tree

		# no other missing or extra brackets under this extra span
		# attachment, give info
		no_others = True
		eerror = bracket_errors.get_extra_error(ungrouped, parent)
		for oerror in ungrouped:
			if (oerror.node.span[0] >= parent.span[0] and
					oerror.node.span[1] <= parent.span[1] and
					oerror != error and oerror != eerror):
				no_others = False
				break
		if no_others:
			group_fields = {}
			group_fields['type'] = 'extra under bracket on right'
			group_fields['parent'] = parent.label
			group_fields['extra nodes'] = ''
			group_fields['children'] = ''
			# NOTE(review): the strict '<' below leaves out a child that ends
			# exactly where the missing bracket ends - confirm this is intended
			for subtree in parent.subtrees:
				if subtree.span[0] < error.node.span[0]:
					group_fields['extra nodes'] += ' ' + subtree.label
				elif subtree.span[1] < error.node.span[1]:
					group_fields['children'] += ' ' + subtree.label
			group_fields['ID'] = '|mwme3'
			group_fields['old desc'] = 'extra under bracket on right |mwme3'
			if error is not None:
				to_group.append(error)
				repair_tree.repair_missing_node(error, test_tree)
			if eerror is not None:
				to_group.append(eerror)
				repair_tree.repair_extra_node(eerror, test_tree)
			test_tree.update_span()
			return group_fields, test_tree
	else:
		# our missing bracket is somewhere in the middle
		pass
	return None, test_tree
Esempio n. 7
0
def single_word_error(ungrouped, grouped, gold, test):
    '''Group extra/missing brackets, at any depth, that have a span of 1.

    If exactly one extra and one missing error share a single-word span they
    form one 'wrong label, right span' group.  Otherwise each error on the
    span gets its own 'single word phrase' group, unless ungrouped holds an
    error of the opposite kind with the same label and the same start.

    Grouped errors are removed from ungrouped and repaired in the test tree
    through repair_tree.

    Arguments:
      ungrouped - list of errors not yet assigned to a group (modified)
      grouped - list of groups found so far (extended)
      gold - not used by this function
      test - the tree being repaired

    Returns (changed, test), where changed is True if any error was grouped.
    '''
    # Collect the errors on each single-word span
    singles = {}
    for error in ungrouped:
        span = error.node.span
        if span[0] + 1 == span[1]:
            singles.setdefault(span, []).append(error)

    to_fix = []
    for span in singles:
        errors = singles[span]
        # First check for cases where there is a matching bracket (so it is in fact
        # just the wrong label)
        if len(errors) == 2 and errors[0].extra != errors[1].extra:
            group = error_group.Error_Group()
            group.errors += errors
            group.fields['type'] = 'wrong label, right span'
            group.desc = 'single_word diff '
            if errors[0].extra:
                group.desc += errors[0].node.label + '_' + errors[1].node.label
            else:
                group.desc += errors[1].node.label + '_' + errors[0].node.label
            grouped.append(group)
            to_fix += errors
            continue

        # this includes cases of multiple brackets (so we don't know which to
        # link as above), and a single bracket error
        for error in errors:
            # check to see if a matching bracket type starts here and matches type
            has_counterpart = any(
                uerror.node.span[0] == error.node.span[0]
                and uerror.node.label == error.node.label
                and ((uerror.missing and error.extra)
                     or (uerror.extra and error.missing))
                for uerror in ungrouped)
            if has_counterpart:
                continue
            group = error_group.Error_Group()
            group.errors.append(error)
            group.desc = 'single_word '
            if error.missing:
                group.desc += 'miss'
            else:
                group.desc += 'extra'
            group.desc += ' ' + error.node.label
            group.fields['type'] = 'single word phrase'
            group.fields['old desc'] = group.desc
            grouped.append(group)
            to_fix.append(error)

    for error in to_fix:
        ungrouped.remove(error)
        if error.extra:
            repair_tree.repair_extra_node(error, test)
        else:
            repair_tree.repair_missing_node(error, test)

    # Report whether anything was grouped; previously this flag was set to
    # False at the start and never updated, so the caller was always told
    # that nothing had changed
    changed = bool(to_fix)
    return changed, test
def unary_error(ungrouped, grouped, gold, test):
	'''Group errors that sit on a span present in both the test and gold tree.

	Only spans longer than one word are considered.  The errors on each such
	span are classified as one of:
	  - missing unary production(s), when none of the errors is extra
	  - extra unary production(s), when none of the errors is missing
	  - a mislabelled node, when there is exactly one of each
	Any other combination is left in ungrouped.

	For every group created, the errors are removed from ungrouped, the test
	tree is repaired through repair_tree and the group is appended to grouped.

	Arguments:
	  ungrouped - list of errors not yet assigned to a group (modified)
	  grouped - list of groups found so far (extended)
	  gold, test - trees providing get_spans(); test is the tree being repaired

	Returns (changed, test), where changed is True if a group was created.
	'''
	_, span_set = test.get_spans()
	_, gold_span_set = gold.get_spans()

	# span -> (entries at the span in test, entries at the span in gold,
	#          errors on the span)
	relevant_errors = {}
	for error in ungrouped:
		span = error.node.span
		if span[1] - span[0] > 1 and span in span_set and span in gold_span_set:
			if span not in relevant_errors:
				relevant_errors[span] = (len(span_set[span].values()), len(gold_span_set[span].values()), [])
			relevant_errors[span][2].append(error)

	changed = False
	for span in relevant_errors:
		test_count, gold_count, errors = relevant_errors[span]
		missing_errors = sum(1 for error in errors if error.missing)
		extra_errors = len(errors) - missing_errors

		if test_count > 0 and extra_errors == 0:
			# there is/are missing unary production(s) here
			group = error_group.Error_Group()
			current_labels = []
			for node_set_label in span_set[span]:
				for node in span_set[span][node_set_label]:
					current_labels.append(node.label)
			current_labels.sort()
			missing_labels = sorted(error.node.label for error in errors)
			for error in errors:
				ungrouped.remove(error)
				group.errors.append(error)
				repair_tree.repair_missing_node(error, test)
			group.fields['type'] = 'unary'
			group.fields['subtype'] = 'missing'
			group.desc = 'unary miss %s over %s' % ('_'.join(missing_labels), '_'.join(current_labels))
			group.fields['nodes'] = ' '.join(missing_labels)
			group.fields['old desc'] = group.desc
			grouped.append(group)
			changed = True
		elif gold_count > 0 and missing_errors == 0:
			# there is/are extra unary production(s) here
			current_labels = []
			for node_set_label in span_set[span]:
				for node in span_set[span][node_set_label]:
					if not node.extra:
						current_labels.append(node.label)
			current_labels.sort()
			extra_labels = sorted(error.node.label for error in errors)

			# only use it if there isn't a matching missing error directly
			# above.  The candidate must be a *missing* error: without that
			# filter the extra error matches itself in ungrouped (same label,
			# same start, and its parent always reaches at least as far as it
			# does), so a lone extra unary would never be grouped.
			skip = False
			if len(extra_labels) == 1:
				error = errors[0]
				for merror in ungrouped:
					if not merror.missing:
						continue
					if merror.node.label != extra_labels[0]:
						continue
					if merror.node.span[0] == error.node.span[0]:
						if error.node.parent.span[1] >= merror.node.span[1]:
							skip = True
							break
					elif merror.node.span[1] == error.node.span[1]:
						if error.node.parent.span[0] <= merror.node.span[0]:
							skip = True
							break
			if not skip:
				group = error_group.Error_Group()
				for error in errors:
					ungrouped.remove(error)
					group.errors.append(error)
					repair_tree.repair_extra_node(error, test)
				group.fields['type'] = 'unary'
				group.fields['subtype'] = 'extra'
				group.fields['nodes'] = ' '.join(extra_labels)
				group.desc = 'unary extra %s over %s' % ('_'.join(extra_labels), '_'.join(current_labels))
				group.fields['old desc'] = group.desc
				grouped.append(group)
				changed = True
		elif missing_errors == 1 and extra_errors == 1:
			# We have a mislabelled node
			extra, missing = errors[0], errors[1]
			if not extra.extra:
				extra, missing = missing, extra

			group = error_group.Error_Group()
			group.fields['type'] = 'wrong label, right span'
			if test_count == 1 and gold_count == 1:
				group.desc = 'diff %s should_be %s' % (extra.node.label, missing.node.label)
			else:
				group.desc = 'unary diff %s should_be %s' % (extra.node.label, missing.node.label)
			group.fields['old desc'] = group.desc
			group.errors.append(extra)
			ungrouped.remove(extra)
			group.errors.append(missing)
			ungrouped.remove(missing)
			repair_tree.repair_extra_missing_pair(missing, extra, test)
			grouped.append(group)
			changed = True
		else:
			# Most of the other cases are either just an incorrect node labelling, or less clear
			# TODO:  One case to consider is when there is a correct node with all
			# the missing nodes above and all the extra nodes below (or vice versa)
			pass

	return changed, test
def single_word_error(ungrouped, grouped, gold, test):
	'''Group extra/missing brackets, at any depth, that have a span of 1.

	If exactly one extra and one missing error share a single-word span they
	form one 'wrong label, right span' group.  Otherwise each error on the
	span gets its own 'single word phrase' group, unless ungrouped holds an
	error of the opposite kind with the same label and the same start.

	Grouped errors are removed from ungrouped and repaired in the test tree
	through repair_tree.

	Arguments:
	  ungrouped - list of errors not yet assigned to a group (modified)
	  grouped - list of groups found so far (extended)
	  gold - not used by this function
	  test - the tree being repaired

	Returns (changed, test), where changed is True if any error was grouped.
	'''
	# Collect the errors on each single-word span
	singles = {}
	for error in ungrouped:
		span = error.node.span
		if span[0] + 1 == span[1]:
			singles.setdefault(span, []).append(error)

	to_fix = []
	for span in singles:
		errors = singles[span]
		# First check for cases where there is a matching bracket (so it is in fact
		# just the wrong label)
		if len(errors) == 2 and errors[0].extra != errors[1].extra:
			group = error_group.Error_Group()
			group.errors += errors
			group.fields['type'] = 'wrong label, right span'
			group.desc = 'single_word diff '
			if errors[0].extra:
				group.desc += errors[0].node.label + '_' + errors[1].node.label
			else:
				group.desc += errors[1].node.label + '_' + errors[0].node.label
			grouped.append(group)
			to_fix += errors
			continue

		# this includes cases of multiple brackets (so we don't know which to
		# link as above), and a single bracket error
		for error in errors:
			# check to see if a matching bracket type starts here and matches type
			has_counterpart = any(
				uerror.node.span[0] == error.node.span[0]
				and uerror.node.label == error.node.label
				and ((uerror.missing and error.extra)
					or (uerror.extra and error.missing))
				for uerror in ungrouped)
			if has_counterpart:
				continue
			group = error_group.Error_Group()
			group.errors.append(error)
			group.desc = 'single_word '
			if error.missing:
				group.desc += 'miss'
			else:
				group.desc += 'extra'
			group.desc += ' ' + error.node.label
			group.fields['type'] = 'single word phrase'
			group.fields['old desc'] = group.desc
			grouped.append(group)
			to_fix.append(error)

	for error in to_fix:
		ungrouped.remove(error)
		if error.extra:
			repair_tree.repair_extra_node(error, test)
		else:
			repair_tree.repair_missing_node(error, test)

	# Report whether anything was grouped; previously this flag was set to
	# False at the start and never updated, so the caller was always told
	# that nothing had changed
	changed = bool(to_fix)
	return changed, test