Example #1
0
def attachment_error(ungrouped, grouped, gold, test):
	"""Resolve errors from `ungrouped` one group at a time until none can be.

	Each pass checks the consistency of `test`, then walks `ungrouped` and
	hands every missing or extra error to the matching handler.  The first
	handler that returns non-None group fields ends the walk; a new
	Error_Group is built from those fields, the errors the handler put in
	its `to_group` list are moved from `ungrouped` into the group, and the
	errors it put in `to_add` are appended to `ungrouped`.  The loop stops
	after a pass in which no handler produced group fields.

	Side effects: stores `gold` in the module-level `global_gold`, and
	mutates both `ungrouped` and `grouped` in place.

	Returns a pair (changed, test): `changed` is True if at least one group
	was appended to `grouped`, and `test` is the latest tree handed back by
	the handlers and by classify.check_for_matching_errors.
	"""
	global global_gold
	global_gold = gold
	changed = False
	while True:
		test.check_consistency()
		# The handlers fill these two lists while fixing a single error
		to_group, to_add = [], []
		group_fields = None
		for candidate in ungrouped:
			if candidate.missing:
				if bracket_errors.error_crosses_bracket(candidate, test):
					group_fields, test = missing_crossing(candidate, test, to_group, to_add)
				else:
					group_fields, test = missing_not_crossing(candidate, test, to_group, to_add, ungrouped)
			elif candidate.extra:
				group_fields, test = extra(candidate, test, to_group, to_add, ungrouped)
			if group_fields is not None:
				# fix errors one at a time: stop at the first one resolved
				break

		if group_fields is None:
			# A complete pass resolved nothing, so we are done
			return changed, test

		group = error_group.Error_Group()
		group.fields = group_fields
		group.desc = group.fields['old desc']
		group.determine_type()
		# Move every error covered by this fix out of the ungrouped pool
		for fixed in to_group:
			ungrouped.remove(fixed)
			group.errors.append(fixed)
		# Errors the handler asked to add go back into the pool
		for introduced in to_add:
			ungrouped.append(introduced)
		test = classify.check_for_matching_errors(ungrouped, group, gold, test)
		grouped.append(group)
		changed = True
Example #2
0
def extra(error, test_tree, to_group, to_add, ungrouped):
	"""Pick and run the handler for an extra-bracket error.

	Arguments:
	  error     - the extra error; its `node.span` is read here.
	  test_tree - the tree the error belongs to.
	  to_group  - list passed through to the chosen handler.
	  to_add    - accepted for signature compatibility; not used here.
	  ungrouped - the errors still unresolved; its entries flagged
	              `missing` are compared against the extra bracket.

	Returns whatever the chosen handler returns, or (None, test_tree) when
	the extra bracket cannot be found in the tree or the crossing pattern
	is one that no handler covers.
	"""
	# Get the bracket in the tree that corresponds to this error
	ctree = bracket_errors.get_extra_tree(error, test_tree)
	if ctree is None:
		# Report on stdout and stderr, then give up on this error: everything
		# below reads attributes of ctree and cannot work without it.
		message = 'Did not find the matching extra bracket\n'
		sys.stdout.write(message)
		sys.stderr.write(message)
		sys.stdout.write('%s\n' % (error,))
		sys.stdout.write('%s\n' % (test_tree,))
		return None, test_tree

	# Find all missing errors that cross this bracket
	crossing_errors = [
		merror for merror in ungrouped
		if merror.missing and bracket_errors.error_crosses_bracket(merror, ctree)
	]

	if crossing_errors:
		# Sort them into those that start inside this bracket (keyed by their
		# start position), those that end inside it (keyed by their end
		# position), and anything else.
		ending = {}
		starting = {}
		other = []
		for merror in crossing_errors:
			span = merror.node.span
			if ctree.span[0] < span[0] < ctree.span[1] < span[1]:
				starting.setdefault(span[0], []).append(merror)
			elif span[0] < ctree.span[0] < span[1] < ctree.span[1]:
				ending.setdefault(span[1], []).append(merror)
			else:
				other.append(merror)

		# Note that the counts below are numbers of distinct positions, not
		# numbers of errors.
		if len(starting) == 0 and len(ending) == 1 and len(other) == 0:
			return extra_crossing_ending(error, test_tree, to_group, ending, ungrouped, ctree)
		if len(starting) == 1 and len(ending) == 0 and len(other) == 0:
			return extra_crossing_starting(error, test_tree, to_group, starting, ungrouped, ctree)
		if len(starting) > 1 and len(ending) == 0:
			return extra_multicrossing_starting(error, test_tree, to_group, starting, ungrouped, ctree)
		# there could be a mixture of starting and ending,
		# or multiple starting points and multiple ending points;
		# none of those cases is handled
		return None, test_tree

	# No crossing errors:
	# find the smallest missing error that covers this extra error
	shortest_error = None
	snode = None
	for merror in ungrouped:
		if not merror.missing:
			continue
		mnode = merror.node
		if mnode.span[0] <= error.node.span[0] and error.node.span[1] <= mnode.span[1]:
			# Keep this one if it is the first found, or if it fits inside
			# the best candidate so far
			if snode is None or (snode.span[0] <= mnode.span[0] and mnode.span[1] <= snode.span[1]):
				shortest_error = merror
				snode = mnode

	if shortest_error is None:
		# Nothing covers the extra bracket, so there is nothing to match
		return extra_no_matching(error, test_tree, ctree, to_group)

	# Check that there are no spans that are over the extra and under the
	# missing: the parent of the extra bracket counts as such a span when it
	# lies inside the missing bracket without having the same span, and is
	# not itself marked extra.
	outer = shortest_error.node.span
	inner = ctree.parent.span
	parent_strictly_inside = (
		(outer[0] < inner[0] and inner[1] <= outer[1]) or
		(outer[0] <= inner[0] and inner[1] < outer[1])
	)
	intermediate_spans = parent_strictly_inside and not ctree.parent.extra

	if not intermediate_spans and shortest_error.node.label == ctree.label:
		# we have a matching missing error
		if bracket_errors.error_crosses_bracket(shortest_error, test_tree):
			return extra_matching_crossing_miss(error, test_tree, shortest_error, ungrouped, to_group)
		return extra_matching_miss(error, test_tree, shortest_error, ctree, to_group)
	return extra_no_matching(error, test_tree, ctree, to_group)