def attachment_error(ungrouped, grouped, gold, test):
    """Repeatedly explain attachment-style errors, forming one group per pass.

    Each pass scans ``ungrouped`` in order and asks a handler
    (``missing_not_crossing``, ``missing_crossing`` or ``extra``) to explain
    each error until one returns a non-None set of group fields.  When that
    happens an ``Error_Group`` is built from the fields, the errors the
    handler listed for grouping are moved out of ``ungrouped`` into the group,
    any errors the handler asked to add are appended to ``ungrouped``, and the
    group is appended to ``grouped``.  The process stops after the first pass
    in which no error could be explained.

    Side effects: stores ``gold`` in the module-level ``global_gold``, mutates
    ``ungrouped`` and ``grouped`` in place, and calls
    ``test.check_consistency()`` at the start of every pass.

    Returns a ``(changed, test)`` pair: ``changed`` is True when at least one
    group was created, and ``test`` is the most recent value handed back by
    the handlers / ``classify.check_for_matching_errors``.
    """
    global global_gold
    global_gold = gold

    made_progress = False
    while True:
        test.check_consistency()

        # Fix errors one at a time: the handlers fill these scratch lists
        # with the errors to move into the new group and the errors to add.
        resolved = []
        introduced = []
        fields = None
        for candidate in ungrouped:
            if candidate.missing:
                if bracket_errors.error_crosses_bracket(candidate, test):
                    fields, test = missing_crossing(
                        candidate, test, resolved, introduced)
                else:
                    fields, test = missing_not_crossing(
                        candidate, test, resolved, introduced, ungrouped)
            elif candidate.extra:
                fields, test = extra(
                    candidate, test, resolved, introduced, ungrouped)
            if fields is not None:
                break

        # A full scan that explained nothing means we are done.
        if fields is None:
            return made_progress, test

        new_group = error_group.Error_Group()
        new_group.fields = fields
        new_group.desc = new_group.fields['old desc']
        new_group.determine_type()

        for member in resolved:
            ungrouped.remove(member)
            new_group.errors.append(member)
        for fresh in introduced:
            ungrouped.append(fresh)

        test = classify.check_for_matching_errors(
            ungrouped, new_group, gold, test)
        grouped.append(new_group)
        made_progress = True
def extra(error, test_tree, to_group, to_add, ungrouped):
    """Try to explain a single extra-bracket error by delegating to a handler.

    The bracket for ``error`` is looked up in ``test_tree``.  The missing
    errors in ``ungrouped`` that cross that bracket are then split into those
    starting inside it, those ending inside it, and the rest, and the matching
    ``extra_crossing_*`` / ``extra_multicrossing_starting`` handler is called.
    When nothing crosses the bracket, the smallest missing error covering the
    extra bracket is located and one of ``extra_matching_crossing_miss``,
    ``extra_matching_miss`` or ``extra_no_matching`` is called.

    Parameters:
        error: the extra error being examined (its ``node.span`` is read).
        test_tree: the tree searched for the extra bracket.
        to_group: list forwarded to the handlers.
        to_add: not used by this function.
        ungrouped: iterable of errors that are still unexplained.

    Returns whatever the selected handler returns, or ``(None, test_tree)``
    when the bracket cannot be found or the crossing pattern has no handler.
    """
    # Get the bracket in the tree that corresponds to this error
    ctree = bracket_errors.get_extra_tree(error, test_tree)
    if ctree is None:
        # Single-argument print(...) emits the same text on Python 2 and 3.
        message = 'Did not find the matching extra bracket'
        print(message)
        sys.stderr.write(message + '\n')
        print(error)
        print(test_tree)
        # Every step below uses ctree, so report this error as unresolved
        # instead of carrying on with None.
        return None, test_tree

    # Find all missing errors that cross this bracket
    crossing_errors = [
        merror for merror in ungrouped
        if merror.missing
        and bracket_errors.error_crosses_bracket(merror, ctree)
    ]

    if crossing_errors:
        # Sort them into those that start inside the bracket and those that
        # end inside it, keyed by the start / end position respectively.
        starting = {}
        ending = {}
        other = []
        for merror in crossing_errors:
            mspan = merror.node.span
            if ctree.span[0] < mspan[0] < ctree.span[1] < mspan[1]:
                starting.setdefault(mspan[0], []).append(merror)
            elif mspan[0] < ctree.span[0] < mspan[1] < ctree.span[1]:
                ending.setdefault(mspan[1], []).append(merror)
            else:
                other.append(merror)

        if not starting and len(ending) == 1 and not other:
            return extra_crossing_ending(
                error, test_tree, to_group, ending, ungrouped, ctree)
        if len(starting) == 1 and not ending and not other:
            return extra_crossing_starting(
                error, test_tree, to_group, starting, ungrouped, ctree)
        # Note: this case does not require `other` to be empty.
        if len(starting) > 1 and not ending:
            return extra_multicrossing_starting(
                error, test_tree, to_group, starting, ungrouped, ctree)
        # There could be a mixture of starting and ending, or multiple
        # starting points and multiple ending points: leave it unresolved.
        return None, test_tree

    # No crossing errors: find the smallest missing error that covers this
    # extra error.
    shortest_error = None
    snode = None
    for merror in ungrouped:
        if not merror.missing:
            continue
        mnode = merror.node
        covers_extra = (mnode.span[0] <= error.node.span[0]
                        and error.node.span[1] <= mnode.span[1])
        if not covers_extra:
            continue
        if snode is None or (snode.span[0] <= mnode.span[0]
                             and mnode.span[1] <= snode.span[1]):
            shortest_error = merror
            snode = mnode

    if shortest_error is None:
        # Nothing covers the extra bracket, so there is nothing to match.
        return extra_no_matching(error, test_tree, ctree, to_group)

    # Check that there are no spans that are over the extra and under the
    # missing: the parent of the extra bracket lies within the missing span
    # without being identical to it, and the parent is not itself extra.
    parent = ctree.parent
    parent_within_missing = (
        (snode.span[0] < parent.span[0] and parent.span[1] <= snode.span[1])
        or
        (snode.span[0] <= parent.span[0] and parent.span[1] < snode.span[1])
    )
    intermediate_spans = parent_within_missing and not parent.extra

    if not intermediate_spans and shortest_error.node.label == ctree.label:
        # We have a matching missing error
        if bracket_errors.error_crosses_bracket(shortest_error, test_tree):
            return extra_matching_crossing_miss(
                error, test_tree, shortest_error, ungrouped, to_group)
        return extra_matching_miss(
            error, test_tree, shortest_error, ctree, to_group)
    return extra_no_matching(error, test_tree, ctree, to_group)