def _replace_ids(can_anns, _from, to, excluded_anns=None, debug=False): if excluded_anns is None: excluded_anns = set() for can_ann in can_anns: if isinstance(can_ann, Text) and Text not in excluded_anns: # These annotations can not reference others, pass pass elif isinstance(can_ann, Event) and Event not in excluded_anns: can_ann.args = [(a, to if v in _from else v) for a, v in can_ann.args] elif isinstance(can_ann, Modifier) and Modifier not in excluded_anns: if can_ann.target in _from: can_ann.target = to elif isinstance(can_ann, Equiv) and Equiv not in excluded_anns: if debug: print >> stderr, 'Will purge from equiv:', _from & set( can_ann.members), to new_members = [m for m in can_ann.members if m not in _from] if new_members != can_ann.members: can_ann.members = new_members can_ann.members.append(to) sort_nicely(can_ann.members) elif not any( isinstance(can_ann, c) for c in ( Text, Event, Modifier, Equiv, )): assert False, 'unknown canonical annotation type'
def _replace_ids(can_anns, _from, to, excluded_anns=None, debug=False): if excluded_anns is None: excluded_anns = set() for can_ann in can_anns: if isinstance(can_ann, Text) and Text not in excluded_anns: # These annotations can not reference others, pass pass elif isinstance(can_ann, Event) and Event not in excluded_anns: can_ann.args = [(a, to if v in _from else v) for a, v in can_ann.args] elif isinstance(can_ann, Modifier) and Modifier not in excluded_anns: if can_ann.target in _from: can_ann.target = to elif isinstance(can_ann, Equiv) and Equiv not in excluded_anns: if debug: print >> stderr, 'Will purge from equiv:', _from & set(can_ann.members), to new_members = [m for m in can_ann.members if m not in _from] if new_members != can_ann.members: can_ann.members = new_members can_ann.members.append(to) sort_nicely(can_ann.members) elif not any(isinstance(can_ann, c) for c in (Text, Event, Modifier, Equiv, )): assert False, 'unknown canonical annotation type'
eq_anns_to_remove = set() eq_anns = [a for a in can_anns if isinstance(a, Equiv)] for eq_set in equivalent_texts.itervalues(): #print 'EQ:', eq_set #print eq_anns for eq_ann_a, eq_ann_b in combinations(eq_anns, 2): #print 'A:', eq_ann_a.members #print 'B:', eq_ann_b.members # Does the equivalent texts bridge the equivalences? if ((set(eq_ann_a.members) & eq_set) and (set(eq_ann_b.members) & eq_set)): if debug: print >> stderr, 'Merging:', eq_ann_a, 'and', eq_ann_b eq_ann_a.members = eq_ann_a.members + [a for a in eq_ann_b.members if a not in eq_ann_a.members] sort_nicely(eq_ann_a.members) eq_ann_b.members = eq_ann_b.members + [a for a in eq_ann_a.members if a not in eq_ann_b.members] sort_nicely(eq_ann_b.members) # We can now safely remove the latter annotation later on eq_anns_to_remove.add(eq_ann_b) if debug and eq_anns_to_remove: print >> stderr, 'Will remove redundant Equivs:', eq_anns_to_remove can_anns = [a for a in can_anns if a not in eq_anns_to_remove] # Merge text into unique (texts, types) since we now lack textual bounds ids_to_remove = set() for eq_set in equivalent_texts.itervalues(): eqs = [a for a in eq_set] sort_nicely(eqs) to_keep = eqs[0] not_to_keep = set(eq_set - set((to_keep, )))
eq_anns = [a for a in can_anns if isinstance(a, Equiv)] for eq_set in equivalent_texts.itervalues(): #print 'EQ:', eq_set #print eq_anns for eq_ann_a, eq_ann_b in combinations(eq_anns, 2): #print 'A:', eq_ann_a.members #print 'B:', eq_ann_b.members # Does the equivalent texts bridge the equivalences? if ((set(eq_ann_a.members) & eq_set) and (set(eq_ann_b.members) & eq_set)): if debug: print >> stderr, 'Merging:', eq_ann_a, 'and', eq_ann_b eq_ann_a.members = eq_ann_a.members + [ a for a in eq_ann_b.members if a not in eq_ann_a.members ] sort_nicely(eq_ann_a.members) eq_ann_b.members = eq_ann_b.members + [ a for a in eq_ann_a.members if a not in eq_ann_b.members ] sort_nicely(eq_ann_b.members) # We can now safely remove the latter annotation later on eq_anns_to_remove.add(eq_ann_b) if debug and eq_anns_to_remove: print >> stderr, 'Will remove redundant Equivs:', eq_anns_to_remove can_anns = [a for a in can_anns if a not in eq_anns_to_remove] # Merge text into unique (texts, types) since we now lack textual bounds ids_to_remove = set() for eq_set in equivalent_texts.itervalues(): eqs = [a for a in eq_set] sort_nicely(eqs) to_keep = eqs[0]