def _filter_overlapping_builtins(builtin_entities, tokens, tags,
                                 tagging_scheme):
    """Drop the builtin entities that overlap one of the tagged slots.

    The slots are computed with
    ``tags_to_preslots(tokens, tags, tagging_scheme)``. An entity is kept
    only when its ``RES_MATCH_RANGE`` overlaps (according to
    ``ranges_overlap``) the ``RES_MATCH_RANGE`` of none of those slots.

    Returns:
        list: the kept entities, in their input order.
    """
    preslots = tags_to_preslots(tokens, tags, tagging_scheme)
    return [
        entity
        for entity in builtin_entities
        if not any(
            ranges_overlap(entity[RES_MATCH_RANGE], preslot[RES_MATCH_RANGE])
            for preslot in preslots
        )
    ]
def _deduplicate_overlapping_slots(slots, language): deduplicated_slots = [] for slot in slots: is_overlapping = False for slot_index, dedup_slot in enumerate(deduplicated_slots): if ranges_overlap(slot[RES_MATCH_RANGE], dedup_slot[RES_MATCH_RANGE]): is_overlapping = True tokens = tokenize(slot[RES_VALUE], language) dedup_tokens = tokenize(dedup_slot[RES_VALUE], language) if len(tokens) > len(dedup_tokens): deduplicated_slots[slot_index] = slot elif len(tokens) == len(dedup_tokens) \ and len(slot[RES_VALUE]) > len(dedup_slot[RES_VALUE]): deduplicated_slots[slot_index] = slot if not is_overlapping: deduplicated_slots.append(slot) return deduplicated_slots
def test_ranges_overlap(self):
    """Check ``ranges_overlap`` for one reference range against six others."""
    # Given
    reference = [4, 8]
    expectations = [
        ([5, 7], True),    # inside the reference
        ([3, 9], True),    # contains the reference
        ([3, 4], False),   # ends where the reference starts
        ([8, 9], False),   # starts where the reference ends
        ([3, 6], True),    # covers the start of the reference
        ([4, 10], True),   # same start, ends after the reference
    ]

    # When / Then
    for other, expected in expectations:
        if expected:
            self.assertTrue(ranges_overlap(reference, other))
        else:
            self.assertFalse(ranges_overlap(reference, other))
def _disambiguate_builtin_entities(builtin_entities): if not builtin_entities: return [] builtin_entities = sorted( builtin_entities, key=lambda be: be[RES_MATCH_RANGE][END] - be[RES_MATCH_RANGE][START], reverse=True) disambiguated_entities = [builtin_entities[0]] for entity in builtin_entities[1:]: entity_rng = entity[RES_MATCH_RANGE] conflict = False for disambiguated_entity in disambiguated_entities: disambiguated_entity_rng = disambiguated_entity[RES_MATCH_RANGE] if ranges_overlap(entity_rng, disambiguated_entity_rng): conflict = True if entity_rng == disambiguated_entity_rng: disambiguated_entities.append(entity) break if not conflict: disambiguated_entities.append(entity) return sorted(disambiguated_entities, key=lambda be: be[RES_MATCH_RANGE][START])
def overlap(lhs_entity, rhs_entity):
    """Return ``ranges_overlap`` applied to the two entities' match ranges.

    Each range is read from the entity's ``RES_MATCH_RANGE`` item.
    """
    lhs_range = lhs_entity[RES_MATCH_RANGE]
    rhs_range = rhs_entity[RES_MATCH_RANGE]
    return ranges_overlap(lhs_range, rhs_range)
def overlap(lhs_slot, rhs_slot):
    """Return ``ranges_overlap`` applied to the two slots' match ranges.

    Each range is read from the slot's ``RES_MATCH_RANGE`` item.
    """
    lhs_range = lhs_slot[RES_MATCH_RANGE]
    rhs_range = rhs_slot[RES_MATCH_RANGE]
    return ranges_overlap(lhs_range, rhs_range)