def _filter_overlapping_builtins(builtin_entities, tokens, tags,
                                 tagging_scheme):
    """Drop the builtin entities whose range collides with a tagged slot.

    The slots are obtained from
    ``tags_to_preslots(tokens, tags, tagging_scheme)``. A builtin entity is
    kept only when ``ranges_overlap`` reports no overlap between its
    ``RES_MATCH_RANGE`` and the ``RES_MATCH_RANGE`` of any of those slots.

    Args:
        builtin_entities: iterable of entities, each indexable by
            ``RES_MATCH_RANGE``.
        tokens: passed through to ``tags_to_preslots``.
        tags: passed through to ``tags_to_preslots``.
        tagging_scheme: passed through to ``tags_to_preslots``.

    Returns:
        list: the entities that overlap no slot, in their input order.
    """
    tagged_slots = tags_to_preslots(tokens, tags, tagging_scheme)

    def hits_a_slot(entity):
        # The entity range is looked up inside the generator on purpose:
        # with no slots at all, the entity is never indexed.
        return any(
            ranges_overlap(entity[RES_MATCH_RANGE], slot[RES_MATCH_RANGE])
            for slot in tagged_slots)

    return [entity for entity in builtin_entities
            if not hits_a_slot(entity)]
# Example no. 2
# 0
    def test_ranges_overlap(self):
        """Check ``ranges_overlap`` on ranges laid out around ``[4, 8]``."""
        # Given
        reference = [4, 8]
        cases = [
            ([5, 7], True),    # lies inside the reference
            ([3, 9], True),    # contains the reference
            ([3, 4], False),   # ends where the reference starts
            ([8, 9], False),   # starts where the reference ends
            ([3, 6], True),    # straddles the reference start
            ([4, 10], True),   # same start, runs past the reference end
        ]

        # When / Then
        for other, should_overlap in cases:
            # Keep truthiness-based assertions, checked in the listed order
            check = self.assertTrue if should_overlap else self.assertFalse
            check(ranges_overlap(reference, other))
def _disambiguate_builtin_entities(builtin_entities):
    if not builtin_entities:
        return []
    builtin_entities = sorted(
        builtin_entities,
        key=lambda be: be[RES_MATCH_RANGE][END] - be[RES_MATCH_RANGE][START],
        reverse=True)

    disambiguated_entities = [builtin_entities[0]]
    for entity in builtin_entities[1:]:
        entity_rng = entity[RES_MATCH_RANGE]
        conflict = False
        for disambiguated_entity in disambiguated_entities:
            disambiguated_entity_rng = disambiguated_entity[RES_MATCH_RANGE]
            if ranges_overlap(entity_rng, disambiguated_entity_rng):
                conflict = True
                if entity_rng == disambiguated_entity_rng:
                    disambiguated_entities.append(entity)
                break
        if not conflict:
            disambiguated_entities.append(entity)

    return sorted(disambiguated_entities,
                  key=lambda be: be[RES_MATCH_RANGE][START])
 def overlap(lhs_slot, rhs_slot):
     """Tell whether the match ranges of two slots overlap.

     Both slots are indexed by ``RES_MATCH_RANGE`` and the two ranges are
     handed to ``ranges_overlap``, whose result is returned unchanged.
     """
     lhs_range = lhs_slot[RES_MATCH_RANGE]
     rhs_range = rhs_slot[RES_MATCH_RANGE]
     return ranges_overlap(lhs_range, rhs_range)