コード例 #1
0
def _filter_overlapping_builtins(builtin_entities, tokens, tags,
                                 tagging_scheme):
    """Drop the builtin entities whose match range overlaps a tagged slot.

    The slots are obtained from
    ``tags_to_preslots(tokens, tags, tagging_scheme)``. An entity is kept
    only when ``ranges_overlap`` is falsy for its range against the range
    of every slot. The relative order of the kept entities is unchanged.

    Returns:
        list: the entities of ``builtin_entities`` that clash with no slot.
    """
    preslots = tags_to_preslots(tokens, tags, tagging_scheme)

    def _clashes_with_a_slot(entity):
        # Stops at the first overlapping slot.
        return any(
            ranges_overlap(entity[RES_MATCH_RANGE], slot[RES_MATCH_RANGE])
            for slot in preslots)

    return [entity for entity in builtin_entities
            if not _clashes_with_a_slot(entity)]
コード例 #2
0
def _filter_overlapping_builtins(builtin_entities, tokens, tags,
                                 tagging_scheme):
    """Return the builtin entities that do not overlap any tagged slot.

    Slots are computed once with
    ``tags_to_preslots(tokens, tags, tagging_scheme)``; each entity's match
    range is then compared against every slot's match range with
    ``ranges_overlap``. Input order is preserved in the result.

    Returns:
        list: entities whose range overlaps none of the slots.
    """
    tagged_slots = tags_to_preslots(tokens, tags, tagging_scheme)
    non_overlapping = []
    for builtin in builtin_entities:
        overlaps_slot = any(
            ranges_overlap(builtin[RES_MATCH_RANGE],
                           tagged[RES_MATCH_RANGE])
            for tagged in tagged_slots)
        if not overlaps_slot:
            non_overlapping.append(builtin)
    return non_overlapping
コード例 #3
0
def _deduplicate_overlapping_slots(slots, language):
    """Resolve slots whose match ranges overlap, keeping the "larger" one.

    Slots are processed in order. A slot overlapping no already-kept slot is
    appended. Otherwise, for each kept slot it overlaps, it takes that
    slot's position when it has more tokens (per ``tokenize`` with
    ``language``), or the same number of tokens and a longer value; in every
    other case the kept slot stays.

    Note: there is no early exit, so a slot overlapping several kept slots
    is compared against, and may replace, each of them.

    Returns:
        list: the deduplicated slots.
    """
    kept = []
    for candidate in slots:
        clashed = False
        for position, current in enumerate(kept):
            if not ranges_overlap(candidate[RES_MATCH_RANGE],
                                  current[RES_MATCH_RANGE]):
                continue
            clashed = True
            # Lexicographic comparison: token count first, then raw length.
            candidate_size = (len(tokenize(candidate[RES_VALUE], language)),
                              len(candidate[RES_VALUE]))
            current_size = (len(tokenize(current[RES_VALUE], language)),
                            len(current[RES_VALUE]))
            if candidate_size > current_size:
                kept[position] = candidate
        if not clashed:
            kept.append(candidate)
    return kept
コード例 #4
0
def _deduplicate_overlapping_slots(slots, language):
    """Keep one slot per group of overlapping match ranges.

    Each incoming slot is compared with the slots retained so far. When it
    overlaps a retained slot, the one with more tokens (``tokenize`` with
    ``language``) wins; if token counts are equal, the one with the longer
    value wins; if both are equal, the retained slot is left in place. A
    slot that overlaps nothing is appended to the result.

    Note: the comparison runs against every retained slot, so one incoming
    slot can replace more than one retained slot.

    Returns:
        list: the retained slots.
    """
    retained = []
    for incoming in slots:
        overlaps_any = False
        for idx, existing in enumerate(retained):
            if not ranges_overlap(incoming[RES_MATCH_RANGE],
                                  existing[RES_MATCH_RANGE]):
                continue
            overlaps_any = True
            # Rank by (token count, value length), compared left to right.
            incoming_rank = (len(tokenize(incoming[RES_VALUE], language)),
                             len(incoming[RES_VALUE]))
            existing_rank = (len(tokenize(existing[RES_VALUE], language)),
                             len(existing[RES_VALUE]))
            if incoming_rank > existing_rank:
                retained[idx] = incoming
        if not overlaps_any:
            retained.append(incoming)
    return retained
コード例 #5
0
ファイル: test_utils.py プロジェクト: lym0302/snips-nlu
    def test_ranges_overlap(self):
        """Check ``ranges_overlap`` for [4, 8] against several other ranges."""
        # Given
        reference = [4, 8]
        cases = [
            ([5, 7], True),    # contained in the reference range
            ([3, 9], True),    # contains the reference range
            ([3, 4], False),   # ends where the reference starts
            ([8, 9], False),   # starts where the reference ends
            ([3, 6], True),    # overlaps the left part
            ([4, 10], True),   # same start, longer
        ]

        # When / Then
        for other, expected in cases:
            result = ranges_overlap(reference, other)
            if expected:
                self.assertTrue(result)
            else:
                self.assertFalse(result)
コード例 #6
0
    def test_ranges_overlap(self):
        """Exercise ``ranges_overlap`` with the range [4, 8] as first arg."""
        # Given
        base_range = [4, 8]
        expectations = [
            ([5, 7], True),    # nested inside the base range
            ([3, 9], True),    # encloses the base range
            ([3, 4], False),   # only touches the base range's start
            ([8, 9], False),   # only touches the base range's end
            ([3, 6], True),    # partial overlap on the left
            ([4, 10], True),   # shares the start, extends further
        ]

        # When / Then
        for candidate, should_overlap in expectations:
            outcome = ranges_overlap(base_range, candidate)
            if should_overlap:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)
コード例 #7
0
def _disambiguate_builtin_entities(builtin_entities):
    """Resolve overlapping builtin entities, favouring the longest ranges.

    Entities are visited from the longest match range to the shortest. An
    entity is kept when it overlaps no kept entity, or when its range is
    equal to the first kept entity it overlaps. Any other overlapping
    entity is discarded.

    Returns:
        list: the kept entities sorted by the start of their match range,
        or an empty list when ``builtin_entities`` is falsy.
    """
    if not builtin_entities:
        return []

    def _range_length(entity):
        match_range = entity[RES_MATCH_RANGE]
        return match_range[END] - match_range[START]

    longest_first = sorted(builtin_entities, key=_range_length, reverse=True)

    kept = [longest_first[0]]
    for candidate in longest_first[1:]:
        candidate_rng = candidate[RES_MATCH_RANGE]
        for kept_entity in kept:
            kept_rng = kept_entity[RES_MATCH_RANGE]
            if not ranges_overlap(candidate_rng, kept_rng):
                continue
            # Only the first overlapping kept entity decides the outcome.
            keep_candidate = candidate_rng == kept_rng
            break
        else:
            # No kept entity overlaps the candidate.
            keep_candidate = True
        if keep_candidate:
            kept.append(candidate)

    kept.sort(key=lambda be: be[RES_MATCH_RANGE][START])
    return kept
コード例 #8
0
def _disambiguate_builtin_entities(builtin_entities):
    """Drop builtin entities that conflict with a longer overlapping one.

    The entities are sorted by decreasing range length (``END - START``)
    and examined in that order. An entity is retained if it overlaps none
    of the retained entities, or if its range equals the range of the first
    retained entity it overlaps; otherwise it is dropped.

    Returns:
        list: retained entities ordered by range start; ``[]`` when the
        input is falsy.
    """
    if not builtin_entities:
        return []

    def _length(entity):
        rng = entity[RES_MATCH_RANGE]
        return rng[END] - rng[START]

    def _start(entity):
        return entity[RES_MATCH_RANGE][START]

    ordered = sorted(builtin_entities, key=_length, reverse=True)

    retained = [ordered[0]]
    for entity in ordered[1:]:
        entity_rng = entity[RES_MATCH_RANGE]
        for other in retained:
            other_rng = other[RES_MATCH_RANGE]
            if not ranges_overlap(entity_rng, other_rng):
                continue
            # First overlap found: retain only on an exact range match.
            accepted = entity_rng == other_rng
            break
        else:
            # The entity overlaps nothing retained so far.
            accepted = True
        if accepted:
            retained.append(entity)

    retained.sort(key=_start)
    return retained
コード例 #9
0
 def overlap(lhs_entity, rhs_entity):
     """Apply ``ranges_overlap`` to the match ranges of two entities."""
     lhs_range = lhs_entity[RES_MATCH_RANGE]
     rhs_range = rhs_entity[RES_MATCH_RANGE]
     return ranges_overlap(lhs_range, rhs_range)
コード例 #10
0
 def overlap(lhs_slot, rhs_slot):
     """Apply ``ranges_overlap`` to the match ranges of two slots."""
     lhs_range = lhs_slot[RES_MATCH_RANGE]
     rhs_range = rhs_slot[RES_MATCH_RANGE]
     return ranges_overlap(lhs_range, rhs_range)