Esempio n. 1
0
 def _get_matching_result(self, text, processed_text, regex, intent,
                          entities_ranges_mapping=None):
     """Match *processed_text* against *regex* and build an extraction result

     Args:
         text (str): string from which the slot values are sliced
         processed_text (str): string the regex is matched against
         regex: compiled pattern whose named groups identify the slots
         intent (str): name of the intent associated with ``regex``
         entities_ranges_mapping (dict, optional): maps ``(start, end)``
             tuples found in ``processed_text`` to range dicts. A matched
             range which is not a key of the mapping is shifted by the
             offset returned by ``_get_range_shift``.

     Returns:
         dict or None: None when the regex does not match, otherwise the
         extraction result made of the intent (with probability 1.0) and
         the deduplicated slots sorted by start position
     """
     match = regex.match(processed_text)
     if match is None:
         return None

     parsed_intent = intent_classification_result(intent_name=intent,
                                                  probability=1.0)
     slots = []
     for group in match.groupdict():
         # Only the part before the first underscore identifies the slot
         base_group = group.partition("_")[0]
         slot_name = self.group_names_to_slot_names[base_group]
         entity = self.slot_names_to_entities[intent][slot_name]
         span = match.span(group)
         if entities_ranges_mapping is None:
             rng = {START: span[0], END: span[1]}
         elif span in entities_ranges_mapping:
             rng = entities_ranges_mapping[span]
         else:
             shift = _get_range_shift(span, entities_ranges_mapping)
             rng = {START: span[0] + shift, END: span[1] + shift}
         slots.append(unresolved_slot(
             match_range=rng, value=text[rng[START]:rng[END]],
             entity=entity, slot_name=slot_name))

     deduplicated = _deduplicate_overlapping_slots(slots, self.language)
     ordered_slots = sorted(deduplicated,
                            key=lambda s: s[RES_MATCH_RANGE][START])
     return extraction_result(parsed_intent, ordered_slots)
    def test_should_parse_top_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - hello world
  
---
type: intent
name: intent2
utterances:
  - foo bar""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = DeterministicIntentParser().fit(dataset)
        text = "hello world"

        # When
        results = parser.parse(text, top_n=3)

        # Then
        expected_intent = intent_classification_result(intent_name="intent1",
                                                       probability=1.0)
        expected_results = [extraction_result(expected_intent, [])]
        self.assertEqual(expected_results, results)
Esempio n. 3
0
    def parse(self, text, intents=None, top_n=None):
        """Performs intent parsing on the provided *text* by calling its intent
        parsers successively

        Args:
            text (str): Input
            intents (str or list of str, optional): If provided, reduces the
                scope of intent parsing to the provided list of intents
            top_n (int, optional): when provided, this method will return a
                list of at most top_n most likely intents, instead of a single
                parsing result.
                Note that the returned list can contain less than ``top_n``
                elements, for instance when the parameter ``intents`` is not
                None, or when ``top_n`` is greater than the total number of
                intents.
                The None intent is never removed by the ``intents`` filter.

        Returns:
            dict or list: the most likely intent(s) along with the extracted
            slots. See :func:`.parsing_result` and :func:`.extraction_result`
            for the output format.

        Raises:
            NotTrained: When the nlu engine is not fitted
            InvalidInputError: When input type is not unicode
        """
        if not isinstance(text, str):
            raise InvalidInputError("Expected unicode but received: %s" %
                                    type(text))

        # Normalize the filter to a set for the membership tests below
        if isinstance(intents, str):
            intents = {intents}
        elif isinstance(intents, list):
            intents = set(intents)

        if top_n is None:
            # The first parser returning a non-empty result wins. The
            # probability of the last empty result is kept for the case
            # where every parser comes back empty.
            none_proba = 0.0
            for parser in self.intent_parsers:
                res = parser.parse(text, intents)
                if is_empty(res):
                    none_proba = res[RES_INTENT][RES_PROBA]
                    continue
                resolved_slots = self._resolve_slots(text, res[RES_SLOTS])
                return parsing_result(text,
                                      intent=res[RES_INTENT],
                                      slots=resolved_slots)
            return empty_result(text, none_proba)

        intents_results = self.get_intents(text)
        if intents is not None:
            # The None intent stands for "no intent matched": it remains a
            # valid outcome whatever the filter is, exactly as in the
            # single-result branch above which can return an empty result.
            intents_results = [
                res for res in intents_results
                if res[RES_INTENT_NAME] is None
                or res[RES_INTENT_NAME] in intents
            ]
        intents_results = intents_results[:top_n]
        results = []
        for intent_res in intents_results:
            slots = self.get_slots(text, intent_res[RES_INTENT_NAME])
            results.append(extraction_result(intent_res, slots))
        return results
    def test_should_parse_top_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - meeting tomorrow
  
---
type: intent
name: intent2
utterances:
  - meeting [time:snips/datetime](today)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = DeterministicIntentParser().fit(dataset)
        text = "meeting tomorrow"

        # When
        results = parser.parse(text, top_n=3)

        # Then
        slot = {
            "entity": "snips/datetime",
            "range": {
                "end": 16,
                "start": 8
            },
            "slotName": "time",
            "value": "tomorrow"
        }
        expected_results = [
            extraction_result(
                intent_classification_result(intent_name="intent1",
                                             probability=0.5), []),
            extraction_result(
                intent_classification_result(intent_name="intent2",
                                             probability=0.5), [slot])
        ]
        results = sorted(results, key=lambda r: r[RES_INTENT][RES_INTENT_NAME])
        self.assertEqual(expected_results, results)
    def parse(self, text, intents=None, top_n=None):
        """Performs intent parsing on the provided *text* by first classifying
        the intent and then using the correspond slot filler to extract slots

        Args:
            text (str): input
            intents (str or list of str): if provided, reduces the scope of
                intent parsing to the provided list of intents
            top_n (int, optional): when provided, this method will return a
                list of at most top_n most likely intents, instead of a single
                parsing result.
                Note that the returned list can contain less than ``top_n``
                elements, for instance when the parameter ``intents`` is not
                None, or when ``top_n`` is greater than the total number of
                intents.
                The None intent is never removed by the ``intents`` filter.

        Returns:
            dict or list: the most likely intent(s) along with the extracted
            slots. See :func:`.parsing_result` and :func:`.extraction_result`
            for the output format.

        Raises:
            NotTrained: when the intent parser is not fitted
        """
        if isinstance(intents, str):
            intents = {intents}
        elif isinstance(intents, list):
            # Shallow copy: the classifier receives its own list object
            intents = list(intents)

        if top_n is None:
            intent_result = self.intent_classifier.get_intent(text, intents)
            intent_name = intent_result[RES_INTENT_NAME]
            if intent_name is not None:
                slots = self.slot_fillers[intent_name].get_slots(text)
            else:
                slots = []
            return parsing_result(text, intent_result, slots)

        intents_results = self.intent_classifier.get_intents(text)
        if intents is not None:
            # Honor the documented contract: the filter also applies in
            # top-n mode. The None intent ("no intent matched") is kept as
            # it is a valid outcome whatever the filter is.
            allowed_intents = set(intents)
            intents_results = [
                res for res in intents_results
                if res[RES_INTENT_NAME] is None
                or res[RES_INTENT_NAME] in allowed_intents
            ]

        results = []
        for intent_result in intents_results[:top_n]:
            intent_name = intent_result[RES_INTENT_NAME]
            if intent_name is not None:
                slots = self.slot_fillers[intent_name].get_slots(text)
            else:
                # No slot filler is associated with the None intent
                slots = []
            results.append(extraction_result(intent_result, slots))
        return results
Esempio n. 6
0
    def _parse_map_output(self, text, output, entities, intents):
        """Convert a map output into the parser's result format

        Args:
            text (str): string from which the slot values are sliced
            output: pair made of an intent id and a sequence of slot ids
            entities: matched entities, one per slot id, each exposing its
                range and its entity kind
            intents: optional container of allowed intent names

        Returns:
            dict or None: None when the intent is filtered out by
            ``intents``, otherwise the extraction result with an intent
            probability of 1.0
        """
        intent_id, slot_ids = output
        intent_name = self._intents_names[intent_id]
        if not (intents is None or intent_name in intents):
            return None

        parsed_intent = intent_classification_result(
            intent_name=intent_name, probability=1.0)

        # Invariant: there is exactly one entity per slot id
        assert len(slot_ids) == len(entities)

        slots = []
        for slot_id, entity in zip(slot_ids, entities):
            slot_name = self._slots_names[slot_id]
            start = entity[RES_MATCH_RANGE][START]
            end = entity[RES_MATCH_RANGE][END]
            slots.append(unresolved_slot(
                [start, end], text[start:end], entity[ENTITY_KIND],
                slot_name))

        return extraction_result(parsed_intent, slots)
Esempio n. 7
0
    def test_should_parse_top_intents(self):
        # Given
        text = "foo bar ban"
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - foo [slot1:entity1](bak)
  
---
type: intent
name: intent2
utterances:
  - '[slot2:entity2](foo) baz'
  
---
type: intent
name: intent3
utterances:
  - foo bap""")

        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentParser.register("first_intent_parser", True)
        class FirstIntentParser(MockIntentParser):
            def get_intents(self, text):
                return [
                    intent_classification_result("intent1", 0.5),
                    intent_classification_result("intent2", 0.3),
                    intent_classification_result(None, 0.15),
                    intent_classification_result("intent3", 0.05)
                ]

            def get_slots(self, text, intent):
                if intent == "intent1":
                    return []
                if intent == "intent2":
                    return [
                        unresolved_slot((0, 3), "foo", "entity2", "slot2")
                    ]
                return []

        @IntentParser.register("second_intent_parser", True)
        class SecondIntentParser(MockIntentParser):
            def get_intents(self, text):
                return [
                    intent_classification_result("intent2", 0.6),
                    intent_classification_result("intent1", 0.2),
                    intent_classification_result(None, 0.15),
                    intent_classification_result("intent3", 0.05)
                ]

            def get_slots(self, text, intent):
                if intent == "intent1":
                    return [
                        unresolved_slot((0, 3), "foo", "entity1", "slot1")
                    ]
                if intent == "intent2":
                    return [
                        unresolved_slot((8, 11), "ban", "entity2", "slot2")
                    ]
                return []

        # pylint:enable=unused-variable

        config = NLUEngineConfig(
            ["first_intent_parser", "second_intent_parser"])
        nlu_engine = SnipsNLUEngine(config).fit(dataset)

        # When
        results = nlu_engine.parse(text, top_n=3)
        results_with_filter = nlu_engine.parse(
            text, intents=["intent1", "intent3"], top_n=3)

        # Then
        expected_results = [
            extraction_result(
                intent_classification_result("intent2", 0.6),
                [custom_slot(
                    unresolved_slot((0, 3), "foo", "entity2", "slot2"))]
            ),
            extraction_result(
                intent_classification_result("intent1", 0.5),
                [custom_slot(
                    unresolved_slot((0, 3), "foo", "entity1", "slot1"))]
            ),
            extraction_result(
                intent_classification_result(None, 0.15),
                []
            ),
        ]
        expected_results_with_filter = [
            extraction_result(
                intent_classification_result("intent1", 0.5),
                [custom_slot(
                    unresolved_slot((0, 3), "foo", "entity1", "slot1"))]
            ),
            extraction_result(
                intent_classification_result(None, 0.15),
                []
            ),
            extraction_result(
                intent_classification_result("intent3", 0.05),
                []
            ),
        ]
        self.assertListEqual(expected_results, results)
        self.assertListEqual(expected_results_with_filter, results_with_filter)
    def test_should_parse_top_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - meeting [time:snips/datetime](today)

---
type: intent
name: intent2
utterances:
  - meeting tomorrow
  
---
type: intent
name: intent3
utterances:
  - "[event_type](call) [time:snips/datetime](at 9pm)"

---
type: entity
name: event_type
values:
  - meeting
  - feedback session""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = DeterministicIntentParser().fit(dataset)
        text = "meeting tomorrow"

        # When
        results = parser.parse(text, top_n=3)

        # Then
        time_slot = {
            "entity": "snips/datetime",
            "range": {
                "end": 16,
                "start": 8
            },
            "slotName": "time",
            "value": "tomorrow"
        }
        event_slot = {
            "entity": "event_type",
            "range": {
                "end": 7,
                "start": 0
            },
            "slotName": "event_type",
            "value": "meeting"
        }
        weight_intent_1 = 1. / 2.
        weight_intent_2 = 1.
        weight_intent_3 = 1. / 3.
        total_weight = weight_intent_1 + weight_intent_2 + weight_intent_3
        proba_intent2 = weight_intent_2 / total_weight
        proba_intent1 = weight_intent_1 / total_weight
        proba_intent3 = weight_intent_3 / total_weight
        expected_results = [
            extraction_result(intent_classification_result(
                intent_name="intent2", probability=proba_intent2),
                              slots=[]),
            extraction_result(intent_classification_result(
                intent_name="intent1", probability=proba_intent1),
                              slots=[time_slot]),
            extraction_result(intent_classification_result(
                intent_name="intent3", probability=proba_intent3),
                              slots=[event_slot, time_slot])
        ]
        self.assertEqual(expected_results, results)