def parse(self, text, intents=None):
        """Performs intent parsing on the provided *text* by first classifying
        the intent and then using the correspond slot filler to extract slots

        Args:
            text (str): Input
            intents (str or list of str): If provided, reduces the scope of
                intent parsing to the provided list of intents

        Returns:
            dict: The most likely intent along with the extracted slots. See
            :func:`.parsing_result` for the output format.

        Raises:
            NotTrained: When the intent parser is not fitted
        """
        if not self.fitted:
            raise NotTrained("ProbabilisticIntentParser must be fitted")

        if isinstance(intents, str):
            intents = [intents]

        intent_result = self.intent_classifier.get_intent(text, intents)
        if intent_result is None:
            return empty_result(text)

        intent_name = intent_result[RES_INTENT_NAME]
        slots = self.slot_fillers[intent_name].get_slots(text)
        return parsing_result(text, intent_result, slots)
Ejemplo n.º 2
0
    def parse(self, text, intents=None):
        """Performs intent parsing on the provided *text* by first classifying
        the intent and then using the correspond slot filler to extract slots

        Args:
            text (str): Input
            intents (str or list of str): If provided, reduces the scope of
                intent parsing to the provided list of intents

        Returns:
            dict: The most likely intent along with the extracted slots. See
            :func:`.parsing_result` for the output format.

        Raises:
            NotTrained: When the intent parser is not fitted
        """
        if not self.fitted:
            raise NotTrained("ProbabilisticIntentParser must be fitted")

        logger.debug("Probabilistic intent parser parsing '%s'...", text)

        if isinstance(intents, str):
            intents = [intents]

        intent_result = self.intent_classifier.get_intent(text, intents)
        if intent_result is None:
            return empty_result(text)

        intent_name = intent_result[RES_INTENT_NAME]
        slots = self.slot_fillers[intent_name].get_slots(text)
        return parsing_result(text, intent_result, slots)
Ejemplo n.º 3
0
    def parse(self, text, intents=None, top_n=None):
        """Performs intent parsing on the provided *text*

        Intent and slots are extracted simultaneously through pattern matching

        Args:
            text (str): input
            intents (str or list of str): if provided, reduces the scope of
                intent parsing to the provided list of intents
            top_n (int, optional): when provided, this method will return a
                list of at most top_n most likely intents, instead of a single
                parsing result.
                Note that the returned list can contain less than ``top_n``
                elements, for instance when the parameter ``intents`` is not
                None, or when ``top_n`` is greater than the total number of
                intents.

        Returns:
            dict or list: the most likely intent(s) along with the extracted
            slots. See :func:`.parsing_result` and :func:`.extraction_result`
            for the output format.

        Raises:
            NotTrained: when the intent parser is not fitted
        """
        if top_n is None:
            top_intents = self._parse_top_intents(text, top_n=1,
                                                  intents=intents)
            if top_intents:
                intent = top_intents[0][RES_INTENT]
                slots = top_intents[0][RES_SLOTS]
                return parsing_result(text, intent, slots)
            return empty_result(text, probability=1.0)
        return self._parse_top_intents(text, top_n=top_n, intents=intents)
    def test_should_serialize_results(self):
        # Given
        input_ = "hello world"
        intent = intent_classification_result("world", 0.5)
        slots = [
            unresolved_slot([3, 5], "slot_value", "slot_entity", "slot_name")
        ]

        # When
        result = parsing_result(input=input_, intent=intent, slots=slots)

        # Then
        msg = "Result dict should be json serializable"
        with self.fail_if_exception(msg):
            json.dumps(result)

        expected_result = {
            RES_INTENT: {
                RES_INTENT_NAME: 'world',
                RES_PROBA: 0.5
            },
            RES_SLOTS: [{
                RES_MATCH_RANGE: {
                    "start": 3,
                    "end": 5
                },
                RES_ENTITY: 'slot_entity',
                RES_SLOT_NAME: 'slot_name',
                RES_VALUE: 'slot_value'
            }],
            RES_INPUT:
            input_
        }
        self.assertDictEqual(expected_result, result)
Ejemplo n.º 5
0
    def parse(self, text, intents=None):
        """Performs intent parsing on the provided *text* by calling its intent
        parsers successively

        Args:
            text (str): Input
            intents (str or list of str): If provided, reduces the scope of
                intent parsing to the provided list of intents

        Returns:
            dict: The most likely intent along with the extracted slots. See
            :func:`.parsing_result` for the output format.

        Raises:
            NotTrained: When the nlu engine is not fitted
            TypeError: When input type is not unicode
        """
        logging.info("NLU engine parsing: '%s'...", text)
        if not isinstance(text, str):
            raise TypeError("Expected unicode but received: %s" % type(text))

        if isinstance(intents, str):
            intents = [intents]

        for parser in self.intent_parsers:
            res = parser.parse(text, intents)
            if is_empty(res):
                continue
            resolved_slots = self.resolve_slots(text, res[RES_SLOTS])
            return parsing_result(text,
                                  intent=res[RES_INTENT],
                                  slots=resolved_slots)
        return empty_result(text)
    def test_should_parse_slightly_ambiguous_utterances(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent_1
utterances:
  - call tomorrow

---
type: intent
name: intent_2
utterances:
  - call [time:snips/datetime](today)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = DeterministicIntentParser().fit(dataset)
        text = "call tomorrow"

        # When
        res = parser.parse(text)

        # Then
        expected_intent = intent_classification_result(intent_name="intent_1",
                                                       probability=2. / 3.)
        expected_result = parsing_result(text, expected_intent, [])
        self.assertEqual(expected_result, res)
 def _get_matching_result(self, text, processed_text, regex, intent,
                          builtin_entities_ranges_mapping=None):
     found_result = regex.match(processed_text)
     if found_result is None:
         return None
     parsed_intent = intent_classification_result(intent_name=intent,
                                                  probability=1.0)
     slots = []
     for group_name in found_result.groupdict():
         slot_name = self.group_names_to_slot_names[group_name]
         entity = self.slot_names_to_entities[intent][slot_name]
         rng = (found_result.start(group_name),
                found_result.end(group_name))
         if builtin_entities_ranges_mapping is not None:
             if rng in builtin_entities_ranges_mapping:
                 rng = builtin_entities_ranges_mapping[rng]
             else:
                 shift = _get_range_shift(
                     rng, builtin_entities_ranges_mapping)
                 rng = {START: rng[0] + shift, END: rng[1] + shift}
         else:
             rng = {START: rng[0], END: rng[1]}
         value = text[rng[START]:rng[END]]
         parsed_slot = unresolved_slot(
             match_range=rng, value=value, entity=entity,
             slot_name=slot_name)
         slots.append(parsed_slot)
     parsed_slots = _deduplicate_overlapping_slots(
         slots, self.language)
     parsed_slots = sorted(parsed_slots,
                           key=lambda s: s[RES_MATCH_RANGE][START])
     return parsing_result(text, parsed_intent, parsed_slots)
Ejemplo n.º 8
0
    def test_should_serialize_results(self):
        # Given
        input_ = "hello world"
        intent = intent_classification_result("world", 0.5)
        slots = [unresolved_slot([3, 5],
                                 "slot_value",
                                 "slot_entity",
                                 "slot_name")]

        # When
        result = parsing_result(input=input_, intent=intent, slots=slots)

        # Then
        msg = "Result dict should be json serializable"
        with self.fail_if_exception(msg):
            json.dumps(result)

        expected_result = {
            RES_INTENT: {RES_INTENT_NAME: 'world', RES_PROBABILITY: 0.5},
            RES_SLOTS: [{RES_MATCH_RANGE: {"start": 3, "end": 5},
                         RES_ENTITY: 'slot_entity',
                         RES_SLOT_NAME: 'slot_name',
                         RES_VALUE: 'slot_value'}],
            RES_INPUT: input_}
        self.assertDictEqual(expected_result, result)
Ejemplo n.º 9
0
    def parse(self, text, intents=None, top_n=None):
        """Performs intent parsing on the provided *text* by calling its intent
        parsers successively

        Args:
            text (str): Input
            intents (str or list of str, optional): If provided, reduces the
                scope of intent parsing to the provided list of intents
            top_n (int, optional): when provided, this method will return a
                list of at most top_n most likely intents, instead of a single
                parsing result.
                Note that the returned list can contain less than ``top_n``
                elements, for instance when the parameter ``intents`` is not
                None, or when ``top_n`` is greater than the total number of
                intents.

        Returns:
            dict or list: the most likely intent(s) along with the extracted
            slots. See :func:`.parsing_result` and :func:`.extraction_result`
            for the output format.

        Raises:
            NotTrained: When the nlu engine is not fitted
            InvalidInputError: When input type is not unicode
        """
        if not isinstance(text, str):
            raise InvalidInputError("Expected unicode but received: %s" %
                                    type(text))

        if isinstance(intents, str):
            intents = {intents}
        elif isinstance(intents, list):
            intents = set(intents)

        if top_n is None:
            none_proba = 0.0
            for parser in self.intent_parsers:
                res = parser.parse(text, intents)
                if is_empty(res):
                    none_proba = res[RES_INTENT][RES_PROBA]
                    continue
                resolved_slots = self._resolve_slots(text, res[RES_SLOTS])
                return parsing_result(text,
                                      intent=res[RES_INTENT],
                                      slots=resolved_slots)
            return empty_result(text, none_proba)

        intents_results = self.get_intents(text)
        if intents is not None:
            intents_results = [
                res for res in intents_results
                if res[RES_INTENT_NAME] in intents
            ]
        intents_results = intents_results[:top_n]
        results = []
        for intent_res in intents_results:
            slots = self.get_slots(text, intent_res[RES_INTENT_NAME])
            results.append(extraction_result(intent_res, slots))
        return results
Ejemplo n.º 10
0
 def parse(self, text, intents=None, top_n=None):
     slots = [
         unresolved_slot(match_range=(0, len(text)),
                         value=text,
                         entity="entity1",
                         slot_name="slot1")
     ]
     return parsing_result(text, mocked_intent, slots)
Ejemplo n.º 11
0
 def mock_proba_parse(text, intents):
     slots = [
         unresolved_slot(match_range=(0, len(text)),
                         value=text,
                         entity="entity1",
                         slot_name="slot1")
     ]
     return parsing_result(text, mocked_proba_parser_intent, slots)
    def parse(self, text, intents=None):
        """Performs intent parsing on the provided *text*

        Intent and slots are extracted simultaneously through pattern matching

        Args:
            text (str): Input
            intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

        Returns:
            dict: The matched intent, if any, along with the extracted slots.
            See :func:`.parsing_result` for the output format.

        Raises:
            NotTrained: When the intent parser is not fitted
        """
        if not self.fitted:
            raise NotTrained("DeterministicIntentParser must be fitted")

        if isinstance(intents, str):
            intents = [intents]

        ranges_mapping, processed_text = _replace_builtin_entities(
            text, self.language)

        for intent, regexes in iteritems(self.regexes_per_intent):
            if intents is not None and intent not in intents:
                continue
            for regex in regexes:
                match = regex.match(processed_text)
                if match is None:
                    continue
                parsed_intent = intent_classification_result(
                    intent_name=intent, probability=1.0)
                slots = []
                for group_name in match.groupdict():
                    slot_name = self.group_names_to_slot_names[group_name]
                    entity = self.slot_names_to_entities[slot_name]
                    rng = (match.start(group_name), match.end(group_name))
                    value = match.group(group_name)
                    if rng in ranges_mapping:
                        rng = ranges_mapping[rng]
                        value = text[rng[START]:rng[END]]
                    else:
                        rng = {START: rng[0], END: rng[1]}
                    parsed_slot = unresolved_slot(match_range=rng,
                                                  value=value,
                                                  entity=entity,
                                                  slot_name=slot_name)
                    slots.append(parsed_slot)
                parsed_slots = _deduplicate_overlapping_slots(
                    slots, self.language)
                parsed_slots = sorted(parsed_slots,
                                      key=lambda s: s[RES_MATCH_RANGE][START])
                return parsing_result(text, parsed_intent, parsed_slots)
        return empty_result(text)
    def parse(self, text, intents=None):
        """Performs intent parsing on the provided *text*

        Intent and slots are extracted simultaneously through pattern matching

        Args:
            text (str): Input
            intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

        Returns:
            dict: The matched intent, if any, along with the extracted slots.
            See :func:`.parsing_result` for the output format.

        Raises:
            NotTrained: When the intent parser is not fitted
        """
        if not self.fitted:
            raise NotTrained("DeterministicIntentParser must be fitted")

        if isinstance(intents, str):
            intents = [intents]

        ranges_mapping, processed_text = _replace_builtin_entities(
            text, self.language)

        for intent, regexes in iteritems(self.regexes_per_intent):
            if intents is not None and intent not in intents:
                continue
            for regex in regexes:
                match = regex.match(processed_text)
                if match is None:
                    continue
                parsed_intent = intent_classification_result(
                    intent_name=intent, probability=1.0)
                slots = []
                for group_name in match.groupdict():
                    slot_name = self.group_names_to_slot_names[group_name]
                    entity = self.slot_names_to_entities[slot_name]
                    rng = (match.start(group_name), match.end(group_name))
                    value = match.group(group_name)
                    if rng in ranges_mapping:
                        rng = ranges_mapping[rng]
                        value = text[rng[START]:rng[END]]
                    else:
                        rng = {START: rng[0], END: rng[1]}
                    parsed_slot = unresolved_slot(
                        match_range=rng, value=value, entity=entity,
                        slot_name=slot_name)
                    slots.append(parsed_slot)
                parsed_slots = _deduplicate_overlapping_slots(
                    slots, self.language)
                parsed_slots = sorted(parsed_slots,
                                      key=lambda s: s[RES_MATCH_RANGE][START])
                return parsing_result(text, parsed_intent, parsed_slots)
        return empty_result(text)
Ejemplo n.º 14
0
    def test_should_serialize_results_when_none_values(self):
        # Given
        input_ = "hello world"

        # When
        result = parsing_result(input=input_, intent=None, slots=None)

        # Then
        expected_result = {
            RES_INTENT: None,
            RES_SLOTS: None,
            RES_INPUT: input_
        }
        self.assertDictEqual(expected_result, result)
Ejemplo n.º 15
0
    def test_should_serialize_results_when_none_values(self):
        # Given
        input_ = "hello world"

        # When
        result = parsing_result(input=input_, intent=None, slots=None)

        # Then
        expected_result = {
            RES_INTENT: None,
            RES_SLOTS: None,
            RES_INPUT: input_
        }
        self.assertDictEqual(expected_result, result)
    def parse(self, text, intents=None, top_n=None):
        """Performs intent parsing on the provided *text* by first classifying
        the intent and then using the correspond slot filler to extract slots

        Args:
            text (str): input
            intents (str or list of str): if provided, reduces the scope of
                intent parsing to the provided list of intents
            top_n (int, optional): when provided, this method will return a
                list of at most top_n most likely intents, instead of a single
                parsing result.
                Note that the returned list can contain less than ``top_n``
                elements, for instance when the parameter ``intents`` is not
                None, or when ``top_n`` is greater than the total number of
                intents.

        Returns:
            dict or list: the most likely intent(s) along with the extracted
            slots. See :func:`.parsing_result` and :func:`.extraction_result`
            for the output format.

        Raises:
            NotTrained: when the intent parser is not fitted
        """
        if isinstance(intents, str):
            intents = {intents}
        elif isinstance(intents, list):
            intents = list(intents)

        if top_n is None:
            intent_result = self.intent_classifier.get_intent(text, intents)
            intent_name = intent_result[RES_INTENT_NAME]
            if intent_name is not None:
                slots = self.slot_fillers[intent_name].get_slots(text)
            else:
                slots = []
            return parsing_result(text, intent_result, slots)

        results = []
        intents_results = self.intent_classifier.get_intents(text)
        for intent_result in intents_results[:top_n]:
            intent_name = intent_result[RES_INTENT_NAME]
            if intent_name is not None:
                slots = self.slot_fillers[intent_name].get_slots(text)
            else:
                slots = []
            results.append(extraction_result(intent_result, slots))
        return results
Ejemplo n.º 17
0
    def parse(self, text, intents=None):
        """Performs intent parsing on the provided *text* by calling its intent
        parsers successively

        Args:
            text (str): Input
            intents (str or list of str): If provided, reduces the scope of
                intent parsing to the provided list of intents

        Returns:
            dict: The most likely intent along with the extracted slots. See
            :func:`.parsing_result` for the output format.

        Raises:
            NotTrained: When the nlu engine is not fitted
            TypeError: When input type is not unicode
        """
        logging.info("NLU engine parsing: '%s'...", text)
        if not isinstance(text, str):
            raise TypeError("Expected unicode but received: %s" % type(text))

        if not self.fitted:
            raise NotTrained("SnipsNLUEngine must be fitted")

        if isinstance(intents, str):
            intents = [intents]

        language = self._dataset_metadata["language_code"]
        entities = self._dataset_metadata["entities"]

        for parser in self.intent_parsers:
            res = parser.parse(text, intents)
            if is_empty(res):
                continue
            slots = res[RES_SLOTS]
            scope = [
                s[RES_ENTITY] for s in slots
                if is_builtin_entity(s[RES_ENTITY])
            ]
            resolved_slots = resolve_slots(text, slots, entities, language,
                                           scope)
            return parsing_result(text,
                                  intent=res[RES_INTENT],
                                  slots=resolved_slots)
        return empty_result(text)
Ejemplo n.º 18
0
    def test_should_use_parsers_sequentially(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- hello [greeted:name](john)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        input_text = "hello snips"
        intent = intent_classification_result(intent_name='greeting1',
                                              probability=0.7)
        slots = [
            unresolved_slot(match_range=(6, 11),
                            value='snips',
                            entity='name',
                            slot_name='greeted')
        ]

        # pylint:disable=unused-variable
        @IntentParser.register("first_intent_parser", True)
        class FirstIntentParser(MockIntentParser):
            pass

        @IntentParser.register("second_intent_parser", True)
        class SecondIntentParser(MockIntentParser):
            def parse(self, text, intents=None, top_n=None):
                if text == input_text:
                    return parsing_result(text, intent, slots)
                return empty_result(text, 1.0)

        # pylint:enable=unused-variable

        config = NLUEngineConfig(
            ["first_intent_parser", "second_intent_parser"])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        parse = engine.parse(input_text)

        # Then
        expected_slots = [custom_slot(s) for s in slots]
        expected_parse = parsing_result(input_text, intent, expected_slots)
        self.assertDictEqual(expected_parse, parse)
Ejemplo n.º 19
0
    def parse(self, text, intents=None):
        """Performs intent parsing on the provided *text* by calling its intent
        parsers successively

        Args:
            text (str): Input
            intents (str or list of str): If provided, reduces the scope of
                intent parsing to the provided list of intents

        Returns:
            dict: The most likely intent along with the extracted slots. See
            :func:`.parsing_result` for the output format.

        Raises:
            NotTrained: When the nlu engine is not fitted
            TypeError: When input type is not unicode
        """

        if not isinstance(text, str):
            raise TypeError("Expected unicode but received: %s" % type(text))

        if not self.fitted:
            raise NotTrained("SnipsNLUEngine must be fitted")

        if isinstance(intents, str):
            intents = [intents]

        language = self._dataset_metadata["language_code"]
        entities = self._dataset_metadata["entities"]

        for parser in self.intent_parsers:
            res = parser.parse(text, intents)
            if is_empty(res):
                continue
            slots = res[RES_SLOTS]
            scope = [s[RES_ENTITY] for s in slots
                     if is_builtin_entity(s[RES_ENTITY])]
            resolved_slots = resolve_slots(text, slots, entities, language,
                                           scope)
            return parsing_result(text, intent=res[RES_INTENT],
                                  slots=resolved_slots)
        return empty_result(text)
Ejemplo n.º 20
0
 def parse(self, text, intents):
     if text == input_text:
         return parsing_result(text, intent, slots)
     return empty_result(text)
Ejemplo n.º 21
0
    def test_synonyms_should_point_to_base_value(self):
        # Given
        dataset = {
            "intents": {
                "dummy_intent_1": {
                    "utterances": [
                        {
                            "data": [
                                {
                                    "text": "dummy_1",
                                    "entity": "dummy_entity_1",
                                    "slot_name": "dummy_slot_name"
                                }
                            ]
                        }
                    ]
                }
            },
            "entities": {
                "dummy_entity_1": {
                    "use_synonyms": True,
                    "automatically_extensible": False,
                    "data": [
                        {
                            "value": "dummy1",
                            "synonyms": [
                                "dummy1",
                                "dummy1_bis"
                            ]
                        }
                    ],
                    "matching_strictness": 1.0
                }
            },
            "language": "en"
        }
        mocked_intent = intent_classification_result("dummy_intent_1", 1.0)
        mocked_slots = [
            unresolved_slot(match_range=(0, 10), value="dummy1_bis",
                            entity="dummy_entity_1",
                            slot_name="dummy_slot_name")]

        # pylint:disable=unused-variable
        @IntentParser.register("my_intent_parser", True)
        class MyIntentParser(MockIntentParser):
            def parse(self, text, intents=None, top_n=None):
                return parsing_result(text, mocked_intent, mocked_slots)

        # pylint:enable=unused-variable

        input_ = "dummy1_bis"
        config = NLUEngineConfig(["my_intent_parser"])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        result = engine.parse(input_)

        # Then
        expected_slot = {
            RES_MATCH_RANGE: {
                "start": 0,
                "end": 10
            },
            RES_RAW_VALUE: "dummy1_bis",
            RES_VALUE: {
                "kind": "Custom",
                "value": "dummy1"
            },
            RES_ENTITY: "dummy_entity_1",
            RES_SLOT_NAME: "dummy_slot_name"
        }
        expected_result = parsing_result(
            input_, mocked_intent, slots=[expected_slot])
        self.assertEqual(expected_result, result)
Ejemplo n.º 22
0
 def parse(self, text, intents=None, top_n=None):
     if text == input_text:
         return parsing_result(text, intent, slots)
     return empty_result(text, 1.0)
Ejemplo n.º 23
0
    def test_synonyms_should_not_collide_when_remapped_to_base_value(self):
        # Given
        dataset = {
            "intents": {
                "intent1": {
                    "utterances": [
                        {
                            "data": [
                                {
                                    "text": "value",
                                    "entity": "entity1",
                                    "slot_name": "slot1"
                                }
                            ]
                        }
                    ]
                }
            },
            "entities": {
                "entity1": {
                    "data": [
                        {
                            "value": "a",
                            "synonyms": [
                                "favorïte"
                            ]
                        },
                        {
                            "value": "b",
                            "synonyms": [
                                "favorite"
                            ]
                        }
                    ],
                    "use_synonyms": True,
                    "automatically_extensible": False,
                    "matching_strictness": 1.0
                }
            },
            "language": "en",
        }

        mocked_intent = intent_classification_result("intent1", 1.0)

        # pylint:disable=unused-variable
        @IntentParser.register("my_intent_parser", True)
        class MyIntentParser(MockIntentParser):
            def parse(self, text, intents=None, top_n=None):
                slots = [
                    unresolved_slot(match_range=(0, len(text)), value=text,
                                    entity="entity1", slot_name="slot1")]
                return parsing_result(text, mocked_intent, slots)

        # pylint:enable=unused-variable

        config = NLUEngineConfig(["my_intent_parser"])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        result1 = engine.parse("favorite")
        result2 = engine.parse("favorïte")

        # Then
        expected_slot1 = {
            RES_MATCH_RANGE: {
                "start": 0,
                "end": 8
            },
            RES_RAW_VALUE: "favorite",
            RES_VALUE: {
                "kind": "Custom",
                "value": "b"
            },
            RES_ENTITY: "entity1",
            RES_SLOT_NAME: "slot1"
        }
        expected_slot2 = {
            RES_MATCH_RANGE: {
                "start": 0,
                "end": 8
            },
            RES_RAW_VALUE: "favorïte",
            RES_VALUE: {
                "kind": "Custom",
                "value": "a"
            },
            RES_ENTITY: "entity1",
            RES_SLOT_NAME: "slot1"
        }
        expected_result1 = parsing_result("favorite", intent=mocked_intent,
                                          slots=[expected_slot1])
        expected_result2 = parsing_result("favorïte", intent=mocked_intent,
                                          slots=[expected_slot2])
        self.assertEqual(expected_result1, result1)
        self.assertEqual(expected_result2, result2)
Ejemplo n.º 24
0
 def parse(self, text, intents=None, top_n=None):
     return parsing_result(text, mocked_intent, mocked_slots)
Ejemplo n.º 25
0
    def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                            mocked_crf_parse):
        # Given
        dataset = {
            "snips_nlu_version": "1.1.1",
            "intents": {
                "dummy_intent_1": {
                    "utterances": [{
                        "data": [{
                            "text": "dummy_1",
                            "entity": "dummy_entity_1",
                            "slot_name": "dummy_slot_name"
                        }, {
                            "text": " dummy_2",
                            "entity": "dummy_entity_2",
                            "slot_name": "other_dummy_slot_name"
                        }]
                    }]
                }
            },
            "entities": {
                "dummy_entity_1": {
                    "use_synonyms":
                    True,
                    "automatically_extensible":
                    False,
                    "data": [{
                        "value": "dummy1",
                        "synonyms": ["dummy1", "dummy1_bis"]
                    }, {
                        "value": "dummy2",
                        "synonyms": ["dummy2", "dummy2_bis"]
                    }]
                },
                "dummy_entity_2": {
                    "use_synonyms": False,
                    "automatically_extensible": True,
                    "data": [{
                        "value": "dummy2",
                        "synonyms": ["dummy2"]
                    }]
                }
            },
            "language": "en"
        }

        text = "dummy_3 dummy_4"
        mocked_crf_intent = intent_classification_result("dummy_intent_1", 1.0)
        mocked_crf_slots = [
            unresolved_slot(match_range=(0, 7),
                            value="dummy_3",
                            entity="dummy_entity_1",
                            slot_name="dummy_slot_name"),
            unresolved_slot(match_range=(8, 15),
                            value="dummy_4",
                            entity="dummy_entity_2",
                            slot_name="other_dummy_slot_name")
        ]

        mocked_regex_parse.return_value = empty_result(text)
        mocked_crf_parse.return_value = parsing_result(text, mocked_crf_intent,
                                                       mocked_crf_slots)

        engine = SnipsNLUEngine()

        # When
        engine = engine.fit(dataset)
        result = engine.parse(text)

        # Then
        expected_slot = custom_slot(
            unresolved_slot(match_range=(8, 15),
                            value="dummy_4",
                            entity="dummy_entity_2",
                            slot_name="other_dummy_slot_name"))
        expected_result = parsing_result(text,
                                         intent=mocked_crf_intent,
                                         slots=[expected_slot])
        self.assertEqual(expected_result, result)
Ejemplo n.º 26
0
    def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                            mocked_crf_parse):
        # Given
        dataset = {
            "snips_nlu_version": "1.1.1",
            "intents": {
                "dummy_intent_1": {
                    "utterances": [
                        {
                            "data": [
                                {
                                    "text": "dummy_1",
                                    "entity": "dummy_entity_1",
                                    "slot_name": "dummy_slot_name"
                                },
                                {
                                    "text": " dummy_2",
                                    "entity": "dummy_entity_2",
                                    "slot_name": "other_dummy_slot_name"
                                }
                            ]
                        }
                    ]
                }
            },
            "entities": {
                "dummy_entity_1": {
                    "use_synonyms": True,
                    "automatically_extensible": False,
                    "data": [
                        {
                            "value": "dummy1",
                            "synonyms": [
                                "dummy1",
                                "dummy1_bis"
                            ]
                        },
                        {
                            "value": "dummy2",
                            "synonyms": [
                                "dummy2",
                                "dummy2_bis"
                            ]
                        }
                    ]
                },
                "dummy_entity_2": {
                    "use_synonyms": False,
                    "automatically_extensible": True,
                    "data": [
                        {
                            "value": "dummy2",
                            "synonyms": [
                                "dummy2"
                            ]
                        }
                    ]
                }
            },
            "language": "en"
        }

        text = "dummy_3 dummy_4"
        mocked_crf_intent = intent_classification_result("dummy_intent_1", 1.0)
        mocked_crf_slots = [unresolved_slot(match_range=(0, 7),
                                            value="dummy_3",
                                            entity="dummy_entity_1",
                                            slot_name="dummy_slot_name"),
                            unresolved_slot(match_range=(8, 15),
                                            value="dummy_4",
                                            entity="dummy_entity_2",
                                            slot_name="other_dummy_slot_name")]

        mocked_regex_parse.return_value = empty_result(text)
        mocked_crf_parse.return_value = parsing_result(
            text, mocked_crf_intent, mocked_crf_slots)

        engine = SnipsNLUEngine()

        # When
        engine = engine.fit(dataset)
        result = engine.parse(text)

        # Then
        expected_slot = custom_slot(unresolved_slot(
            match_range=(8, 15), value="dummy_4", entity="dummy_entity_2",
            slot_name="other_dummy_slot_name"))
        expected_result = parsing_result(text, intent=mocked_crf_intent,
                                         slots=[expected_slot])
        self.assertEqual(expected_result, result)
Ejemplo n.º 27
0
    def test_synonyms_should_point_to_base_value(self, mocked_deter_parse,
                                                 mocked_proba_parse):
        # Given
        dataset = {
            "snips_nlu_version": "1.1.1",
            "intents": {
                "dummy_intent_1": {
                    "utterances": [
                        {
                            "data": [
                                {
                                    "text": "dummy_1",
                                    "entity": "dummy_entity_1",
                                    "slot_name": "dummy_slot_name"
                                }
                            ]
                        }
                    ]
                }
            },
            "entities": {
                "dummy_entity_1": {
                    "use_synonyms": True,
                    "automatically_extensible": False,
                    "data": [
                        {
                            "value": "dummy1",
                            "synonyms": [
                                "dummy1",
                                "dummy1_bis"
                            ]
                        }
                    ]
                }
            },
            "language": "en"
        }

        text = "dummy1_bis"
        mocked_proba_parser_intent = intent_classification_result(
            "dummy_intent_1", 1.0)
        mocked_proba_parser_slots = [
            unresolved_slot(match_range=(0, 10), value="dummy1_bis",
                            entity="dummy_entity_1",
                            slot_name="dummy_slot_name")]

        mocked_deter_parse.return_value = empty_result(text)
        mocked_proba_parse.return_value = parsing_result(
            text, mocked_proba_parser_intent, mocked_proba_parser_slots)

        engine = SnipsNLUEngine().fit(dataset)

        # When
        result = engine.parse(text)

        # Then
        expected_slot = {
            RES_MATCH_RANGE: {
                "start": 0,
                "end": 10
            },
            RES_RAW_VALUE: "dummy1_bis",
            RES_VALUE: {
                "kind": "Custom",
                "value": "dummy1"
            },
            RES_ENTITY: "dummy_entity_1",
            RES_SLOT_NAME: "dummy_slot_name"
        }
        expected_result = parsing_result(
            text, intent=mocked_proba_parser_intent, slots=[expected_slot])
        self.assertEqual(expected_result, result)
Ejemplo n.º 28
0
    def test_should_use_parsers_sequentially(self):
        # Given
        input_text = "hello world"
        intent = intent_classification_result(intent_name='dummy_intent_1',
                                              probability=0.7)
        slots = [
            unresolved_slot(match_range=(6, 11),
                            value='world',
                            entity='mocked_entity',
                            slot_name='mocked_slot_name')
        ]

        class FirstIntentParserConfig(ProcessingUnitConfig):
            unit_name = "first_intent_parser"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return FirstIntentParserConfig()

            def get_required_resources(self):
                return None

        class FirstIntentParser(IntentParser):
            unit_name = "first_intent_parser"
            config_type = FirstIntentParserConfig

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                return empty_result(text)

            def persist(self, path):
                path = Path(path)
                path.mkdir()
                with (path / "metadata.json").open(mode="w") as f:
                    f.write(json_string({"unit_name": self.unit_name}))

            @classmethod
            def from_path(cls, path):
                cfg = cls.config_type()
                return cls(cfg)

        class SecondIntentParserConfig(ProcessingUnitConfig):
            unit_name = "second_intent_parser"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return SecondIntentParserConfig()

            def get_required_resources(self):
                return None

        class SecondIntentParser(IntentParser):
            unit_name = "second_intent_parser"
            config_type = SecondIntentParserConfig

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                if text == input_text:
                    return parsing_result(text, intent, slots)
                return empty_result(text)

            def persist(self, path):
                path = Path(path)
                path.mkdir()
                with (path / "metadata.json").open(mode="w") as f:
                    f.write(json_string({"unit_name": self.unit_name}))

            @classmethod
            def from_path(cls, path):
                cfg = cls.config_type()
                return cls(cfg)

        register_processing_unit(FirstIntentParser)
        register_processing_unit(SecondIntentParser)

        mocked_dataset_metadata = {
            "language_code": "en",
            "entities": {
                "mocked_entity": {
                    "automatically_extensible": True,
                    "utterances": dict()
                }
            },
            "slot_name_mappings": {
                "dummy_intent_1": {
                    "mocked_slot_name": "mocked_entity"
                }
            }
        }

        config = NLUEngineConfig(
            [FirstIntentParserConfig(),
             SecondIntentParserConfig()])
        engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
        # pylint:disable=protected-access
        engine._dataset_metadata = mocked_dataset_metadata
        # pylint:enable=protected-access

        # When
        parse = engine.parse(input_text)

        # Then
        expected_slots = [custom_slot(s) for s in slots]
        expected_parse = parsing_result(input_text, intent, expected_slots)
        self.assertDictEqual(expected_parse, parse)
Ejemplo n.º 29
0
 def parse(self, text, intents):
     if text == input_text:
         return parsing_result(text, intent, slots)
     return empty_result(text)
Ejemplo n.º 30
0
    def test_should_use_parsers_sequentially(self):
        # Given
        input_text = "hello world"
        intent = intent_classification_result(
            intent_name='dummy_intent_1', probability=0.7)
        slots = [unresolved_slot(match_range=(6, 11),
                                 value='world',
                                 entity='mocked_entity',
                                 slot_name='mocked_slot_name')]

        class TestIntentParser1Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser1"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser1Config()

        class TestIntentParser1(IntentParser):
            unit_name = "test_intent_parser1"
            config_type = TestIntentParser1Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                conf = cls.config_type()
                return TestIntentParser1(conf)

        class TestIntentParser2Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser2"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser2Config()

        class TestIntentParser2(IntentParser):
            unit_name = "test_intent_parser2"
            config_type = TestIntentParser2Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                if text == input_text:
                    return parsing_result(text, intent, slots)
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                conf = cls.config_type()
                return TestIntentParser2(conf)

        register_processing_unit(TestIntentParser1)
        register_processing_unit(TestIntentParser2)

        mocked_dataset_metadata = {
            "language_code": "en",
            "entities": {
                "mocked_entity": {
                    "automatically_extensible": True,
                    "utterances": dict()
                }
            },
            "slot_name_mappings": {
                "dummy_intent_1": {
                    "mocked_slot_name": "mocked_entity"
                }
            }
        }

        config = NLUEngineConfig([TestIntentParser1Config(),
                                  TestIntentParser2Config()])
        engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
        # pylint:disable=protected-access
        engine._dataset_metadata = mocked_dataset_metadata
        # pylint:enable=protected-access

        # When
        parse = engine.parse(input_text)

        # Then
        expected_slots = [custom_slot(s) for s in slots]
        expected_parse = parsing_result(input_text, intent, expected_slots)
        self.assertDictEqual(expected_parse, parse)
Ejemplo n.º 31
0
    def test_should_use_parsers_sequentially(self):
        # Given
        input_text = "hello world"
        intent = intent_classification_result(intent_name='dummy_intent_1',
                                              probability=0.7)
        slots = [
            unresolved_slot(match_range=(6, 11),
                            value='world',
                            entity='mocked_entity',
                            slot_name='mocked_slot_name')
        ]

        class TestIntentParser1Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser1"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser1Config()

        class TestIntentParser1(IntentParser):
            unit_name = "test_intent_parser1"
            config_type = TestIntentParser1Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                conf = cls.config_type()
                return TestIntentParser1(conf)

        class TestIntentParser2Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser2"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser2Config()

        class TestIntentParser2(IntentParser):
            unit_name = "test_intent_parser2"
            config_type = TestIntentParser2Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                if text == input_text:
                    return parsing_result(text, intent, slots)
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                conf = cls.config_type()
                return TestIntentParser2(conf)

        register_processing_unit(TestIntentParser1)
        register_processing_unit(TestIntentParser2)

        mocked_dataset_metadata = {
            "language_code": "en",
            "entities": {
                "mocked_entity": {
                    "automatically_extensible": True,
                    "utterances": dict()
                }
            },
            "slot_name_mappings": {
                "dummy_intent_1": {
                    "mocked_slot_name": "mocked_entity"
                }
            }
        }

        config = NLUEngineConfig(
            [TestIntentParser1Config(),
             TestIntentParser2Config()])
        engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
        # pylint:disable=protected-access
        engine._dataset_metadata = mocked_dataset_metadata
        # pylint:enable=protected-access

        # When
        parse = engine.parse(input_text)

        # Then
        expected_slots = [custom_slot(s) for s in slots]
        expected_parse = parsing_result(input_text, intent, expected_slots)
        self.assertDictEqual(expected_parse, parse)
Ejemplo n.º 32
0
    def test_synonyms_should_not_collide_when_remapped_to_base_value(
            self, mocked_proba_parse):
        # Given
        # Given
        dataset = {
            "intents": {
                "intent1": {
                    "utterances": [{
                        "data": [{
                            "text": "value",
                            "entity": "entity1",
                            "slot_name": "slot1"
                        }]
                    }]
                }
            },
            "entities": {
                "entity1": {
                    "data": [{
                        "value": "a",
                        "synonyms": ["favorïte"]
                    }, {
                        "value": "b",
                        "synonyms": ["favorite"]
                    }],
                    "use_synonyms":
                    True,
                    "automatically_extensible":
                    False
                }
            },
            "language": "en",
        }

        mocked_proba_parser_intent = intent_classification_result(
            "intent1", 1.0)

        def mock_proba_parse(text, intents):
            slots = [
                unresolved_slot(match_range=(0, len(text)),
                                value=text,
                                entity="entity1",
                                slot_name="slot1")
            ]
            return parsing_result(text, mocked_proba_parser_intent, slots)

        mocked_proba_parse.side_effect = mock_proba_parse

        config = NLUEngineConfig([ProbabilisticIntentParserConfig()])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        result1 = engine.parse("favorite")
        result2 = engine.parse("favorïte")

        # Then
        expected_slot1 = {
            RES_MATCH_RANGE: {
                "start": 0,
                "end": 8
            },
            RES_RAW_VALUE: "favorite",
            RES_VALUE: {
                "kind": "Custom",
                "value": "b"
            },
            RES_ENTITY: "entity1",
            RES_SLOT_NAME: "slot1"
        }
        expected_slot2 = {
            RES_MATCH_RANGE: {
                "start": 0,
                "end": 8
            },
            RES_RAW_VALUE: "favorïte",
            RES_VALUE: {
                "kind": "Custom",
                "value": "a"
            },
            RES_ENTITY: "entity1",
            RES_SLOT_NAME: "slot1"
        }
        expected_result1 = parsing_result("favorite",
                                          intent=mocked_proba_parser_intent,
                                          slots=[expected_slot1])
        expected_result2 = parsing_result("favorïte",
                                          intent=mocked_proba_parser_intent,
                                          slots=[expected_slot2])
        self.assertEqual(expected_result1, result1)
        self.assertEqual(expected_result2, result2)
Ejemplo n.º 33
0
    def test_synonyms_should_point_to_base_value(self, mocked_deter_parse,
                                                 mocked_proba_parse):
        # Given
        dataset = {
            "snips_nlu_version": "1.1.1",
            "intents": {
                "dummy_intent_1": {
                    "utterances": [{
                        "data": [{
                            "text": "dummy_1",
                            "entity": "dummy_entity_1",
                            "slot_name": "dummy_slot_name"
                        }]
                    }]
                }
            },
            "entities": {
                "dummy_entity_1": {
                    "use_synonyms":
                    True,
                    "automatically_extensible":
                    False,
                    "data": [{
                        "value": "dummy1",
                        "synonyms": ["dummy1", "dummy1_bis"]
                    }]
                }
            },
            "language": "en"
        }

        text = "dummy1_bis"
        mocked_proba_parser_intent = intent_classification_result(
            "dummy_intent_1", 1.0)
        mocked_proba_parser_slots = [
            unresolved_slot(match_range=(0, 10),
                            value="dummy1_bis",
                            entity="dummy_entity_1",
                            slot_name="dummy_slot_name")
        ]

        mocked_deter_parse.return_value = empty_result(text)
        mocked_proba_parse.return_value = parsing_result(
            text, mocked_proba_parser_intent, mocked_proba_parser_slots)

        engine = SnipsNLUEngine().fit(dataset)

        # When
        result = engine.parse(text)

        # Then
        expected_slot = {
            RES_MATCH_RANGE: {
                "start": 0,
                "end": 10
            },
            RES_RAW_VALUE: "dummy1_bis",
            RES_VALUE: {
                "kind": "Custom",
                "value": "dummy1"
            },
            RES_ENTITY: "dummy_entity_1",
            RES_SLOT_NAME: "dummy_slot_name"
        }
        expected_result = parsing_result(text,
                                         intent=mocked_proba_parser_intent,
                                         slots=[expected_slot])
        self.assertEqual(expected_result, result)
Ejemplo n.º 34
0
    def test_should_be_deserializable(self):
        # Given
        parser_dict = {
            "config": {
                "unit_name": "lookup_intent_parser",
                "ignore_stop_words": True
            },
            "language_code": "en",
            "map": {
                hash_str("make coffee"): [0, []],
                hash_str("prepare % snipsnumber % coffees"): [0, [0]],
                hash_str("% snipsnumber % teas at % snipstemperature %"):
                    [1, [0, 1]],
            },
            "slots_names": ["nb_cups", "tea_temperature"],
            "intents_names": ["MakeCoffee", "MakeTea"],
            "entity_scopes": [
                {
                    "entity_scope": {
                        "builtin": ["snips/number"],
                        "custom": [],
                    },
                    "intent_group": ["MakeCoffee"]
                },
                {
                    "entity_scope": {
                        "builtin": ["snips/number", "snips/temperature"],
                        "custom": [],
                    },
                    "intent_group": ["MakeTea"]
                },
            ],
            "stop_words_whitelist": dict()
        }
        self.tmp_file_path.mkdir()
        metadata = {"unit_name": "lookup_intent_parser"}
        self.writeJsonContent(
            self.tmp_file_path / "intent_parser.json", parser_dict)
        self.writeJsonContent(self.tmp_file_path / "metadata.json", metadata)
        resources = self.get_resources("en")
        builtin_entity_parser = BuiltinEntityParser.build(language="en")
        custom_entity_parser = EntityParserMock()

        # When
        parser = LookupIntentParser.from_path(
            self.tmp_file_path, custom_entity_parser=custom_entity_parser,
            builtin_entity_parser=builtin_entity_parser,
            resources=resources)
        res_make_coffee = parser.parse("make me a coffee")
        res_make_tea = parser.parse("two teas at 90°C please")

        # Then
        expected_result_coffee = parsing_result(
            input="make me a coffee",
            intent=intent_classification_result("MakeCoffee", 1.0),
            slots=[])
        expected_result_tea = parsing_result(
            input="two teas at 90°C please",
            intent=intent_classification_result("MakeTea", 1.0),
            slots=[
                {
                    "entity": "snips/number",
                    "range": {"end": 3, "start": 0},
                    "slotName": "nb_cups",
                    "value": "two"
                },
                {
                    "entity": "snips/temperature",
                    "range": {"end": 16, "start": 12},
                    "slotName": "tea_temperature",
                    "value": "90°C"
                }
            ])
        self.assertEqual(expected_result_coffee, res_make_coffee)
        self.assertEqual(expected_result_tea, res_make_tea)