Esempio n. 1
0
    def test_should_parse_after_deserialization(self):
        # Given
        dataset = BEVERAGE_DATASET
        engine = SnipsNLUEngine().fit(dataset)
        input_ = "Give me 3 cups of hot tea please"

        # When
        engine_dict = engine.to_dict()
        deserialized_engine = SnipsNLUEngine.from_dict(engine_dict)
        result = deserialized_engine.parse(input_)

        # Then
        msg = "SnipsNLUEngine dict should be json serializable to utf-8"
        with self.fail_if_exception(msg):
            json.dumps(engine_dict).encode("utf-8")
        expected_slots = [
            resolved_slot({
                START: 8,
                END: 9
            }, '3', {
                'kind': 'Number',
                'value': 3.0
            }, 'snips/number', 'number_of_cups'),
            custom_slot(
                unresolved_slot({
                    START: 18,
                    END: 21
                }, 'hot', 'Temperature', 'beverage_temperature'))
        ]
        self.assertEqual(result[RES_INPUT], input_)
        self.assertEqual(result[RES_INTENT][RES_INTENT_NAME], 'MakeTea')
        self.assertListEqual(result[RES_SLOTS], expected_slots)
Esempio n. 2
0
def resolve_slots(input, slots, dataset_entities, language, scope):
    builtin_entities = get_builtin_entities(input, language, scope)
    resolved_slots = []
    for slot in slots:
        entity_name = slot[RES_ENTITY]
        raw_value = slot[RES_VALUE]
        if is_builtin_entity(entity_name):
            found = False
            for ent in builtin_entities:
                if ent[ENTITY_KIND] == entity_name and \
                        ent[RES_MATCH_RANGE] == slot[RES_MATCH_RANGE]:
                    resolved_slot = builtin_slot(slot, ent[ENTITY])
                    resolved_slots.append(resolved_slot)
                    found = True
                    break
            if not found:
                builtin_matches = get_builtin_entities(raw_value, language,
                                                       scope=[entity_name])
                if builtin_matches:
                    resolved_slot = builtin_slot(slot,
                                                 builtin_matches[0][VALUE])
                    resolved_slots.append(resolved_slot)
        else:  # custom slot
            entity = dataset_entities[entity_name]
            if raw_value in entity[UTTERANCES]:
                resolved_value = entity[UTTERANCES][raw_value]
            elif entity[AUTOMATICALLY_EXTENSIBLE]:
                resolved_value = raw_value
            else:
                # entity is skipped
                resolved_value = None

            if resolved_value is not None:
                resolved_slots.append(custom_slot(slot, resolved_value))
    return resolved_slots
Esempio n. 3
0
    def test_should_parse_after_deserialization_from_dir(self):
        # Given
        dataset = BEVERAGE_DATASET
        engine = SnipsNLUEngine().fit(dataset)
        input_ = "Give me 3 cups of hot tea please"

        # When
        engine.persist(self.tmp_file_path)
        deserialized_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
        result = deserialized_engine.parse(input_)

        # Then
        expected_slots = [
            resolved_slot({
                START: 8,
                END: 9
            }, "3", {
                "kind": "Number",
                "value": 3.0
            }, "snips/number", "number_of_cups"),
            custom_slot(
                unresolved_slot({
                    START: 18,
                    END: 21
                }, "hot", "Temperature", "beverage_temperature"))
        ]
        self.assertEqual(result[RES_INPUT], input_)
        self.assertEqual(result[RES_INTENT][RES_INTENT_NAME], "MakeTea")
        self.assertListEqual(result[RES_SLOTS], expected_slots)
Esempio n. 4
0
    def test_should_parse_after_deserialization(self):
        # Given
        dataset = BEVERAGE_DATASET
        engine = SnipsNLUEngine().fit(dataset)
        input_ = "Give me 3 cups of hot tea please"

        # When
        engine_dict = engine.to_dict()
        deserialized_engine = SnipsNLUEngine.from_dict(engine_dict)
        result = deserialized_engine.parse(input_)

        # Then
        msg = "SnipsNLUEngine dict should be json serializable to utf-8"
        with self.fail_if_exception(msg):
            json.dumps(engine_dict).encode("utf-8")
        expected_slots = [
            resolved_slot({START: 8, END: 9}, '3',
                          {'kind': 'Number', 'value': 3.0},
                          'snips/number', 'number_of_cups'),
            custom_slot(
                unresolved_slot({START: 18, END: 21}, 'hot', 'Temperature',
                                'beverage_temperature'))
        ]
        self.assertEqual(result[RES_INPUT], input_)
        self.assertEqual(result[RES_INTENT][RES_INTENT_NAME], 'MakeTea')
        self.assertListEqual(result[RES_SLOTS], expected_slots)
    def test_engine_with_keyword_slot_filler_should_be_serializable(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: SetLightColor
utterances:
- set the light to [color](blue) in the [room](kitchen)
- please make the lights [color](red) in the [room](bathroom)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        intent = "SetLightColor"
        slot_filler_config = {
            "unit_name": "keyword_slot_filler",
            "lowercase": True
        }
        parser_config = ProbabilisticIntentParserConfig(
            slot_filler_config=slot_filler_config)
        engine_config = NLUEngineConfig([parser_config])
        engine = SnipsNLUEngine(engine_config).fit(dataset, intent)
        engine.persist(self.tmp_file_path)
        text = "I want Red lights in the kitchen now"

        # When
        loaded_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
        res = loaded_engine.parse(text)

        # Then
        expected_slots = [
            custom_slot(
                unresolved_slot(match_range={
                    START: 7,
                    END: 10
                },
                                value="Red",
                                entity="color",
                                slot_name="color"), "red"),
            custom_slot(
                unresolved_slot(match_range={
                    START: 25,
                    END: 32
                },
                                value="kitchen",
                                entity="room",
                                slot_name="room"))
        ]
        self.assertListEqual(expected_slots, res["slots"])
Esempio n. 6
0
    def test_should_parse_after_deserialization_from_dir(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
- i want [number_of_cups] cups of [beverage_temperature](boiling hot) tea pls
- can you prepare [number_of_cups] cup of [beverage_temperature](cold) tea ?

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee
- can you prepare [number_of_cups] cup of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        engine = SnipsNLUEngine(**shared).fit(dataset)
        text = "Give me 3 cups of hot tea please"

        # When
        engine.persist(self.tmp_file_path)
        deserialized_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
        result = deserialized_engine.parse(text)

        # Then
        expected_slots = [
            resolved_slot({
                START: 8,
                END: 9
            }, "3", {
                "kind": "Number",
                "value": 3.0
            }, "snips/number", "number_of_cups"),
            custom_slot(
                unresolved_slot({
                    START: 18,
                    END: 21
                }, "hot", "Temperature", "beverage_temperature"))
        ]
        self.assertEqual(result[RES_INPUT], text)
        self.assertEqual(result[RES_INTENT][RES_INTENT_NAME], "MakeTea")
        self.assertListEqual(result[RES_SLOTS], expected_slots)
Esempio n. 7
0
def resolve_slots(input, slots, dataset_entities, language, scope):
    # Do not use cached entities here as datetimes must be computed using
    # current context
    builtin_entities = get_builtin_entities(input,
                                            language,
                                            scope,
                                            use_cache=False)
    resolved_slots = []
    for slot in slots:
        entity_name = slot[RES_ENTITY]
        raw_value = slot[RES_VALUE]
        if is_builtin_entity(entity_name):
            found = False
            for ent in builtin_entities:
                if ent[ENTITY_KIND] == entity_name and \
                        ent[RES_MATCH_RANGE] == slot[RES_MATCH_RANGE]:
                    resolved_slot = builtin_slot(slot, ent[ENTITY])
                    resolved_slots.append(resolved_slot)
                    found = True
                    break
            if not found:
                builtin_matches = get_builtin_entities(raw_value,
                                                       language,
                                                       scope=[entity_name],
                                                       use_cache=False)
                if builtin_matches:
                    resolved_slot = builtin_slot(slot,
                                                 builtin_matches[0][VALUE])
                    resolved_slots.append(resolved_slot)
        else:  # custom slot
            entity = dataset_entities[entity_name]
            normalized_raw_value = normalize(raw_value)
            if raw_value in entity[UTTERANCES]:
                resolved_value = entity[UTTERANCES][raw_value]
            elif normalized_raw_value in entity[UTTERANCES]:
                resolved_value = entity[UTTERANCES][normalized_raw_value]
            elif entity[AUTOMATICALLY_EXTENSIBLE]:
                resolved_value = raw_value
            else:
                # entity is skipped
                resolved_value = None

            if resolved_value is not None:
                resolved_slots.append(custom_slot(slot, resolved_value))
    return resolved_slots
Esempio n. 8
0
    def test_should_use_parsers_sequentially(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- hello [greeted:name](john)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        input_text = "hello snips"
        intent = intent_classification_result(intent_name='greeting1',
                                              probability=0.7)
        slots = [
            unresolved_slot(match_range=(6, 11),
                            value='snips',
                            entity='name',
                            slot_name='greeted')
        ]

        # pylint:disable=unused-variable
        @IntentParser.register("first_intent_parser", True)
        class FirstIntentParser(MockIntentParser):
            pass

        @IntentParser.register("second_intent_parser", True)
        class SecondIntentParser(MockIntentParser):
            def parse(self, text, intents=None, top_n=None):
                if text == input_text:
                    return parsing_result(text, intent, slots)
                return empty_result(text, 1.0)

        # pylint:enable=unused-variable

        config = NLUEngineConfig(
            ["first_intent_parser", "second_intent_parser"])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        parse = engine.parse(input_text)

        # Then
        expected_slots = [custom_slot(s) for s in slots]
        expected_parse = parsing_result(input_text, intent, expected_slots)
        self.assertDictEqual(expected_parse, parse)
Esempio n. 9
0
    def test_should_get_slots(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: greeting
utterances:
- hello [greeted:name](john)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        input_text = "hello snips"
        greeting_intent = "greeting"
        expected_slots = [
            unresolved_slot(match_range=(6, 11),
                            value="snips",
                            entity="name",
                            slot_name="greeted")
        ]

        # pylint:disable=unused-variable
        @IntentParser.register("first_intent_parser", True)
        class FirstIntentParser(MockIntentParser):
            pass

        @IntentParser.register("second_intent_parser", True)
        class SecondIntentParser(MockIntentParser):
            def get_slots(self, text, intent):
                if text == input_text and intent == greeting_intent:
                    return expected_slots
                return []

        # pylint:enable=unused-variable

        config = NLUEngineConfig(
            ["first_intent_parser", "second_intent_parser"])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        res_slots = engine.get_slots(input_text, greeting_intent)

        # Then
        expected_slots = [custom_slot(s) for s in expected_slots]
        self.assertListEqual(expected_slots, res_slots)
Esempio n. 10
0
    def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                            mocked_crf_parse):
        # Given
        dataset = {
            "snips_nlu_version": "1.1.1",
            "intents": {
                "dummy_intent_1": {
                    "utterances": [{
                        "data": [{
                            "text": "dummy_1",
                            "entity": "dummy_entity_1",
                            "slot_name": "dummy_slot_name"
                        }, {
                            "text": " dummy_2",
                            "entity": "dummy_entity_2",
                            "slot_name": "other_dummy_slot_name"
                        }]
                    }]
                }
            },
            "entities": {
                "dummy_entity_1": {
                    "use_synonyms":
                    True,
                    "automatically_extensible":
                    False,
                    "data": [{
                        "value": "dummy1",
                        "synonyms": ["dummy1", "dummy1_bis"]
                    }, {
                        "value": "dummy2",
                        "synonyms": ["dummy2", "dummy2_bis"]
                    }]
                },
                "dummy_entity_2": {
                    "use_synonyms": False,
                    "automatically_extensible": True,
                    "data": [{
                        "value": "dummy2",
                        "synonyms": ["dummy2"]
                    }]
                }
            },
            "language": "en"
        }

        text = "dummy_3 dummy_4"
        mocked_crf_intent = intent_classification_result("dummy_intent_1", 1.0)
        mocked_crf_slots = [
            unresolved_slot(match_range=(0, 7),
                            value="dummy_3",
                            entity="dummy_entity_1",
                            slot_name="dummy_slot_name"),
            unresolved_slot(match_range=(8, 15),
                            value="dummy_4",
                            entity="dummy_entity_2",
                            slot_name="other_dummy_slot_name")
        ]

        mocked_regex_parse.return_value = empty_result(text)
        mocked_crf_parse.return_value = parsing_result(text, mocked_crf_intent,
                                                       mocked_crf_slots)

        engine = SnipsNLUEngine()

        # When
        engine = engine.fit(dataset)
        result = engine.parse(text)

        # Then
        expected_slot = custom_slot(
            unresolved_slot(match_range=(8, 15),
                            value="dummy_4",
                            entity="dummy_entity_2",
                            slot_name="other_dummy_slot_name"))
        expected_result = parsing_result(text,
                                         intent=mocked_crf_intent,
                                         slots=[expected_slot])
        self.assertEqual(expected_result, result)
Esempio n. 11
0
    def test_should_use_parsers_sequentially(self):
        # Given
        input_text = "hello world"
        intent = intent_classification_result(intent_name='dummy_intent_1',
                                              probability=0.7)
        slots = [
            unresolved_slot(match_range=(6, 11),
                            value='world',
                            entity='mocked_entity',
                            slot_name='mocked_slot_name')
        ]

        class TestIntentParser1Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser1"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser1Config()

        class TestIntentParser1(IntentParser):
            unit_name = "test_intent_parser1"
            config_type = TestIntentParser1Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                conf = cls.config_type()
                return TestIntentParser1(conf)

        class TestIntentParser2Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser2"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser2Config()

        class TestIntentParser2(IntentParser):
            unit_name = "test_intent_parser2"
            config_type = TestIntentParser2Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                if text == input_text:
                    return parsing_result(text, intent, slots)
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                conf = cls.config_type()
                return TestIntentParser2(conf)

        register_processing_unit(TestIntentParser1)
        register_processing_unit(TestIntentParser2)

        mocked_dataset_metadata = {
            "language_code": "en",
            "entities": {
                "mocked_entity": {
                    "automatically_extensible": True,
                    "utterances": dict()
                }
            },
            "slot_name_mappings": {
                "dummy_intent_1": {
                    "mocked_slot_name": "mocked_entity"
                }
            }
        }

        config = NLUEngineConfig(
            [TestIntentParser1Config(),
             TestIntentParser2Config()])
        engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
        # pylint:disable=protected-access
        engine._dataset_metadata = mocked_dataset_metadata
        # pylint:enable=protected-access

        # When
        parse = engine.parse(input_text)

        # Then
        expected_slots = [custom_slot(s) for s in slots]
        expected_parse = parsing_result(input_text, intent, expected_slots)
        self.assertDictEqual(expected_parse, parse)
Esempio n. 12
0
    def test_should_parse_top_intents(self):
        # Given
        text = "foo bar ban"
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - foo [slot1:entity1](bak)
  
---
type: intent
name: intent2
utterances:
  - '[slot2:entity2](foo) baz'
  
---
type: intent
name: intent3
utterances:
  - foo bap""")

        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentParser.register("first_intent_parser", True)
        class FirstIntentParser(MockIntentParser):
            def get_intents(self, text):
                return [
                    intent_classification_result("intent1", 0.5),
                    intent_classification_result("intent2", 0.3),
                    intent_classification_result(None, 0.15),
                    intent_classification_result("intent3", 0.05)
                ]

            def get_slots(self, text, intent):
                if intent == "intent1":
                    return []
                if intent == "intent2":
                    return [
                        unresolved_slot((0, 3), "foo", "entity2", "slot2")
                    ]
                return []

        @IntentParser.register("second_intent_parser", True)
        class SecondIntentParser(MockIntentParser):
            def get_intents(self, text):
                return [
                    intent_classification_result("intent2", 0.6),
                    intent_classification_result("intent1", 0.2),
                    intent_classification_result(None, 0.15),
                    intent_classification_result("intent3", 0.05)
                ]

            def get_slots(self, text, intent):
                if intent == "intent1":
                    return [
                        unresolved_slot((0, 3), "foo", "entity1", "slot1")
                    ]
                if intent == "intent2":
                    return [
                        unresolved_slot((8, 11), "ban", "entity2", "slot2")
                    ]
                return []

        # pylint:enable=unused-variable

        config = NLUEngineConfig(
            ["first_intent_parser", "second_intent_parser"])
        nlu_engine = SnipsNLUEngine(config).fit(dataset)

        # When
        results = nlu_engine.parse(text, top_n=3)
        results_with_filter = nlu_engine.parse(
            text, intents=["intent1", "intent3"], top_n=3)

        # Then
        expected_results = [
            extraction_result(
                intent_classification_result("intent2", 0.6),
                [custom_slot(
                    unresolved_slot((0, 3), "foo", "entity2", "slot2"))]
            ),
            extraction_result(
                intent_classification_result("intent1", 0.5),
                [custom_slot(
                    unresolved_slot((0, 3), "foo", "entity1", "slot1"))]
            ),
            extraction_result(
                intent_classification_result(None, 0.15),
                []
            ),
        ]
        expected_results_with_filter = [
            extraction_result(
                intent_classification_result("intent1", 0.5),
                [custom_slot(
                    unresolved_slot((0, 3), "foo", "entity1", "slot1"))]
            ),
            extraction_result(
                intent_classification_result(None, 0.15),
                []
            ),
            extraction_result(
                intent_classification_result("intent3", 0.05),
                []
            ),
        ]
        self.assertListEqual(expected_results, results)
        self.assertListEqual(expected_results_with_filter, results_with_filter)
Esempio n. 13
0
    def test_should_use_parsers_sequentially(self):
        # Given
        input_text = "hello world"
        intent = intent_classification_result(intent_name='dummy_intent_1',
                                              probability=0.7)
        slots = [
            unresolved_slot(match_range=(6, 11),
                            value='world',
                            entity='mocked_entity',
                            slot_name='mocked_slot_name')
        ]

        class FirstIntentParserConfig(ProcessingUnitConfig):
            unit_name = "first_intent_parser"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return FirstIntentParserConfig()

            def get_required_resources(self):
                return None

        class FirstIntentParser(IntentParser):
            unit_name = "first_intent_parser"
            config_type = FirstIntentParserConfig

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                return empty_result(text)

            def persist(self, path):
                path = Path(path)
                path.mkdir()
                with (path / "metadata.json").open(mode="w") as f:
                    f.write(json_string({"unit_name": self.unit_name}))

            @classmethod
            def from_path(cls, path):
                cfg = cls.config_type()
                return cls(cfg)

        class SecondIntentParserConfig(ProcessingUnitConfig):
            unit_name = "second_intent_parser"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return SecondIntentParserConfig()

            def get_required_resources(self):
                return None

        class SecondIntentParser(IntentParser):
            unit_name = "second_intent_parser"
            config_type = SecondIntentParserConfig

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                if text == input_text:
                    return parsing_result(text, intent, slots)
                return empty_result(text)

            def persist(self, path):
                path = Path(path)
                path.mkdir()
                with (path / "metadata.json").open(mode="w") as f:
                    f.write(json_string({"unit_name": self.unit_name}))

            @classmethod
            def from_path(cls, path):
                cfg = cls.config_type()
                return cls(cfg)

        register_processing_unit(FirstIntentParser)
        register_processing_unit(SecondIntentParser)

        mocked_dataset_metadata = {
            "language_code": "en",
            "entities": {
                "mocked_entity": {
                    "automatically_extensible": True,
                    "utterances": dict()
                }
            },
            "slot_name_mappings": {
                "dummy_intent_1": {
                    "mocked_slot_name": "mocked_entity"
                }
            }
        }

        config = NLUEngineConfig(
            [FirstIntentParserConfig(),
             SecondIntentParserConfig()])
        engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
        # pylint:disable=protected-access
        engine._dataset_metadata = mocked_dataset_metadata
        # pylint:enable=protected-access

        # When
        parse = engine.parse(input_text)

        # Then
        expected_slots = [custom_slot(s) for s in slots]
        expected_parse = parsing_result(input_text, intent, expected_slots)
        self.assertDictEqual(expected_parse, parse)
Esempio n. 14
0
    def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                            mocked_crf_parse):
        # Given
        dataset = {
            "snips_nlu_version": "1.1.1",
            "intents": {
                "dummy_intent_1": {
                    "utterances": [
                        {
                            "data": [
                                {
                                    "text": "dummy_1",
                                    "entity": "dummy_entity_1",
                                    "slot_name": "dummy_slot_name"
                                },
                                {
                                    "text": " dummy_2",
                                    "entity": "dummy_entity_2",
                                    "slot_name": "other_dummy_slot_name"
                                }
                            ]
                        }
                    ]
                }
            },
            "entities": {
                "dummy_entity_1": {
                    "use_synonyms": True,
                    "automatically_extensible": False,
                    "data": [
                        {
                            "value": "dummy1",
                            "synonyms": [
                                "dummy1",
                                "dummy1_bis"
                            ]
                        },
                        {
                            "value": "dummy2",
                            "synonyms": [
                                "dummy2",
                                "dummy2_bis"
                            ]
                        }
                    ]
                },
                "dummy_entity_2": {
                    "use_synonyms": False,
                    "automatically_extensible": True,
                    "data": [
                        {
                            "value": "dummy2",
                            "synonyms": [
                                "dummy2"
                            ]
                        }
                    ]
                }
            },
            "language": "en"
        }

        text = "dummy_3 dummy_4"
        mocked_crf_intent = intent_classification_result("dummy_intent_1", 1.0)
        mocked_crf_slots = [unresolved_slot(match_range=(0, 7),
                                            value="dummy_3",
                                            entity="dummy_entity_1",
                                            slot_name="dummy_slot_name"),
                            unresolved_slot(match_range=(8, 15),
                                            value="dummy_4",
                                            entity="dummy_entity_2",
                                            slot_name="other_dummy_slot_name")]

        mocked_regex_parse.return_value = empty_result(text)
        mocked_crf_parse.return_value = parsing_result(
            text, mocked_crf_intent, mocked_crf_slots)

        engine = SnipsNLUEngine()

        # When
        engine = engine.fit(dataset)
        result = engine.parse(text)

        # Then
        expected_slot = custom_slot(unresolved_slot(
            match_range=(8, 15), value="dummy_4", entity="dummy_entity_2",
            slot_name="other_dummy_slot_name"))
        expected_result = parsing_result(text, intent=mocked_crf_intent,
                                         slots=[expected_slot])
        self.assertEqual(expected_result, result)
Esempio n. 15
0
    def test_should_use_parsers_sequentially(self):
        # Given
        input_text = "hello world"
        intent = intent_classification_result(
            intent_name='dummy_intent_1', probability=0.7)
        slots = [unresolved_slot(match_range=(6, 11),
                                 value='world',
                                 entity='mocked_entity',
                                 slot_name='mocked_slot_name')]

        class TestIntentParser1Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser1"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser1Config()

        class TestIntentParser1(IntentParser):
            unit_name = "test_intent_parser1"
            config_type = TestIntentParser1Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                conf = cls.config_type()
                return TestIntentParser1(conf)

        class TestIntentParser2Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser2"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser2Config()

        class TestIntentParser2(IntentParser):
            unit_name = "test_intent_parser2"
            config_type = TestIntentParser2Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                if text == input_text:
                    return parsing_result(text, intent, slots)
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                conf = cls.config_type()
                return TestIntentParser2(conf)

        register_processing_unit(TestIntentParser1)
        register_processing_unit(TestIntentParser2)

        mocked_dataset_metadata = {
            "language_code": "en",
            "entities": {
                "mocked_entity": {
                    "automatically_extensible": True,
                    "utterances": dict()
                }
            },
            "slot_name_mappings": {
                "dummy_intent_1": {
                    "mocked_slot_name": "mocked_entity"
                }
            }
        }

        config = NLUEngineConfig([TestIntentParser1Config(),
                                  TestIntentParser2Config()])
        engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
        # pylint:disable=protected-access
        engine._dataset_metadata = mocked_dataset_metadata
        # pylint:enable=protected-access

        # When
        parse = engine.parse(input_text)

        # Then
        expected_slots = [custom_slot(s) for s in slots]
        expected_parse = parsing_result(input_text, intent, expected_slots)
        self.assertDictEqual(expected_parse, parse)