def test_should_serialize_duplicated_intent_parsers(self):
        # Given
        register_processing_unit(TestIntentParser1)
        parser1_config = TestIntentParser1Config()
        parser1bis_config = TestIntentParser1Config()

        parsers_configs = [parser1_config, parser1bis_config]
        config = NLUEngineConfig(parsers_configs)
        engine = SnipsNLUEngine(config).fit(BEVERAGE_DATASET)

        # When
        engine.persist(self.tmp_file_path)

        # Then
        expected_engine_dict = {
            "unit_name": "nlu_engine",
            "dataset_metadata": {
                "language_code": "en",
                "entities": {
                    "Temperature": {
                        "automatically_extensible": True,
                        "utterances": {
                            "boiling": "hot",
                            "Boiling": "hot",
                            "cold": "cold",
                            "Cold": "cold",
                            "hot": "hot",
                            "Hot": "hot",
                            "iced": "cold",
                            "Iced": "cold"
                        }
                    }
                },
                "slot_name_mappings": {
                    "MakeCoffee": {
                        "number_of_cups": "snips/number"
                    },
                    "MakeTea": {
                        "beverage_temperature": "Temperature",
                        "number_of_cups": "snips/number"
                    }
                },
            },
            "config": config.to_dict(),
            "intent_parsers": [
                "test_intent_parser1",
                "test_intent_parser1_2"
            ],
            "model_version": snips_nlu.__model_version__,
            "training_package_version": snips_nlu.__version__
        }
        self.assertJsonContent(self.tmp_file_path / "nlu_engine.json",
                               expected_engine_dict)
        self.assertJsonContent(
            self.tmp_file_path / "test_intent_parser1" / "metadata.json",
            {"unit_name": "test_intent_parser1"})
        self.assertJsonContent(
            self.tmp_file_path / "test_intent_parser1_2" / "metadata.json",
            {"unit_name": "test_intent_parser1"})
    def test_parse_should_raise_with_unknown_intent_in_filter(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
  - Hello [name1](John)

---
type: intent
name: goodbye
utterances:
  - Goodbye [name](Eric)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentParser.register("my_intent_parser", True)
        class FirstIntentParser(MockIntentParser):
            pass

        # pylint:enable=unused-variable

        config = NLUEngineConfig(["my_intent_parser"])
        nlu_engine = SnipsNLUEngine(config).fit(dataset)

        # When / Then
        with self.assertRaises(IntentNotFoundError):
            nlu_engine.parse("Hello John", intents="greeting3")

        with self.assertRaises(IntentNotFoundError):
            nlu_engine.parse("Hello John", intents=["greeting3"])
    def test_nlu_engine_should_raise_error_with_bytes_input(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        bytes_input = b"brew me an espresso"

        # pylint:disable=unused-variable
        @IntentParser.register("my_intent_parser", True)
        class MyIntentParser(MockIntentParser):
            pass

        # pylint:enable=unused-variable
        config = NLUEngineConfig(["my_intent_parser"])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When / Then
        with self.assertRaises(InvalidInputError) as cm:
            engine.parse(bytes_input)
        message = str(cm.exception.args[0])
        self.assertTrue("Expected unicode but received" in message)
    def test_should_retrain_only_non_trained_subunits(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- hello [greeted:name](john)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        @IntentParser.register("test_intent_parser", True)
        class TestIntentParser(MockIntentParser):
            def __init__(self, config=None, **shared):
                super(TestIntentParser, self).__init__(config, **shared)
                self.sub_unit_1 = dict(fitted=False, calls=0)
                self.sub_unit_2 = dict(fitted=False, calls=0)

            def fit(self, dataset, force_retrain):
                if force_retrain:
                    self.sub_unit_1["fitted"] = True
                    self.sub_unit_1["calls"] += 1
                    self.sub_unit_2["fitted"] = True
                    self.sub_unit_2["calls"] += 1
                else:
                    if not self.sub_unit_1["fitted"]:
                        self.sub_unit_1["fitted"] = True
                        self.sub_unit_1["calls"] += 1
                    if not self.sub_unit_2["fitted"]:
                        self.sub_unit_2["fitted"] = True
                        self.sub_unit_2["calls"] += 1

                return self

            @property
            def fitted(self):
                return self.sub_unit_1["fitted"] and \
                       self.sub_unit_2["fitted"]

        nlu_engine_config = NLUEngineConfig(["test_intent_parser"])
        nlu_engine = SnipsNLUEngine(nlu_engine_config)

        intent_parser = TestIntentParser()
        intent_parser.sub_unit_1.update(dict(fitted=True, calls=0))
        nlu_engine.intent_parsers.append(intent_parser)

        # When
        nlu_engine.fit(dataset, force_retrain=False)

        # Then
        self.assertDictEqual(dict(fitted=True, calls=0),
                             intent_parser.sub_unit_1)
        self.assertDictEqual(dict(fitted=True, calls=1),
                             intent_parser.sub_unit_2)
    def test_should_get_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- hello

---
type: intent
name: greeting2
utterances:
- how are you""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        input_text = "hello world"

        # pylint:disable=unused-variable
        @IntentParser.register("first_intent_parser", True)
        class FirstIntentParser(MockIntentParser):
            def get_intents(self, text):
                return [
                    intent_classification_result("greeting1", 0.5),
                    intent_classification_result("greeting2", 0.3),
                    intent_classification_result(None, 0.2)
                ]

        @IntentParser.register("second_intent_parser", True)
        class SecondIntentParser(MockIntentParser):
            def get_intents(self, text):
                return [
                    intent_classification_result("greeting2", 0.6),
                    intent_classification_result("greeting1", 0.2),
                    intent_classification_result(None, 0.1)
                ]

        # pylint:enable=unused-variable
        config = NLUEngineConfig(["first_intent_parser",
                                  "second_intent_parser"])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        res_intents = engine.get_intents(input_text)

        # Then
        expected_intents = [
            intent_classification_result("greeting2", 0.6),
            intent_classification_result("greeting1", 0.5),
            intent_classification_result(None, 0.2)
        ]
        self.assertListEqual(expected_intents, res_intents)
Beispiel #6
0
    def test_nlu_config_from_dict(self):
        # Given
        config_dict = {
            "unit_name": "nlu_engine",
            "intent_parsers_configs": [
                DeterministicIntentParserConfig().to_dict(),
                ProbabilisticIntentParserConfig().to_dict()
            ]
        }

        # When
        config = NLUEngineConfig.from_dict(config_dict)
        serialized_config = config.to_dict()

        # Then
        self.assertDictEqual(config_dict, serialized_config)
Beispiel #7
0
    def test_should_use_parsers_sequentially(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- hello [greeted:name](john)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        input_text = "hello snips"
        intent = intent_classification_result(intent_name='greeting1',
                                              probability=0.7)
        slots = [
            unresolved_slot(match_range=(6, 11),
                            value='snips',
                            entity='name',
                            slot_name='greeted')
        ]

        # pylint:disable=unused-variable
        @IntentParser.register("first_intent_parser", True)
        class FirstIntentParser(MockIntentParser):
            pass

        @IntentParser.register("second_intent_parser", True)
        class SecondIntentParser(MockIntentParser):
            def parse(self, text, intents=None, top_n=None):
                if text == input_text:
                    return parsing_result(text, intent, slots)
                return empty_result(text, 1.0)

        # pylint:enable=unused-variable

        config = NLUEngineConfig(
            ["first_intent_parser", "second_intent_parser"])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        parse = engine.parse(input_text)

        # Then
        expected_slots = [custom_slot(s) for s in slots]
        expected_parse = parsing_result(input_text, intent, expected_slots)
        self.assertDictEqual(expected_parse, parse)
Beispiel #8
0
    def test_should_get_slots(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: greeting
utterances:
- hello [greeted:name](john)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        input_text = "hello snips"
        greeting_intent = "greeting"
        expected_slots = [
            unresolved_slot(match_range=(6, 11),
                            value="snips",
                            entity="name",
                            slot_name="greeted")
        ]

        # pylint:disable=unused-variable
        @IntentParser.register("first_intent_parser", True)
        class FirstIntentParser(MockIntentParser):
            pass

        @IntentParser.register("second_intent_parser", True)
        class SecondIntentParser(MockIntentParser):
            def get_slots(self, text, intent):
                if text == input_text and intent == greeting_intent:
                    return expected_slots
                return []

        # pylint:enable=unused-variable

        config = NLUEngineConfig(
            ["first_intent_parser", "second_intent_parser"])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        res_slots = engine.get_slots(input_text, greeting_intent)

        # Then
        expected_slots = [custom_slot(s) for s in expected_slots]
        self.assertListEqual(expected_slots, res_slots)
    def test_synonyms_should_point_to_base_value(self):
        # Given
        dataset = {
            "intents": {
                "dummy_intent_1": {
                    "utterances": [
                        {
                            "data": [
                                {
                                    "text": "dummy_1",
                                    "entity": "dummy_entity_1",
                                    "slot_name": "dummy_slot_name"
                                }
                            ]
                        }
                    ]
                }
            },
            "entities": {
                "dummy_entity_1": {
                    "use_synonyms": True,
                    "automatically_extensible": False,
                    "data": [
                        {
                            "value": "dummy1",
                            "synonyms": [
                                "dummy1",
                                "dummy1_bis"
                            ]
                        }
                    ],
                    "matching_strictness": 1.0
                }
            },
            "language": "en"
        }
        mocked_intent = intent_classification_result("dummy_intent_1", 1.0)
        mocked_slots = [
            unresolved_slot(match_range=(0, 10), value="dummy1_bis",
                            entity="dummy_entity_1",
                            slot_name="dummy_slot_name")]

        # pylint:disable=unused-variable
        @IntentParser.register("my_intent_parser", True)
        class MyIntentParser(MockIntentParser):
            def parse(self, text, intents=None, top_n=None):
                return parsing_result(text, mocked_intent, mocked_slots)

        # pylint:enable=unused-variable

        input_ = "dummy1_bis"
        config = NLUEngineConfig(["my_intent_parser"])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        result = engine.parse(input_)

        # Then
        expected_slot = {
            RES_MATCH_RANGE: {
                "start": 0,
                "end": 10
            },
            RES_RAW_VALUE: "dummy1_bis",
            RES_VALUE: {
                "kind": "Custom",
                "value": "dummy1"
            },
            RES_ENTITY: "dummy_entity_1",
            RES_SLOT_NAME: "dummy_slot_name"
        }
        expected_result = parsing_result(
            input_, mocked_intent, slots=[expected_slot])
        self.assertEqual(expected_result, result)
    def test_should_serialize_duplicated_intent_parsers(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentParser.register("my_intent_parser", True)
        class MyIntentParser(MockIntentParser):
            pass

        # pylint:enable=unused-variable

        parsers_configs = ["my_intent_parser", "my_intent_parser"]
        config = NLUEngineConfig(parsers_configs)
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        engine.persist(self.tmp_file_path)

        # Then
        expected_engine_dict = {
            "unit_name": "nlu_engine",
            "dataset_metadata": {
                "language_code": "en",
                "entities": {
                    "Temperature": {
                        "automatically_extensible": True,
                    }
                },
                "slot_name_mappings": {
                    "MakeCoffee": {
                        "number_of_cups": "snips/number"
                    },
                    "MakeTea": {
                        "beverage_temperature": "Temperature",
                        "number_of_cups": "snips/number"
                    }
                },
            },
            "config": {
                "unit_name": "nlu_engine",
                "intent_parsers_configs": [
                    {
                        "unit_name": "my_intent_parser"
                    },
                    {
                        "unit_name": "my_intent_parser"
                    }
                ]
            },
            "intent_parsers": [
                "my_intent_parser",
                "my_intent_parser_2"
            ],
            "builtin_entity_parser": "builtin_entity_parser",
            "custom_entity_parser": "custom_entity_parser",
            "model_version": snips_nlu.__model_version__,
            "training_package_version": snips_nlu.__version__
        }
        self.assertJsonContent(self.tmp_file_path / "nlu_engine.json",
                               expected_engine_dict)
        self.assertJsonContent(
            self.tmp_file_path / "my_intent_parser" / "metadata.json",
            {"unit_name": "my_intent_parser", "fitted": True})
        self.assertJsonContent(
            self.tmp_file_path / "my_intent_parser_2" / "metadata.json",
            {"unit_name": "my_intent_parser", "fitted": True})
Beispiel #11
0
    def test_should_retrain_only_non_trained_subunits(self):
        # Given
        class TestIntentParserConfig(ProcessingUnitConfig):
            unit_name = "test_intent_parser"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParserConfig()

            def get_required_resources(self):
                return None

        class TestIntentParser(IntentParser):
            unit_name = "test_intent_parser"
            config_type = TestIntentParserConfig

            def __init__(self, config):
                super(TestIntentParser, self).__init__(config)
                self.sub_unit_1 = dict(fitted=False, calls=0)
                self.sub_unit_2 = dict(fitted=False, calls=0)

            def fit(self, dataset, force_retrain):
                if force_retrain:
                    self.sub_unit_1["fitted"] = True
                    self.sub_unit_1["calls"] += 1
                    self.sub_unit_2["fitted"] = True
                    self.sub_unit_2["calls"] += 1
                else:
                    if not self.sub_unit_1["fitted"]:
                        self.sub_unit_1["fitted"] = True
                        self.sub_unit_1["calls"] += 1
                    if not self.sub_unit_2["fitted"]:
                        self.sub_unit_2["fitted"] = True
                        self.sub_unit_2["calls"] += 1

                return self

            @property
            def fitted(self):
                return self.sub_unit_1["fitted"] and \
                       self.sub_unit_2["fitted"]

            def parse(self, text, intents):
                return empty_result(text)

            def persist(self, path):
                path = Path(path)
                path.mkdir()
                with (path / "metadata.json").open(mode="w") as f:
                    f.write(json_string({"unit_name": self.unit_name}))

            @classmethod
            def from_path(cls, path):
                cfg = cls.config_type()
                return cls(cfg)

        register_processing_unit(TestIntentParser)

        intent_parser_config = TestIntentParserConfig()
        nlu_engine_config = NLUEngineConfig([intent_parser_config])
        nlu_engine = SnipsNLUEngine(nlu_engine_config)

        intent_parser = TestIntentParser(intent_parser_config)
        intent_parser.sub_unit_1.update(dict(fitted=True, calls=0))
        nlu_engine.intent_parsers.append(intent_parser)

        # When
        nlu_engine.fit(SAMPLE_DATASET, force_retrain=False)

        # Then
        self.assertDictEqual(dict(fitted=True, calls=0),
                             intent_parser.sub_unit_1)
        self.assertDictEqual(dict(fitted=True, calls=1),
                             intent_parser.sub_unit_2)
    def test_should_parse_top_intents(self):
        # Given
        text = "foo bar ban"
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - foo [slot1:entity1](bak)
  
---
type: intent
name: intent2
utterances:
  - '[slot2:entity2](foo) baz'
  
---
type: intent
name: intent3
utterances:
  - foo bap""")

        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentParser.register("first_intent_parser", True)
        class FirstIntentParser(MockIntentParser):
            def get_intents(self, text):
                return [
                    intent_classification_result("intent1", 0.5),
                    intent_classification_result("intent2", 0.3),
                    intent_classification_result(None, 0.15),
                    intent_classification_result("intent3", 0.05)
                ]

            def get_slots(self, text, intent):
                if intent == "intent1":
                    return []
                if intent == "intent2":
                    return [
                        unresolved_slot((0, 3), "foo", "entity2", "slot2")
                    ]
                return []

        @IntentParser.register("second_intent_parser", True)
        class SecondIntentParser(MockIntentParser):
            def get_intents(self, text):
                return [
                    intent_classification_result("intent2", 0.6),
                    intent_classification_result("intent1", 0.2),
                    intent_classification_result(None, 0.15),
                    intent_classification_result("intent3", 0.05)
                ]

            def get_slots(self, text, intent):
                if intent == "intent1":
                    return [
                        unresolved_slot((0, 3), "foo", "entity1", "slot1")
                    ]
                if intent == "intent2":
                    return [
                        unresolved_slot((8, 11), "ban", "entity2", "slot2")
                    ]
                return []

        # pylint:enable=unused-variable

        config = NLUEngineConfig(
            ["first_intent_parser", "second_intent_parser"])
        nlu_engine = SnipsNLUEngine(config).fit(dataset)

        # When
        results = nlu_engine.parse(text, top_n=3)
        results_with_filter = nlu_engine.parse(
            text, intents=["intent1", "intent3"], top_n=3)

        # Then
        expected_results = [
            extraction_result(
                intent_classification_result("intent2", 0.6),
                [custom_slot(
                    unresolved_slot((0, 3), "foo", "entity2", "slot2"))]
            ),
            extraction_result(
                intent_classification_result("intent1", 0.5),
                [custom_slot(
                    unresolved_slot((0, 3), "foo", "entity1", "slot1"))]
            ),
            extraction_result(
                intent_classification_result(None, 0.15),
                []
            ),
        ]
        expected_results_with_filter = [
            extraction_result(
                intent_classification_result("intent1", 0.5),
                [custom_slot(
                    unresolved_slot((0, 3), "foo", "entity1", "slot1"))]
            ),
            extraction_result(
                intent_classification_result(None, 0.15),
                []
            ),
            extraction_result(
                intent_classification_result("intent3", 0.05),
                []
            ),
        ]
        self.assertListEqual(expected_results, results)
        self.assertListEqual(expected_results_with_filter, results_with_filter)
Beispiel #13
0
    def test_should_use_parsers_sequentially(self):
        # Given
        input_text = "hello world"
        intent = intent_classification_result(intent_name='dummy_intent_1',
                                              probability=0.7)
        slots = [
            unresolved_slot(match_range=(6, 11),
                            value='world',
                            entity='mocked_entity',
                            slot_name='mocked_slot_name')
        ]

        class FirstIntentParserConfig(ProcessingUnitConfig):
            unit_name = "first_intent_parser"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return FirstIntentParserConfig()

            def get_required_resources(self):
                return None

        class FirstIntentParser(IntentParser):
            unit_name = "first_intent_parser"
            config_type = FirstIntentParserConfig

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                return empty_result(text)

            def persist(self, path):
                path = Path(path)
                path.mkdir()
                with (path / "metadata.json").open(mode="w") as f:
                    f.write(json_string({"unit_name": self.unit_name}))

            @classmethod
            def from_path(cls, path):
                cfg = cls.config_type()
                return cls(cfg)

        class SecondIntentParserConfig(ProcessingUnitConfig):
            unit_name = "second_intent_parser"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return SecondIntentParserConfig()

            def get_required_resources(self):
                return None

        class SecondIntentParser(IntentParser):
            unit_name = "second_intent_parser"
            config_type = SecondIntentParserConfig

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                if text == input_text:
                    return parsing_result(text, intent, slots)
                return empty_result(text)

            def persist(self, path):
                path = Path(path)
                path.mkdir()
                with (path / "metadata.json").open(mode="w") as f:
                    f.write(json_string({"unit_name": self.unit_name}))

            @classmethod
            def from_path(cls, path):
                cfg = cls.config_type()
                return cls(cfg)

        register_processing_unit(FirstIntentParser)
        register_processing_unit(SecondIntentParser)

        mocked_dataset_metadata = {
            "language_code": "en",
            "entities": {
                "mocked_entity": {
                    "automatically_extensible": True,
                    "utterances": dict()
                }
            },
            "slot_name_mappings": {
                "dummy_intent_1": {
                    "mocked_slot_name": "mocked_entity"
                }
            }
        }

        config = NLUEngineConfig(
            [FirstIntentParserConfig(),
             SecondIntentParserConfig()])
        engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
        # pylint:disable=protected-access
        engine._dataset_metadata = mocked_dataset_metadata
        # pylint:enable=protected-access

        # When
        parse = engine.parse(input_text)

        # Then
        expected_slots = [custom_slot(s) for s in slots]
        expected_parse = parsing_result(input_text, intent, expected_slots)
        self.assertDictEqual(expected_parse, parse)
Beispiel #14
0
    def test_should_be_deserializable_from_dir(self):
        # Given
        register_processing_unit(TestIntentParser1)
        register_processing_unit(TestIntentParser2)

        dataset_metadata = {
            "language_code": "en",
            "entities": {
                "Temperature": {
                    "automatically_extensible": True,
                    "utterances": {
                        "boiling": "hot",
                        "cold": "cold",
                        "hot": "hot",
                        "iced": "cold"
                    }
                }
            },
            "slot_name_mappings": {
                "MakeCoffee": {
                    "number_of_cups": "snips/number"
                },
                "MakeTea": {
                    "beverage_temperature": "Temperature",
                    "number_of_cups": "snips/number"
                }
            },
        }
        parser1_config = TestIntentParser1Config()
        parser2_config = TestIntentParser2Config()
        engine_config = NLUEngineConfig([parser1_config, parser2_config])
        engine_dict = {
            "unit_name": "nlu_engine",
            "dataset_metadata": dataset_metadata,
            "config": engine_config.to_dict(),
            "intent_parsers": [
                "test_intent_parser1",
                "test_intent_parser2",
            ],
            "model_version": snips_nlu.__model_version__,
            "training_package_version": snips_nlu.__version__
        }
        self.tmp_file_path.mkdir()
        parser1_path = self.tmp_file_path / "test_intent_parser1"
        parser1_path.mkdir()
        parser2_path = self.tmp_file_path / "test_intent_parser2"
        parser2_path.mkdir()
        (self.tmp_file_path / "resources").mkdir()
        self.writeJsonContent(self.tmp_file_path / "nlu_engine.json",
                              engine_dict)
        self.writeJsonContent(parser1_path / "metadata.json",
                              {"unit_name": "test_intent_parser1"})
        self.writeJsonContent(parser2_path / "metadata.json",
                              {"unit_name": "test_intent_parser2"})

        # When
        engine = SnipsNLUEngine.from_path(self.tmp_file_path)

        # Then
        parser1_config = TestIntentParser1Config()
        parser2_config = TestIntentParser2Config()
        expected_engine_config = NLUEngineConfig(
            [parser1_config, parser2_config]).to_dict()
        # pylint:disable=protected-access
        self.assertDictEqual(engine._dataset_metadata, dataset_metadata)
        # pylint:enable=protected-access
        self.assertDictEqual(engine.config.to_dict(), expected_engine_config)
Beispiel #15
0
    def test_synonyms_should_point_to_base_value(self, mocked_proba_parse):
        # Given
        dataset = {
            "intents": {
                "dummy_intent_1": {
                    "utterances": [{
                        "data": [{
                            "text": "dummy_1",
                            "entity": "dummy_entity_1",
                            "slot_name": "dummy_slot_name"
                        }]
                    }]
                }
            },
            "entities": {
                "dummy_entity_1": {
                    "use_synonyms":
                    True,
                    "automatically_extensible":
                    False,
                    "data": [{
                        "value": "dummy1",
                        "synonyms": ["dummy1", "dummy1_bis"]
                    }]
                }
            },
            "language": "en"
        }

        text = "dummy1_bis"
        mocked_proba_parser_intent = intent_classification_result(
            "dummy_intent_1", 1.0)
        mocked_proba_parser_slots = [
            unresolved_slot(match_range=(0, 10),
                            value="dummy1_bis",
                            entity="dummy_entity_1",
                            slot_name="dummy_slot_name")
        ]

        mocked_proba_parse.return_value = parsing_result(
            text, mocked_proba_parser_intent, mocked_proba_parser_slots)

        config = NLUEngineConfig([ProbabilisticIntentParserConfig()])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        result = engine.parse(text)

        # Then
        expected_slot = {
            RES_MATCH_RANGE: {
                "start": 0,
                "end": 10
            },
            RES_RAW_VALUE: "dummy1_bis",
            RES_VALUE: {
                "kind": "Custom",
                "value": "dummy1"
            },
            RES_ENTITY: "dummy_entity_1",
            RES_SLOT_NAME: "dummy_slot_name"
        }
        expected_result = parsing_result(text,
                                         intent=mocked_proba_parser_intent,
                                         slots=[expected_slot])
        self.assertEqual(expected_result, result)
Beispiel #16
0
    def test_should_use_parsers_sequentially(self):
        # Given
        input_text = "hello world"
        intent = intent_classification_result(intent_name='dummy_intent_1',
                                              probability=0.7)
        slots = [
            unresolved_slot(match_range=(6, 11),
                            value='world',
                            entity='mocked_entity',
                            slot_name='mocked_slot_name')
        ]

        class TestIntentParser1Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser1"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser1Config()

        class TestIntentParser1(IntentParser):
            unit_name = "test_intent_parser1"
            config_type = TestIntentParser1Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                conf = cls.config_type()
                return TestIntentParser1(conf)

        class TestIntentParser2Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser2"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser2Config()

        class TestIntentParser2(IntentParser):
            unit_name = "test_intent_parser2"
            config_type = TestIntentParser2Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                if text == input_text:
                    return parsing_result(text, intent, slots)
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                conf = cls.config_type()
                return TestIntentParser2(conf)

        register_processing_unit(TestIntentParser1)
        register_processing_unit(TestIntentParser2)

        mocked_dataset_metadata = {
            "language_code": "en",
            "entities": {
                "mocked_entity": {
                    "automatically_extensible": True,
                    "utterances": dict()
                }
            },
            "slot_name_mappings": {
                "dummy_intent_1": {
                    "mocked_slot_name": "mocked_entity"
                }
            }
        }

        config = NLUEngineConfig(
            [TestIntentParser1Config(),
             TestIntentParser2Config()])
        engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
        # pylint:disable=protected-access
        engine._dataset_metadata = mocked_dataset_metadata
        # pylint:enable=protected-access

        # When
        parse = engine.parse(input_text)

        # Then
        expected_slots = [custom_slot(s) for s in slots]
        expected_parse = parsing_result(input_text, intent, expected_slots)
        self.assertDictEqual(expected_parse, parse)
Beispiel #17
0
    def test_synonyms_should_not_collide_when_remapped_to_base_value(
            self, mocked_proba_parse):
        # Given
        # Given
        dataset = {
            "intents": {
                "intent1": {
                    "utterances": [{
                        "data": [{
                            "text": "value",
                            "entity": "entity1",
                            "slot_name": "slot1"
                        }]
                    }]
                }
            },
            "entities": {
                "entity1": {
                    "data": [{
                        "value": "a",
                        "synonyms": ["favorïte"]
                    }, {
                        "value": "b",
                        "synonyms": ["favorite"]
                    }],
                    "use_synonyms":
                    True,
                    "automatically_extensible":
                    False
                }
            },
            "language": "en",
        }

        mocked_proba_parser_intent = intent_classification_result(
            "intent1", 1.0)

        def mock_proba_parse(text, intents):
            slots = [
                unresolved_slot(match_range=(0, len(text)),
                                value=text,
                                entity="entity1",
                                slot_name="slot1")
            ]
            return parsing_result(text, mocked_proba_parser_intent, slots)

        mocked_proba_parse.side_effect = mock_proba_parse

        config = NLUEngineConfig([ProbabilisticIntentParserConfig()])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        result1 = engine.parse("favorite")
        result2 = engine.parse("favorïte")

        # Then
        expected_slot1 = {
            RES_MATCH_RANGE: {
                "start": 0,
                "end": 8
            },
            RES_RAW_VALUE: "favorite",
            RES_VALUE: {
                "kind": "Custom",
                "value": "b"
            },
            RES_ENTITY: "entity1",
            RES_SLOT_NAME: "slot1"
        }
        expected_slot2 = {
            RES_MATCH_RANGE: {
                "start": 0,
                "end": 8
            },
            RES_RAW_VALUE: "favorïte",
            RES_VALUE: {
                "kind": "Custom",
                "value": "a"
            },
            RES_ENTITY: "entity1",
            RES_SLOT_NAME: "slot1"
        }
        expected_result1 = parsing_result("favorite",
                                          intent=mocked_proba_parser_intent,
                                          slots=[expected_slot1])
        expected_result2 = parsing_result("favorïte",
                                          intent=mocked_proba_parser_intent,
                                          slots=[expected_slot2])
        self.assertEqual(expected_result1, result1)
        self.assertEqual(expected_result2, result2)
Beispiel #18
0
    def test_should_retrain_only_non_trained_subunits(self):
        # Given
        class TestIntentParserConfig(ProcessingUnitConfig):
            unit_name = "test_intent_parser"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParserConfig()

        class TestIntentParser(IntentParser):
            unit_name = "test_intent_parser"
            config_type = TestIntentParserConfig

            def __init__(self, config):
                super(TestIntentParser, self).__init__(config)
                self.sub_unit_1 = dict(fitted=False, calls=0)
                self.sub_unit_2 = dict(fitted=False, calls=0)

            def fit(self, dataset, force_retrain):
                if force_retrain:
                    self.sub_unit_1["fitted"] = True
                    self.sub_unit_1["calls"] += 1
                    self.sub_unit_2["fitted"] = True
                    self.sub_unit_2["calls"] += 1
                else:
                    if not self.sub_unit_1["fitted"]:
                        self.sub_unit_1["fitted"] = True
                        self.sub_unit_1["calls"] += 1
                    if not self.sub_unit_2["fitted"]:
                        self.sub_unit_2["fitted"] = True
                        self.sub_unit_2["calls"] += 1

                return self

            @property
            def fitted(self):
                return self.sub_unit_1["fitted"] and \
                       self.sub_unit_2["fitted"]

            def parse(self, text, intents):
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                conf = cls.config_type()
                return TestIntentParser(conf)

        register_processing_unit(TestIntentParser)

        intent_parser_config = TestIntentParserConfig()
        nlu_engine_config = NLUEngineConfig([intent_parser_config])
        nlu_engine = SnipsNLUEngine(nlu_engine_config)

        intent_parser = TestIntentParser(intent_parser_config)
        intent_parser.sub_unit_1.update(dict(fitted=True, calls=0))
        nlu_engine.intent_parsers.append(intent_parser)

        # When
        nlu_engine.fit(SAMPLE_DATASET, force_retrain=False)

        # Then
        self.assertDictEqual(dict(fitted=True, calls=0),
                             intent_parser.sub_unit_1)
        self.assertDictEqual(dict(fitted=True, calls=1),
                             intent_parser.sub_unit_2)
    def test_synonyms_should_not_collide_when_remapped_to_base_value(self):
        # Given
        dataset = {
            "intents": {
                "intent1": {
                    "utterances": [
                        {
                            "data": [
                                {
                                    "text": "value",
                                    "entity": "entity1",
                                    "slot_name": "slot1"
                                }
                            ]
                        }
                    ]
                }
            },
            "entities": {
                "entity1": {
                    "data": [
                        {
                            "value": "a",
                            "synonyms": [
                                "favorïte"
                            ]
                        },
                        {
                            "value": "b",
                            "synonyms": [
                                "favorite"
                            ]
                        }
                    ],
                    "use_synonyms": True,
                    "automatically_extensible": False,
                    "matching_strictness": 1.0
                }
            },
            "language": "en",
        }

        mocked_intent = intent_classification_result("intent1", 1.0)

        # pylint:disable=unused-variable
        @IntentParser.register("my_intent_parser", True)
        class MyIntentParser(MockIntentParser):
            def parse(self, text, intents=None, top_n=None):
                slots = [
                    unresolved_slot(match_range=(0, len(text)), value=text,
                                    entity="entity1", slot_name="slot1")]
                return parsing_result(text, mocked_intent, slots)

        # pylint:enable=unused-variable

        config = NLUEngineConfig(["my_intent_parser"])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        result1 = engine.parse("favorite")
        result2 = engine.parse("favorïte")

        # Then
        expected_slot1 = {
            RES_MATCH_RANGE: {
                "start": 0,
                "end": 8
            },
            RES_RAW_VALUE: "favorite",
            RES_VALUE: {
                "kind": "Custom",
                "value": "b"
            },
            RES_ENTITY: "entity1",
            RES_SLOT_NAME: "slot1"
        }
        expected_slot2 = {
            RES_MATCH_RANGE: {
                "start": 0,
                "end": 8
            },
            RES_RAW_VALUE: "favorïte",
            RES_VALUE: {
                "kind": "Custom",
                "value": "a"
            },
            RES_ENTITY: "entity1",
            RES_SLOT_NAME: "slot1"
        }
        expected_result1 = parsing_result("favorite", intent=mocked_intent,
                                          slots=[expected_slot1])
        expected_result2 = parsing_result("favorïte", intent=mocked_intent,
                                          slots=[expected_slot2])
        self.assertEqual(expected_result1, result1)
        self.assertEqual(expected_result2, result2)
Beispiel #20
0
    def test_should_be_deserializable(self):
        # When
        class TestIntentParser1Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser1"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser1Config()

        class TestIntentParser1(IntentParser):
            unit_name = "test_intent_parser1"
            config_type = TestIntentParser1Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                config = cls.config_type()
                return TestIntentParser1(config)

        class TestIntentParser2Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser2"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser2Config()

        class TestIntentParser2(IntentParser):
            unit_name = "test_intent_parser2"
            config_type = TestIntentParser2Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                config = cls.config_type()
                return TestIntentParser2(config)

        register_processing_unit(TestIntentParser1)
        register_processing_unit(TestIntentParser2)

        dataset_metadata = {
            "language_code": "en",
            "entities": {
                "Temperature": {
                    "automatically_extensible": True,
                    "utterances": {
                        "boiling": "hot",
                        "cold": "cold",
                        "hot": "hot",
                        "iced": "cold"
                    }
                }
            },
            "slot_name_mappings": {
                "MakeCoffee": {
                    "number_of_cups": "snips/number"
                },
                "MakeTea": {
                    "beverage_temperature": "Temperature",
                    "number_of_cups": "snips/number"
                }
            },
        }
        parser1_config = TestIntentParser1Config()
        parser2_config = TestIntentParser2Config()
        engine_config = NLUEngineConfig([parser1_config, parser2_config])
        engine_dict = {
            "unit_name":
            "nlu_engine",
            "dataset_metadata":
            dataset_metadata,
            "config":
            engine_config.to_dict(),
            "intent_parsers": [
                {
                    "unit_name": "test_intent_parser1"
                },
                {
                    "unit_name": "test_intent_parser2"
                },
            ],
            "model_version":
            snips_nlu.__model_version__,
            "training_package_version":
            snips_nlu.__version__
        }
        engine = SnipsNLUEngine.from_dict(engine_dict)

        # Then
        parser1_config = TestIntentParser1Config()
        parser2_config = TestIntentParser2Config()
        expected_engine_config = NLUEngineConfig(
            [parser1_config, parser2_config]).to_dict()
        # pylint:disable=protected-access
        self.assertDictEqual(engine._dataset_metadata, dataset_metadata)
        # pylint:enable=protected-access
        self.assertDictEqual(engine.config.to_dict(), expected_engine_config)
Beispiel #21
0
    def test_should_be_deserializable(self):
        # When
        class TestIntentParser1Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser1"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser1Config()

        class TestIntentParser1(IntentParser):
            unit_name = "test_intent_parser1"
            config_type = TestIntentParser1Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                config = cls.config_type()
                return TestIntentParser1(config)

        class TestIntentParser2Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser2"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser2Config()

        class TestIntentParser2(IntentParser):
            unit_name = "test_intent_parser2"
            config_type = TestIntentParser2Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                config = cls.config_type()
                return TestIntentParser2(config)

        register_processing_unit(TestIntentParser1)
        register_processing_unit(TestIntentParser2)

        dataset_metadata = {
            "language_code": "en",
            "entities": {
                "Temperature": {
                    "automatically_extensible": True,
                    "utterances": {
                        "boiling": "hot",
                        "cold": "cold",
                        "hot": "hot",
                        "iced": "cold"
                    }
                }
            },
            "slot_name_mappings": {
                "MakeCoffee": {
                    "number_of_cups": "snips/number"
                },
                "MakeTea": {
                    "beverage_temperature": "Temperature",
                    "number_of_cups": "snips/number"
                }
            },
        }
        parser1_config = TestIntentParser1Config()
        parser2_config = TestIntentParser2Config()
        engine_config = NLUEngineConfig([parser1_config, parser2_config])
        engine_dict = {
            "unit_name": "nlu_engine",
            "dataset_metadata": dataset_metadata,
            "config": engine_config.to_dict(),
            "intent_parsers": [
                {"unit_name": "test_intent_parser1"},
                {"unit_name": "test_intent_parser2"},
            ],
            "model_version": snips_nlu.version.__model_version__,
            "training_package_version": snips_nlu.__version__
        }
        engine = SnipsNLUEngine.from_dict(engine_dict)

        # Then
        parser1_config = TestIntentParser1Config()
        parser2_config = TestIntentParser2Config()
        expected_engine_config = NLUEngineConfig(
            [parser1_config, parser2_config]).to_dict()
        # pylint:disable=protected-access
        self.assertDictEqual(engine._dataset_metadata, dataset_metadata)
        # pylint:enable=protected-access
        self.assertDictEqual(engine.config.to_dict(), expected_engine_config)