    def test_fit_transform(self, mocked_preprocess):
        # Given
        t = "a b c d e f"
        u = text_to_utterance(t)
        builtin_ents = [
            {
                "value": "e",
                "resolved_value": "e",
                "range": {
                    "start": 8,
                    "end": 9
                },
                "entity_kind": "the_snips_e_entity"
            }
        ]
        custom_ents = [
            {
                "value": "c",
                "resolved_value": "c",
                "range": {
                    "start": 4,
                    "end": 5
                },
                "entity_kind": "the_c_entity"
            }
        ]
        mocked_preprocess.return_value = [u], [builtin_ents], [custom_ents]
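        # The preprocessing step is mocked (the @patch decorator that injects
        # ``mocked_preprocess`` is not shown in this excerpt) so that it
        # returns the tokenized utterance along with its builtin and custom
        # entity matches.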

        config = CooccurrenceVectorizerConfig(
            window_size=3,
            unknown_words_replacement_string="b",
            filter_stop_words=False
        )

        dataset = get_empty_dataset("en")

        builtin_parser = EntityParserMock({t: builtin_ents})
        custom_parser = EntityParserMock({t: custom_ents})
        resources = {STOP_WORDS: set()}
        vectorizer1 = CooccurrenceVectorizer(
            config, builtin_entity_parser=builtin_parser,
            custom_entity_parser=custom_parser, resources=resources)
        vectorizer2 = CooccurrenceVectorizer(
            config, builtin_entity_parser=builtin_parser,
            custom_entity_parser=custom_parser, resources=resources)
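        # Both vectorizers share the same config, parsers and resources, so
        # fit(...).transform(...) and fit_transform(...) can be compared.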

        # When
        x = [u]
        x_0 = vectorizer1.fit(x, dataset).transform(x).todense().tolist()
        x_1 = vectorizer2.fit_transform(x, dataset).todense().tolist()

        # Then
        self.assertListEqual(x_0, x_1)

    def test_should_create_number_variation(self):
        # Given
        args = {
            1: {
                "numbers": True,
                "and_": True,
                "case": True,
                "punctuation": True,
            },
            1001: {
                "numbers": False,
                "and_": True,
                "case": True,
                "punctuation": True,
            },
            10001: {
                "numbers": False,
                "and_": False,
                "case": False,
                "punctuation": False,
            }
        }
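        # Keys are the number of entity values; values are the variation
        # flags expected to be passed to get_string_variations. Variations
        # are progressively disabled as the entity grows.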

        for num_ents, expected_args in iteritems(args):
            entity = {
                "matching_strictness": 1.0,
                "use_synonyms": False,
                "automatically_extensible": False,
                "data": [
                    {"value": str(i), "synonyms": []}
                    for i in range(num_ents)
                ]
            }
            builtin_entity_parser = EntityParserMock(dict())
            with patch("snips_nlu.dataset.validation"
                       ".get_string_variations") as mocked_string_variations:
                mocked_string_variations.return_value = []
                # When
                _validate_and_format_custom_entity(entity, [], "en",
                                                   builtin_entity_parser)
                # Then
                for call in mocked_string_variations.mock_calls:
                    kwargs = call[2]
                    for k in expected_args:
                        self.assertEqual(expected_args[k], kwargs[k])
Example #3
    def test_transform(self):
        # Given
        config = CooccurrenceVectorizerConfig(
            filter_stop_words=True,
            window_size=3,
            unknown_words_replacement_string="d")

        t_0 = "yo a b c d e f yo"
        t_1 = "yo a b c d e"
        u_0 = text_to_utterance(t_0)
        u_1 = text_to_utterance(t_1)

        resources = {STOP_WORDS: {"b"}}

        builtin_ents = [{
            "value": "e",
            "resolved_value": "e",
            "range": {
                "start": 11,
                "end": 12
            },
            "entity_kind": "the_snips_e_entity"
        }]
        custom_ents = [{
            "value": "c",
            "resolved_value": "c",
            "range": {
                "start": 7,
                "end": 8
            },
            "entity_kind": "the_c_entity"
        }]

        builtin_parser = EntityParserMock({
            t_0: builtin_ents,
            t_1: builtin_ents
        })
        custom_parser = EntityParserMock({t_0: custom_ents, t_1: custom_ents})

        vectorizer = CooccurrenceVectorizer(
            config,
            builtin_entity_parser=builtin_parser,
            custom_entity_parser=custom_parser,
            resources=resources)

        vectorizer._language = "en"
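        # Bypass fitting by setting the pair vocabulary directly: entity
        # values appear as their uppercased entity kind placeholders, and
        # each pair maps to its column index in the output matrix.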
        vectorizer._word_pairs = {
            ("THE_SNIPS_E_ENTITY", "f"): 0,
            ("a", "THE_C_ENTITY"): 1,
            ("a", "THE_SNIPS_E_ENTITY"): 2,
            ("b", "THE_SNIPS_E_ENTITY"): 3,
            ("yo", "yo"): 4,
            ("d", "THE_SNIPS_E_ENTITY"): 5
        }

        data = [u_0, u_1]

        # When
        x = vectorizer.transform(data)

        # Then
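        # Only the first three pairs fire for u_0, and two of them for u_1:
        # "b" is filtered as a stop word, "d" is the unknown-words replacement
        # string, the two "yo" tokens in t_0 are farther apart than the window
        # size, and t_1 contains no "f".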
        expected = [[1, 1, 1, 0, 0, 0], [0, 1, 1, 0, 0, 0]]
        self.assertEqual(expected, x.todense().tolist())
Example #4
    def test_should_be_deserializable(self):
        # Given
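        # A serialized lookup parser: "map" keys are hashes of utterance
        # patterns with builtin entity values replaced by placeholders, and
        # the values are [intent index, slot indices] into the name lists
        # below.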
        parser_dict = {
            "config": {
                "unit_name": "lookup_intent_parser",
                "ignore_stop_words": True
            },
            "language_code": "en",
            "map": {
                hash_str("make coffee"): [0, []],
                hash_str("prepare % snipsnumber % coffees"): [0, [0]],
                hash_str("% snipsnumber % teas at % snipstemperature %"):
                    [1, [0, 1]],
            },
            "slots_names": ["nb_cups", "tea_temperature"],
            "intents_names": ["MakeCoffee", "MakeTea"],
            "entity_scopes": [
                {
                    "entity_scope": {
                        "builtin": ["snips/number"],
                        "custom": [],
                    },
                    "intent_group": ["MakeCoffee"]
                },
                {
                    "entity_scope": {
                        "builtin": ["snips/number", "snips/temperature"],
                        "custom": [],
                    },
                    "intent_group": ["MakeTea"]
                },
            ],
            "stop_words_whitelist": dict()
        }
        self.tmp_file_path.mkdir()
        metadata = {"unit_name": "lookup_intent_parser"}
        self.writeJsonContent(
            self.tmp_file_path / "intent_parser.json", parser_dict)
        self.writeJsonContent(self.tmp_file_path / "metadata.json", metadata)
        resources = self.get_resources("en")
        builtin_entity_parser = BuiltinEntityParser.build(language="en")
        custom_entity_parser = EntityParserMock()

        # When
        parser = LookupIntentParser.from_path(
            self.tmp_file_path, custom_entity_parser=custom_entity_parser,
            builtin_entity_parser=builtin_entity_parser,
            resources=resources)
        res_make_coffee = parser.parse("make me a coffee")
        res_make_tea = parser.parse("two teas at 90°C please")

        # Then
        expected_result_coffee = parsing_result(
            input="make me a coffee",
            intent=intent_classification_result("MakeCoffee", 1.0),
            slots=[])
        expected_result_tea = parsing_result(
            input="two teas at 90°C please",
            intent=intent_classification_result("MakeTea", 1.0),
            slots=[
                {
                    "entity": "snips/number",
                    "range": {"end": 3, "start": 0},
                    "slotName": "nb_cups",
                    "value": "two"
                },
                {
                    "entity": "snips/temperature",
                    "range": {"end": 16, "start": 12},
                    "slotName": "tea_temperature",
                    "value": "90°C"
                }
            ])
        self.assertEqual(expected_result_coffee, res_make_coffee)
        self.assertEqual(expected_result_tea, res_make_tea)