def test_nlu_engine_should_raise_error_with_bytes_input(self):
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    bytes_input = b"brew me an espresso"

    # pylint:disable=unused-variable
    @IntentParser.register("my_intent_parser", True)
    class MyIntentParser(MockIntentParser):
        pass
    # pylint:enable=unused-variable

    config = NLUEngineConfig(["my_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When / Then
    with self.assertRaises(InvalidInputError) as cm:
        engine.parse(bytes_input)
    message = str(cm.exception.args[0])
    self.assertTrue("Expected unicode but received" in message)

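# Several tests in this section subclass a `MockIntentParser` helper that is
# not defined here. A minimal sketch of the interface it is assumed to
# provide, inferred from how the tests use it (commented out to avoid
# shadowing the real helper; names and defaults are guesses, not
# authoritative):
#
#     class MockIntentParser(IntentParser):
#         def fit(self, dataset, force_retrain=True):
#             self._fitted = True
#             return self
#
#         @property
#         def fitted(self):
#             return hasattr(self, "_fitted") and self._fitted
#
#         def parse(self, text, intents=None, top_n=None):
#             return empty_result(text, 1.0)
#
#         def get_intents(self, text):
#             return []
#
#         def get_slots(self, text, intent):
#             return []
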
def test_should_fit_and_parse_empty_intent(self):
    # Given
    dataset = {
        "intents": {
            "dummy_intent": {
                "utterances": [
                    {
                        "data": [
                            {
                                "text": " "
                            }
                        ]
                    }
                ]
            }
        },
        "language": "en",
        "entities": dict()
    }

    engine = SnipsNLUEngine()

    # When / Then
    engine.fit(dataset)
    engine.parse("ya", intents=["dummy_intent"])

def test_should_not_parse_slots_when_not_fitted(self):
    # Given
    engine = SnipsNLUEngine()

    # When / Then
    self.assertFalse(engine.fitted)
    with self.assertRaises(NotTrained):
        engine.parse("foobar")

def test_nlu_engine_should_raise_error_with_bytes_input(self):
    # Given
    bytes_input = b"brew me an espresso"
    engine = SnipsNLUEngine().fit(BEVERAGE_DATASET)

    # When / Then
    with self.assertRaises(TypeError) as cm:
        engine.parse(bytes_input)
    message = str(cm.exception.args[0])
    self.assertTrue("Expected unicode but received" in message)

def test_nlu_engine_should_train_and_parse_in_all_languages(self):
    # Given
    text = "brew me an espresso"
    for language in get_all_languages():
        dataset = deepcopy(BEVERAGE_DATASET)
        dataset[LANGUAGE] = language
        engine = SnipsNLUEngine()

        # When / Then
        msg = "Could not fit engine in '%s'" % language
        with self.fail_if_exception(msg):
            engine = engine.fit(dataset)

        msg = "Could not parse in '%s'" % language
        with self.fail_if_exception(msg):
            engine.parse(text)

def test_nlu_engine_should_train_and_parse_in_all_languages(self):
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
- i want [number_of_cups] cups of [beverage_temperature](boiling hot) tea pls
- can you prepare [number_of_cups] cup of [beverage_temperature](cold) tea ?

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee
- can you prepare [number_of_cups] cup of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    text = "please brew me a cup of coffee"
    for language in get_all_languages():
        dataset[LANGUAGE] = language
        engine = SnipsNLUEngine()

        # When / Then
        msg = "Could not fit engine in '%s'" % language
        with self.fail_if_exception(msg):
            engine = engine.fit(dataset)

        msg = "Could not parse in '%s'" % language
        with self.fail_if_exception(msg):
            res = engine.parse(text)

        self.assertEqual("MakeCoffee", res[RES_INTENT][RES_INTENT_NAME])

def test_should_handle_empty_dataset(self):
    # Given
    dataset = validate_and_format_dataset(get_empty_dataset(LANGUAGE_EN))
    engine = SnipsNLUEngine().fit(dataset)

    # When
    result = engine.parse("hello world")

    # Then
    self.assertEqual(empty_result("hello world"), result)

def test_should_handle_empty_dataset(self):
    # Given
    dataset = get_empty_dataset(LANGUAGE_EN)
    shared = self.get_shared_data(dataset)
    engine = SnipsNLUEngine(**shared).fit(dataset)

    # When
    result = engine.parse("hello world")

    # Then
    self.assertEqual(empty_result("hello world", 1.0), result)

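# For reference, a sketch of the result shape `empty_result` is assumed to
# build, inferred from how these tests consume it (the helper name and the
# literal keys below are assumptions; the tests themselves use the RES_*
# constants):
def _empty_result_sketch(input_text, probability):
    # No intent is matched: intentName is None and the slot list is empty.
    return {
        "input": input_text,
        "intent": {"intentName": None, "probability": probability},
        "slots": [],
    }
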
def test_should_use_parsers_sequentially(self):
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- hello [greeted:name](john)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    input_text = "hello snips"
    intent = intent_classification_result(intent_name='greeting1',
                                          probability=0.7)
    slots = [
        unresolved_slot(match_range=(6, 11), value='snips',
                        entity='name', slot_name='greeted')
    ]

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        pass

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def parse(self, text, intents=None, top_n=None):
            if text == input_text:
                return parsing_result(text, intent, slots)
            return empty_result(text, 1.0)
    # pylint:enable=unused-variable

    config = NLUEngineConfig(
        ["first_intent_parser", "second_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    parse = engine.parse(input_text)

    # Then
    expected_slots = [custom_slot(s) for s in slots]
    expected_parse = parsing_result(input_text, intent, expected_slots)
    self.assertDictEqual(expected_parse, parse)

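# `@IntentParser.register(name, override)` is assumed to add the decorated
# class to the intent-parser registry under `name`, which is what lets
# NLUEngineConfig refer to parsers by name as above; the boolean argument
# presumably allows overriding an existing registration with the same name.
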
def test_should_use_parsers_sequentially(self):
    # Given
    input_text = "hello world"
    intent = intent_classification_result(intent_name='dummy_intent_1',
                                          probability=0.7)
    slots = [
        unresolved_slot(match_range=(6, 11), value='world',
                        entity='mocked_entity',
                        slot_name='mocked_slot_name')
    ]

    class TestIntentParser1Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser1"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser1Config()

    class TestIntentParser1(IntentParser):
        unit_name = "test_intent_parser1"
        config_type = TestIntentParser1Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return hasattr(self, '_fitted') and self._fitted

        def parse(self, text, intents):
            return empty_result(text)

        def to_dict(self):
            return {
                "unit_name": self.unit_name,
            }

        @classmethod
        def from_dict(cls, unit_dict):
            conf = cls.config_type()
            return TestIntentParser1(conf)

    class TestIntentParser2Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser2"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser2Config()

    class TestIntentParser2(IntentParser):
        unit_name = "test_intent_parser2"
        config_type = TestIntentParser2Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return hasattr(self, '_fitted') and self._fitted

        def parse(self, text, intents):
            if text == input_text:
                return parsing_result(text, intent, slots)
            return empty_result(text)

        def to_dict(self):
            return {
                "unit_name": self.unit_name,
            }

        @classmethod
        def from_dict(cls, unit_dict):
            conf = cls.config_type()
            return TestIntentParser2(conf)

    register_processing_unit(TestIntentParser1)
    register_processing_unit(TestIntentParser2)

    mocked_dataset_metadata = {
        "language_code": "en",
        "entities": {
            "mocked_entity": {
                "automatically_extensible": True,
                "utterances": dict()
            }
        },
        "slot_name_mappings": {
            "dummy_intent_1": {
                "mocked_slot_name": "mocked_entity"
            }
        }
    }

    config = NLUEngineConfig(
        [TestIntentParser1Config(), TestIntentParser2Config()])
    engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
    # pylint:disable=protected-access
    engine._dataset_metadata = mocked_dataset_metadata
    # pylint:enable=protected-access

    # When
    parse = engine.parse(input_text)

    # Then
    expected_slots = [custom_slot(s) for s in slots]
    expected_parse = parsing_result(input_text, intent, expected_slots)
    self.assertDictEqual(expected_parse, parse)

def test_synonyms_should_point_to_base_value(self):
    # Given
    dataset = {
        "intents": {
            "dummy_intent_1": {
                "utterances": [
                    {
                        "data": [
                            {
                                "text": "dummy_1",
                                "entity": "dummy_entity_1",
                                "slot_name": "dummy_slot_name"
                            }
                        ]
                    }
                ]
            }
        },
        "entities": {
            "dummy_entity_1": {
                "use_synonyms": True,
                "automatically_extensible": False,
                "data": [
                    {
                        "value": "dummy1",
                        "synonyms": [
                            "dummy1",
                            "dummy1_bis"
                        ]
                    }
                ],
                "matching_strictness": 1.0
            }
        },
        "language": "en"
    }

    mocked_intent = intent_classification_result("dummy_intent_1", 1.0)
    mocked_slots = [
        unresolved_slot(match_range=(0, 10), value="dummy1_bis",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name")]

    # pylint:disable=unused-variable
    @IntentParser.register("my_intent_parser", True)
    class MyIntentParser(MockIntentParser):
        def parse(self, text, intents=None, top_n=None):
            return parsing_result(text, mocked_intent, mocked_slots)
    # pylint:enable=unused-variable

    input_ = "dummy1_bis"
    config = NLUEngineConfig(["my_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    result = engine.parse(input_)

    # Then
    expected_slot = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 10
        },
        RES_RAW_VALUE: "dummy1_bis",
        RES_VALUE: {
            "kind": "Custom",
            "value": "dummy1"
        },
        RES_ENTITY: "dummy_entity_1",
        RES_SLOT_NAME: "dummy_slot_name"
    }
    expected_result = parsing_result(
        input_, mocked_intent, slots=[expected_slot])
    self.assertEqual(expected_result, result)

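# The expected slot above encodes the behaviour under test: the raw matched
# text is kept while the resolved value points at the synonym's base value.
# A sketch of that mapping (the helper name is hypothetical):
def _resolved_custom_slot_sketch(raw_value, base_value, start, end, entity,
                                 slot_name):
    # RES_RAW_VALUE keeps what the user typed; RES_VALUE carries the base
    # value the synonym resolves to.
    return {
        RES_MATCH_RANGE: {"start": start, "end": end},
        RES_RAW_VALUE: raw_value,
        RES_VALUE: {"kind": "Custom", "value": base_value},
        RES_ENTITY: entity,
        RES_SLOT_NAME: slot_name,
    }
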
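# Several tests below take extra `mocked_*_parse` arguments, which implies
# they are decorated with `mock.patch` on the corresponding parsers' `parse`
# methods; those decorators are not shown in this section.
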
def test_synonyms_should_not_collide_when_remapped_to_base_value(
        self, mocked_proba_parse):
    # Given
    dataset = {
        "intents": {
            "intent1": {
                "utterances": [{
                    "data": [{
                        "text": "value",
                        "entity": "entity1",
                        "slot_name": "slot1"
                    }]
                }]
            }
        },
        "entities": {
            "entity1": {
                "data": [{
                    "value": "a",
                    "synonyms": ["favorïte"]
                }, {
                    "value": "b",
                    "synonyms": ["favorite"]
                }],
                "use_synonyms": True,
                "automatically_extensible": False
            }
        },
        "language": "en",
    }

    mocked_proba_parser_intent = intent_classification_result(
        "intent1", 1.0)

    def mock_proba_parse(text, intents):
        slots = [
            unresolved_slot(match_range=(0, len(text)), value=text,
                            entity="entity1", slot_name="slot1")
        ]
        return parsing_result(text, mocked_proba_parser_intent, slots)

    mocked_proba_parse.side_effect = mock_proba_parse

    config = NLUEngineConfig([ProbabilisticIntentParserConfig()])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    result1 = engine.parse("favorite")
    result2 = engine.parse("favorïte")

    # Then
    expected_slot1 = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 8
        },
        RES_RAW_VALUE: "favorite",
        RES_VALUE: {
            "kind": "Custom",
            "value": "b"
        },
        RES_ENTITY: "entity1",
        RES_SLOT_NAME: "slot1"
    }
    expected_slot2 = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 8
        },
        RES_RAW_VALUE: "favorïte",
        RES_VALUE: {
            "kind": "Custom",
            "value": "a"
        },
        RES_ENTITY: "entity1",
        RES_SLOT_NAME: "slot1"
    }
    expected_result1 = parsing_result("favorite",
                                      intent=mocked_proba_parser_intent,
                                      slots=[expected_slot1])
    expected_result2 = parsing_result("favorïte",
                                      intent=mocked_proba_parser_intent,
                                      slots=[expected_slot2])
    self.assertEqual(expected_result1, result1)
    self.assertEqual(expected_result2, result2)

def test_should_use_parsers_sequentially(self):
    # Given
    input_text = "hello world"
    intent = intent_classification_result(intent_name='dummy_intent_1',
                                          probability=0.7)
    slots = [
        unresolved_slot(match_range=(6, 11), value='world',
                        entity='mocked_entity',
                        slot_name='mocked_slot_name')
    ]

    class FirstIntentParserConfig(ProcessingUnitConfig):
        unit_name = "first_intent_parser"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return FirstIntentParserConfig()

        def get_required_resources(self):
            return None

    class FirstIntentParser(IntentParser):
        unit_name = "first_intent_parser"
        config_type = FirstIntentParserConfig

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return hasattr(self, '_fitted') and self._fitted

        def parse(self, text, intents):
            return empty_result(text)

        def persist(self, path):
            path = Path(path)
            path.mkdir()
            with (path / "metadata.json").open(mode="w") as f:
                f.write(json_string({"unit_name": self.unit_name}))

        @classmethod
        def from_path(cls, path):
            cfg = cls.config_type()
            return cls(cfg)

    class SecondIntentParserConfig(ProcessingUnitConfig):
        unit_name = "second_intent_parser"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return SecondIntentParserConfig()

        def get_required_resources(self):
            return None

    class SecondIntentParser(IntentParser):
        unit_name = "second_intent_parser"
        config_type = SecondIntentParserConfig

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return hasattr(self, '_fitted') and self._fitted

        def parse(self, text, intents):
            if text == input_text:
                return parsing_result(text, intent, slots)
            return empty_result(text)

        def persist(self, path):
            path = Path(path)
            path.mkdir()
            with (path / "metadata.json").open(mode="w") as f:
                f.write(json_string({"unit_name": self.unit_name}))

        @classmethod
        def from_path(cls, path):
            cfg = cls.config_type()
            return cls(cfg)

    register_processing_unit(FirstIntentParser)
    register_processing_unit(SecondIntentParser)

    mocked_dataset_metadata = {
        "language_code": "en",
        "entities": {
            "mocked_entity": {
                "automatically_extensible": True,
                "utterances": dict()
            }
        },
        "slot_name_mappings": {
            "dummy_intent_1": {
                "mocked_slot_name": "mocked_entity"
            }
        }
    }

    config = NLUEngineConfig(
        [FirstIntentParserConfig(), SecondIntentParserConfig()])
    engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
    # pylint:disable=protected-access
    engine._dataset_metadata = mocked_dataset_metadata
    # pylint:enable=protected-access

    # When
    parse = engine.parse(input_text)

    # Then
    expected_slots = [custom_slot(s) for s in slots]
    expected_parse = parsing_result(input_text, intent, expected_slots)
    self.assertDictEqual(expected_parse, parse)

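# The persist/from_path pair above mirrors the serialization contract the
# engine is assumed to expect from processing units: `persist` writes the
# unit's files under the given path (here just a metadata.json carrying the
# unit_name), and `from_path` rebuilds the unit from that directory (here
# with a default config, since these mocks keep no state).
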
def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                        mocked_crf_parse):
    # Given
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {
            "dummy_intent_1": {
                "utterances": [
                    {
                        "data": [
                            {
                                "text": "dummy_1",
                                "entity": "dummy_entity_1",
                                "slot_name": "dummy_slot_name"
                            },
                            {
                                "text": " dummy_2",
                                "entity": "dummy_entity_2",
                                "slot_name": "other_dummy_slot_name"
                            }
                        ]
                    }
                ]
            }
        },
        "entities": {
            "dummy_entity_1": {
                "use_synonyms": True,
                "automatically_extensible": False,
                "data": [
                    {
                        "value": "dummy1",
                        "synonyms": [
                            "dummy1",
                            "dummy1_bis"
                        ]
                    },
                    {
                        "value": "dummy2",
                        "synonyms": [
                            "dummy2",
                            "dummy2_bis"
                        ]
                    }
                ]
            },
            "dummy_entity_2": {
                "use_synonyms": False,
                "automatically_extensible": True,
                "data": [
                    {
                        "value": "dummy2",
                        "synonyms": [
                            "dummy2"
                        ]
                    }
                ]
            }
        },
        "language": "en"
    }

    text = "dummy_3 dummy_4"
    mocked_crf_intent = intent_classification_result("dummy_intent_1", 1.0)
    mocked_crf_slots = [unresolved_slot(match_range=(0, 7),
                                        value="dummy_3",
                                        entity="dummy_entity_1",
                                        slot_name="dummy_slot_name"),
                        unresolved_slot(match_range=(8, 15),
                                        value="dummy_4",
                                        entity="dummy_entity_2",
                                        slot_name="other_dummy_slot_name")]

    mocked_regex_parse.return_value = empty_result(text)
    mocked_crf_parse.return_value = parsing_result(
        text, mocked_crf_intent, mocked_crf_slots)

    engine = SnipsNLUEngine()

    # When
    engine = engine.fit(dataset)
    result = engine.parse(text)

    # Then
    expected_slot = custom_slot(unresolved_slot(
        match_range=(8, 15), value="dummy_4", entity="dummy_entity_2",
        slot_name="other_dummy_slot_name"))
    expected_result = parsing_result(text, intent=mocked_crf_intent,
                                     slots=[expected_slot])
    self.assertEqual(expected_result, result)

def test_synonyms_should_point_to_base_value(self, mocked_deter_parse,
                                             mocked_proba_parse):
    # Given
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {
            "dummy_intent_1": {
                "utterances": [{
                    "data": [{
                        "text": "dummy_1",
                        "entity": "dummy_entity_1",
                        "slot_name": "dummy_slot_name"
                    }]
                }]
            }
        },
        "entities": {
            "dummy_entity_1": {
                "use_synonyms": True,
                "automatically_extensible": False,
                "data": [{
                    "value": "dummy1",
                    "synonyms": ["dummy1", "dummy1_bis"]
                }]
            }
        },
        "language": "en"
    }

    text = "dummy1_bis"
    mocked_proba_parser_intent = intent_classification_result(
        "dummy_intent_1", 1.0)
    mocked_proba_parser_slots = [
        unresolved_slot(match_range=(0, 10), value="dummy1_bis",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name")
    ]

    mocked_deter_parse.return_value = empty_result(text)
    mocked_proba_parse.return_value = parsing_result(
        text, mocked_proba_parser_intent, mocked_proba_parser_slots)

    engine = SnipsNLUEngine().fit(dataset)

    # When
    result = engine.parse(text)

    # Then
    expected_slot = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 10
        },
        RES_RAW_VALUE: "dummy1_bis",
        RES_VALUE: {
            "kind": "Custom",
            "value": "dummy1"
        },
        RES_ENTITY: "dummy_entity_1",
        RES_SLOT_NAME: "dummy_slot_name"
    }
    expected_result = parsing_result(text,
                                     intent=mocked_proba_parser_intent,
                                     slots=[expected_slot])
    self.assertEqual(expected_result, result)

def test_should_parse_top_intents(self):
    # Given
    text = "foo bar ban"
    dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
- foo [slot1:entity1](bak)

---
type: intent
name: intent2
utterances:
- '[slot2:entity2](foo) baz'

---
type: intent
name: intent3
utterances:
- foo bap""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        def get_intents(self, text):
            return [
                intent_classification_result("intent1", 0.5),
                intent_classification_result("intent2", 0.3),
                intent_classification_result(None, 0.15),
                intent_classification_result("intent3", 0.05)
            ]

        def get_slots(self, text, intent):
            if intent == "intent1":
                return []
            if intent == "intent2":
                return [
                    unresolved_slot((0, 3), "foo", "entity2", "slot2")
                ]
            return []

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def get_intents(self, text):
            return [
                intent_classification_result("intent2", 0.6),
                intent_classification_result("intent1", 0.2),
                intent_classification_result(None, 0.15),
                intent_classification_result("intent3", 0.05)
            ]

        def get_slots(self, text, intent):
            if intent == "intent1":
                return [
                    unresolved_slot((0, 3), "foo", "entity1", "slot1")
                ]
            if intent == "intent2":
                return [
                    unresolved_slot((8, 11), "ban", "entity2", "slot2")
                ]
            return []
    # pylint:enable=unused-variable

    config = NLUEngineConfig(
        ["first_intent_parser", "second_intent_parser"])
    nlu_engine = SnipsNLUEngine(config).fit(dataset)

    # When
    results = nlu_engine.parse(text, top_n=3)
    results_with_filter = nlu_engine.parse(
        text, intents=["intent1", "intent3"], top_n=3)

    # Then
    expected_results = [
        extraction_result(
            intent_classification_result("intent2", 0.6),
            [custom_slot(
                unresolved_slot((0, 3), "foo", "entity2", "slot2"))]
        ),
        extraction_result(
            intent_classification_result("intent1", 0.5),
            [custom_slot(
                unresolved_slot((0, 3), "foo", "entity1", "slot1"))]
        ),
        extraction_result(
            intent_classification_result(None, 0.15),
            []
        ),
    ]
    expected_results_with_filter = [
        extraction_result(
            intent_classification_result("intent1", 0.5),
            [custom_slot(
                unresolved_slot((0, 3), "foo", "entity1", "slot1"))]
        ),
        extraction_result(
            intent_classification_result(None, 0.15),
            []
        ),
        extraction_result(
            intent_classification_result("intent3", 0.05),
            []
        ),
    ]
    self.assertListEqual(expected_results, results)
    self.assertListEqual(expected_results_with_filter,
                         results_with_filter)

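# What the expectations above appear to encode (inferred from the test data,
# not from the engine internals): each intent is scored with the best
# probability any parser assigns to it (intent2: max(0.3, 0.6) = 0.6,
# intent1: max(0.5, 0.2) = 0.5), results come back sorted by decreasing
# probability and truncated to top_n, and the intents filter restricts the
# candidates while the None (no-intent) result remains eligible.
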
def test_synonyms_should_not_collide_when_remapped_to_base_value(self):
    # Given
    dataset = {
        "intents": {
            "intent1": {
                "utterances": [
                    {
                        "data": [
                            {
                                "text": "value",
                                "entity": "entity1",
                                "slot_name": "slot1"
                            }
                        ]
                    }
                ]
            }
        },
        "entities": {
            "entity1": {
                "data": [
                    {
                        "value": "a",
                        "synonyms": [
                            "favorïte"
                        ]
                    },
                    {
                        "value": "b",
                        "synonyms": [
                            "favorite"
                        ]
                    }
                ],
                "use_synonyms": True,
                "automatically_extensible": False,
                "matching_strictness": 1.0
            }
        },
        "language": "en",
    }

    mocked_intent = intent_classification_result("intent1", 1.0)

    # pylint:disable=unused-variable
    @IntentParser.register("my_intent_parser", True)
    class MyIntentParser(MockIntentParser):
        def parse(self, text, intents=None, top_n=None):
            slots = [
                unresolved_slot(match_range=(0, len(text)),
                                value=text, entity="entity1",
                                slot_name="slot1")]
            return parsing_result(text, mocked_intent, slots)
    # pylint:enable=unused-variable

    config = NLUEngineConfig(["my_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    result1 = engine.parse("favorite")
    result2 = engine.parse("favorïte")

    # Then
    expected_slot1 = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 8
        },
        RES_RAW_VALUE: "favorite",
        RES_VALUE: {
            "kind": "Custom",
            "value": "b"
        },
        RES_ENTITY: "entity1",
        RES_SLOT_NAME: "slot1"
    }
    expected_slot2 = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 8
        },
        RES_RAW_VALUE: "favorïte",
        RES_VALUE: {
            "kind": "Custom",
            "value": "a"
        },
        RES_ENTITY: "entity1",
        RES_SLOT_NAME: "slot1"
    }
    expected_result1 = parsing_result("favorite", intent=mocked_intent,
                                      slots=[expected_slot1])
    expected_result2 = parsing_result("favorïte", intent=mocked_intent,
                                      slots=[expected_slot2])
    self.assertEqual(expected_result1, result1)
    self.assertEqual(expected_result2, result2)
