def test_should_be_serializable_into_dir(self):
    """Persisting a fitted engine must write a top-level
    ``nlu_engine.json`` plus one sub-directory (with ``metadata.json``)
    per intent parser."""
    # Given
    register_processing_unit(TestIntentParser1)
    register_processing_unit(TestIntentParser2)

    parser1_config = TestIntentParser1Config()
    parser2_config = TestIntentParser2Config()
    parsers_configs = [parser1_config, parser2_config]
    config = NLUEngineConfig(parsers_configs)
    engine = SnipsNLUEngine(config).fit(BEVERAGE_DATASET)

    # When
    engine.persist(self.tmp_file_path)

    # Then
    # Fresh configs: the persisted config must equal a newly-built one.
    parser1_config = TestIntentParser1Config()
    parser2_config = TestIntentParser2Config()
    parsers_configs = [parser1_config, parser2_config]
    expected_engine_config = NLUEngineConfig(parsers_configs)
    expected_engine_dict = {
        "unit_name": "nlu_engine",
        "dataset_metadata": {
            "language_code": "en",
            "entities": {
                "Temperature": {
                    "automatically_extensible": True,
                }
            },
            "slot_name_mappings": {
                "MakeCoffee": {
                    "number_of_cups": "snips/number"
                },
                "MakeTea": {
                    "beverage_temperature": "Temperature",
                    "number_of_cups": "snips/number"
                }
            },
        },
        "config": expected_engine_config.to_dict(),
        "intent_parsers": [
            "test_intent_parser1",
            "test_intent_parser2"
        ],
        "builtin_entity_parser": "builtin_entity_parser",
        "custom_entity_parser": "custom_entity_parser",
        "model_version": snips_nlu.__model_version__,
        "training_package_version": snips_nlu.__version__
    }
    self.assertJsonContent(self.tmp_file_path / "nlu_engine.json",
                           expected_engine_dict)
    self.assertJsonContent(
        self.tmp_file_path / "test_intent_parser1" / "metadata.json",
        {"unit_name": "test_intent_parser1"})
    self.assertJsonContent(
        self.tmp_file_path / "test_intent_parser2" / "metadata.json",
        {"unit_name": "test_intent_parser2"})
def test_nlu_engine_should_raise_error_with_bytes_input(self):
    """``parse`` must reject non-unicode (bytes) input by raising
    InvalidInputError with an explanatory message."""
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    bytes_input = b"brew me an espresso"

    # pylint:disable=unused-variable
    @IntentParser.register("my_intent_parser", True)
    class MyIntentParser(MockIntentParser):
        pass
    # pylint:enable=unused-variable

    config = NLUEngineConfig(["my_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When / Then
    with self.assertRaises(InvalidInputError) as cm:
        engine.parse(bytes_input)
    message = str(cm.exception.args[0])
    self.assertTrue("Expected unicode but received" in message)
def test_parse_should_raise_with_unknown_intent_in_filter(self):
    """Filtering on an intent absent from the dataset must raise
    IntentNotFoundError, whether the filter is a string or a list."""
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- Hello [name1](John)

---
type: intent
name: goodbye
utterances:
- Goodbye [name](Eric)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    # pylint:disable=unused-variable
    @IntentParser.register("my_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        pass
    # pylint:enable=unused-variable

    config = NLUEngineConfig(["my_intent_parser"])
    nlu_engine = SnipsNLUEngine(config).fit(dataset)

    # When / Then
    # "greeting3" exists in neither intent of the dataset.
    with self.assertRaises(IntentNotFoundError):
        nlu_engine.parse("Hello John", intents="greeting3")
    with self.assertRaises(IntentNotFoundError):
        nlu_engine.parse("Hello John", intents=["greeting3"])
def test_should_retrain_only_non_trained_subunits(self):
    """``fit(force_retrain=False)`` must leave already-fitted sub-units
    untouched and only train the unfitted ones.

    NOTE(review): other blocks in this file define a method with this
    exact name; if they live in the same test class, Python keeps only
    the last definition -- confirm they belong to distinct classes.
    """
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- hello [greeted:name](john)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    @IntentParser.register("test_intent_parser", True)
    class TestIntentParser(MockIntentParser):
        def __init__(self, config=None, **shared):
            super(TestIntentParser, self).__init__(config, **shared)
            # Each sub-unit records its fitted state and how many times
            # training was effectively applied to it.
            self.sub_unit_1 = dict(fitted=False, calls=0)
            self.sub_unit_2 = dict(fitted=False, calls=0)

        def fit(self, dataset, force_retrain):
            if force_retrain:
                # Forced: both sub-units are (re)trained unconditionally.
                self.sub_unit_1["fitted"] = True
                self.sub_unit_1["calls"] += 1
                self.sub_unit_2["fitted"] = True
                self.sub_unit_2["calls"] += 1
            else:
                # Lazy: train only the sub-units not yet fitted.
                if not self.sub_unit_1["fitted"]:
                    self.sub_unit_1["fitted"] = True
                    self.sub_unit_1["calls"] += 1
                if not self.sub_unit_2["fitted"]:
                    self.sub_unit_2["fitted"] = True
                    self.sub_unit_2["calls"] += 1
            return self

        @property
        def fitted(self):
            # Parser counts as fitted only when every sub-unit is.
            return self.sub_unit_1["fitted"] and \
                   self.sub_unit_2["fitted"]

    nlu_engine_config = NLUEngineConfig(["test_intent_parser"])
    nlu_engine = SnipsNLUEngine(nlu_engine_config)
    intent_parser = TestIntentParser()
    # Pre-mark sub-unit 1 as already trained.
    intent_parser.sub_unit_1.update(dict(fitted=True, calls=0))
    nlu_engine.intent_parsers.append(intent_parser)

    # When
    nlu_engine.fit(dataset, force_retrain=False)

    # Then
    # Sub-unit 1 untouched (calls still 0), sub-unit 2 trained once.
    self.assertDictEqual(dict(fitted=True, calls=0),
                         intent_parser.sub_unit_1)
    self.assertDictEqual(dict(fitted=True, calls=1),
                         intent_parser.sub_unit_2)
def test_should_get_intents(self):
    """``get_intents`` must merge parser outputs keeping, for each
    intent, the best score seen across parsers, sorted descending."""
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- hello

---
type: intent
name: greeting2
utterances:
- how are you""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    input_text = "hello world"

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        def get_intents(self, text):
            return [
                intent_classification_result("greeting1", 0.5),
                intent_classification_result("greeting2", 0.3),
                intent_classification_result(None, 0.2)
            ]

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def get_intents(self, text):
            return [
                intent_classification_result("greeting2", 0.6),
                intent_classification_result("greeting1", 0.2),
                intent_classification_result(None, 0.1)
            ]
    # pylint:enable=unused-variable

    config = NLUEngineConfig(["first_intent_parser",
                              "second_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    res_intents = engine.get_intents(input_text)

    # Then
    # Per-intent maxima: greeting2 -> 0.6, greeting1 -> 0.5, None -> 0.2
    expected_intents = [
        intent_classification_result("greeting2", 0.6),
        intent_classification_result("greeting1", 0.5),
        intent_classification_result(None, 0.2)
    ]
    self.assertListEqual(expected_intents, res_intents)
def test_should_use_parsers_sequentially(self):
    """When the first parser yields no result, the engine must fall
    back to the second parser and resolve its slots.

    NOTE(review): other blocks in this file define a method with this
    exact name -- confirm they belong to distinct test classes.
    """
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- hello [greeted:name](john)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    input_text = "hello snips"
    intent = intent_classification_result(intent_name='greeting1',
                                          probability=0.7)
    slots = [
        unresolved_slot(match_range=(6, 11), value='snips',
                        entity='name', slot_name='greeted')
    ]

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        pass

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def parse(self, text, intents=None, top_n=None):
            # Succeed only on the expected input text.
            if text == input_text:
                return parsing_result(text, intent, slots)
            return empty_result(text, 1.0)
    # pylint:enable=unused-variable

    config = NLUEngineConfig(
        ["first_intent_parser", "second_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    parse = engine.parse(input_text)

    # Then
    expected_slots = [custom_slot(s) for s in slots]
    expected_parse = parsing_result(input_text, intent, expected_slots)
    self.assertDictEqual(expected_parse, parse)
def test_should_get_slots(self):
    """``get_slots`` must query parsers in order and return the first
    non-empty slot list, resolved to custom slots."""
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: greeting
utterances:
- hello [greeted:name](john)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    input_text = "hello snips"
    greeting_intent = "greeting"
    expected_slots = [
        unresolved_slot(match_range=(6, 11), value="snips",
                        entity="name", slot_name="greeted")
    ]

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        pass

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def get_slots(self, text, intent):
            # Only answer for the expected (text, intent) pair.
            if text == input_text and intent == greeting_intent:
                return expected_slots
            return []
    # pylint:enable=unused-variable

    config = NLUEngineConfig(
        ["first_intent_parser", "second_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    res_slots = engine.get_slots(input_text, greeting_intent)

    # Then
    expected_slots = [custom_slot(s) for s in expected_slots]
    self.assertListEqual(expected_slots, res_slots)
def test_should_retrain_only_non_trained_subunits(self):
    """``fit(force_retrain=False)`` must skip already-fitted sub-units
    (variant using the to_dict/from_dict processing-unit API).

    NOTE(review): other blocks in this file define a method with this
    exact name -- confirm they belong to distinct test classes.
    """
    # Given
    class TestIntentParserConfig(ProcessingUnitConfig):
        unit_name = "test_intent_parser"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParserConfig()

    class TestIntentParser(IntentParser):
        unit_name = "test_intent_parser"
        config_type = TestIntentParserConfig

        def __init__(self, config):
            super(TestIntentParser, self).__init__(config)
            # Each sub-unit records its fitted state and training count.
            self.sub_unit_1 = dict(fitted=False, calls=0)
            self.sub_unit_2 = dict(fitted=False, calls=0)

        def fit(self, dataset, force_retrain):
            if force_retrain:
                # Forced: retrain both sub-units unconditionally.
                self.sub_unit_1["fitted"] = True
                self.sub_unit_1["calls"] += 1
                self.sub_unit_2["fitted"] = True
                self.sub_unit_2["calls"] += 1
            else:
                # Lazy: train only sub-units not yet fitted.
                if not self.sub_unit_1["fitted"]:
                    self.sub_unit_1["fitted"] = True
                    self.sub_unit_1["calls"] += 1
                if not self.sub_unit_2["fitted"]:
                    self.sub_unit_2["fitted"] = True
                    self.sub_unit_2["calls"] += 1
            return self

        @property
        def fitted(self):
            return self.sub_unit_1["fitted"] and \
                   self.sub_unit_2["fitted"]

        def parse(self, text, intents):
            return empty_result(text)

        def to_dict(self):
            return {
                "unit_name": self.unit_name,
            }

        @classmethod
        def from_dict(cls, unit_dict):
            conf = cls.config_type()
            return TestIntentParser(conf)

    register_processing_unit(TestIntentParser)

    intent_parser_config = TestIntentParserConfig()
    nlu_engine_config = NLUEngineConfig([intent_parser_config])
    nlu_engine = SnipsNLUEngine(nlu_engine_config)
    intent_parser = TestIntentParser(intent_parser_config)
    # Pre-mark sub-unit 1 as already trained.
    intent_parser.sub_unit_1.update(dict(fitted=True, calls=0))
    nlu_engine.intent_parsers.append(intent_parser)

    # When
    nlu_engine.fit(SAMPLE_DATASET, force_retrain=False)

    # Then
    # Sub-unit 1 untouched (calls still 0), sub-unit 2 trained once.
    self.assertDictEqual(dict(fitted=True, calls=0),
                         intent_parser.sub_unit_1)
    self.assertDictEqual(dict(fitted=True, calls=1),
                         intent_parser.sub_unit_2)
def test_synonyms_should_point_to_base_value(self):
    """A parsed synonym ("dummy1_bis") must be resolved to its entity
    base value ("dummy1") in the final slot, while the raw value keeps
    the original text.

    NOTE(review): another block in this file defines a method with this
    exact name -- confirm they belong to distinct test classes.
    """
    # Given
    dataset = {
        "intents": {
            "dummy_intent_1": {
                "utterances": [
                    {
                        "data": [
                            {
                                "text": "dummy_1",
                                "entity": "dummy_entity_1",
                                "slot_name": "dummy_slot_name"
                            }
                        ]
                    }
                ]
            }
        },
        "entities": {
            "dummy_entity_1": {
                "use_synonyms": True,
                "automatically_extensible": False,
                "data": [
                    {
                        "value": "dummy1",
                        "synonyms": [
                            "dummy1",
                            "dummy1_bis"
                        ]
                    }
                ],
                "matching_strictness": 1.0
            }
        },
        "language": "en"
    }

    mocked_intent = intent_classification_result("dummy_intent_1", 1.0)
    mocked_slots = [
        unresolved_slot(match_range=(0, 10), value="dummy1_bis",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name")]

    # pylint:disable=unused-variable
    @IntentParser.register("my_intent_parser", True)
    class MyIntentParser(MockIntentParser):
        def parse(self, text, intents=None, top_n=None):
            return parsing_result(text, mocked_intent, mocked_slots)
    # pylint:enable=unused-variable

    input_ = "dummy1_bis"
    config = NLUEngineConfig(["my_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    result = engine.parse(input_)

    # Then
    expected_slot = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 10
        },
        RES_RAW_VALUE: "dummy1_bis",
        RES_VALUE: {
            "kind": "Custom",
            "value": "dummy1"
        },
        RES_ENTITY: "dummy_entity_1",
        RES_SLOT_NAME: "dummy_slot_name"
    }
    expected_result = parsing_result(
        input_, mocked_intent, slots=[expected_slot])
    self.assertEqual(expected_result, result)
def test_should_serialize_duplicated_intent_parsers(self):
    """Two parsers with the same unit name must be persisted into
    distinct directories, the second one suffixed with ``_2``."""
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    # pylint:disable=unused-variable
    @IntentParser.register("my_intent_parser", True)
    class MyIntentParser(MockIntentParser):
        pass
    # pylint:enable=unused-variable

    # Same parser registered twice in the engine config.
    parsers_configs = ["my_intent_parser", "my_intent_parser"]
    config = NLUEngineConfig(parsers_configs)
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    engine.persist(self.tmp_file_path)

    # Then
    expected_engine_dict = {
        "unit_name": "nlu_engine",
        "dataset_metadata": {
            "language_code": "en",
            "entities": {
                "Temperature": {
                    "automatically_extensible": True,
                }
            },
            "slot_name_mappings": {
                "MakeCoffee": {
                    "number_of_cups": "snips/number"
                },
                "MakeTea": {
                    "beverage_temperature": "Temperature",
                    "number_of_cups": "snips/number"
                }
            },
        },
        "config": {
            "unit_name": "nlu_engine",
            "intent_parsers_configs": [
                {
                    "unit_name": "my_intent_parser"
                },
                {
                    "unit_name": "my_intent_parser"
                }
            ]
        },
        "intent_parsers": [
            "my_intent_parser",
            "my_intent_parser_2"
        ],
        "builtin_entity_parser": "builtin_entity_parser",
        "custom_entity_parser": "custom_entity_parser",
        "model_version": snips_nlu.__model_version__,
        "training_package_version": snips_nlu.__version__
    }
    self.assertJsonContent(self.tmp_file_path / "nlu_engine.json",
                           expected_engine_dict)
    self.assertJsonContent(
        self.tmp_file_path / "my_intent_parser" / "metadata.json",
        {"unit_name": "my_intent_parser", "fitted": True})
    self.assertJsonContent(
        self.tmp_file_path / "my_intent_parser_2" / "metadata.json",
        {"unit_name": "my_intent_parser", "fitted": True})
def test_should_parse_top_intents(self):
    """``parse(top_n=...)`` must return the top-n extraction results
    (best score per intent across parsers) with their resolved slots,
    honouring the ``intents`` filter when provided."""
    # Given
    text = "foo bar ban"
    dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
- foo [slot1:entity1](bak)

---
type: intent
name: intent2
utterances:
- '[slot2:entity2](foo) baz'

---
type: intent
name: intent3
utterances:
- foo bap""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        def get_intents(self, text):
            return [
                intent_classification_result("intent1", 0.5),
                intent_classification_result("intent2", 0.3),
                intent_classification_result(None, 0.15),
                intent_classification_result("intent3", 0.05)
            ]

        def get_slots(self, text, intent):
            if intent == "intent1":
                return []
            if intent == "intent2":
                return [
                    unresolved_slot((0, 3), "foo", "entity2", "slot2")
                ]
            return []

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def get_intents(self, text):
            return [
                intent_classification_result("intent2", 0.6),
                intent_classification_result("intent1", 0.2),
                intent_classification_result(None, 0.15),
                intent_classification_result("intent3", 0.05)
            ]

        def get_slots(self, text, intent):
            if intent == "intent1":
                return [
                    unresolved_slot((0, 3), "foo", "entity1", "slot1")
                ]
            if intent == "intent2":
                return [
                    unresolved_slot((8, 11), "ban", "entity2", "slot2")
                ]
            return []
    # pylint:enable=unused-variable

    config = NLUEngineConfig(
        ["first_intent_parser", "second_intent_parser"])
    nlu_engine = SnipsNLUEngine(config).fit(dataset)

    # When
    results = nlu_engine.parse(text, top_n=3)
    results_with_filter = nlu_engine.parse(
        text, intents=["intent1", "intent3"], top_n=3)

    # Then
    # Unfiltered: intent2 (0.6) > intent1 (0.5) > None (0.15).
    expected_results = [
        extraction_result(
            intent_classification_result("intent2", 0.6),
            [custom_slot(
                unresolved_slot((0, 3), "foo", "entity2", "slot2"))]
        ),
        extraction_result(
            intent_classification_result("intent1", 0.5),
            [custom_slot(
                unresolved_slot((0, 3), "foo", "entity1", "slot1"))]
        ),
        extraction_result(
            intent_classification_result(None, 0.15),
            []
        ),
    ]
    # Filtered: intent2 excluded; intent3 now makes the cut.
    expected_results_with_filter = [
        extraction_result(
            intent_classification_result("intent1", 0.5),
            [custom_slot(
                unresolved_slot((0, 3), "foo", "entity1", "slot1"))]
        ),
        extraction_result(
            intent_classification_result(None, 0.15),
            []
        ),
        extraction_result(
            intent_classification_result("intent3", 0.05),
            []
        ),
    ]
    self.assertListEqual(expected_results, results)
    self.assertListEqual(expected_results_with_filter,
                         results_with_filter)
def test_should_be_deserializable_from_dir(self):
    """``SnipsNLUEngine.from_path`` must restore both the dataset
    metadata and the engine config from a persisted directory layout."""
    # Given
    register_processing_unit(TestIntentParser1)
    register_processing_unit(TestIntentParser2)

    dataset_metadata = {
        "language_code": "en",
        "entities": {
            "Temperature": {
                "automatically_extensible": True,
                "utterances": {
                    "boiling": "hot",
                    "cold": "cold",
                    "hot": "hot",
                    "iced": "cold"
                }
            }
        },
        "slot_name_mappings": {
            "MakeCoffee": {
                "number_of_cups": "snips/number"
            },
            "MakeTea": {
                "beverage_temperature": "Temperature",
                "number_of_cups": "snips/number"
            }
        },
    }
    parser1_config = TestIntentParser1Config()
    parser2_config = TestIntentParser2Config()
    engine_config = NLUEngineConfig([parser1_config, parser2_config])
    engine_dict = {
        "unit_name": "nlu_engine",
        "dataset_metadata": dataset_metadata,
        "config": engine_config.to_dict(),
        "intent_parsers": [
            "test_intent_parser1",
            "test_intent_parser2",
        ],
        "model_version": snips_nlu.__model_version__,
        "training_package_version": snips_nlu.__version__
    }
    # Build the on-disk layout from_path expects: engine JSON, one
    # directory per parser, and an (empty) resources directory.
    self.tmp_file_path.mkdir()
    parser1_path = self.tmp_file_path / "test_intent_parser1"
    parser1_path.mkdir()
    parser2_path = self.tmp_file_path / "test_intent_parser2"
    parser2_path.mkdir()
    (self.tmp_file_path / "resources").mkdir()
    self.writeJsonContent(self.tmp_file_path / "nlu_engine.json",
                          engine_dict)
    self.writeJsonContent(parser1_path / "metadata.json",
                          {"unit_name": "test_intent_parser1"})
    self.writeJsonContent(parser2_path / "metadata.json",
                          {"unit_name": "test_intent_parser2"})

    # When
    engine = SnipsNLUEngine.from_path(self.tmp_file_path)

    # Then
    parser1_config = TestIntentParser1Config()
    parser2_config = TestIntentParser2Config()
    expected_engine_config = NLUEngineConfig(
        [parser1_config, parser2_config]).to_dict()
    # pylint:disable=protected-access
    self.assertDictEqual(engine._dataset_metadata, dataset_metadata)
    # pylint:enable=protected-access
    self.assertDictEqual(engine.config.to_dict(), expected_engine_config)
def test_should_retrain_only_non_trained_subunits(self):
    """``fit(force_retrain=False)`` must skip already-fitted sub-units
    (variant using the persist/from_path processing-unit API).

    NOTE(review): other blocks in this file define a method with this
    exact name -- confirm they belong to distinct test classes.
    """
    # Given
    class TestIntentParserConfig(ProcessingUnitConfig):
        unit_name = "test_intent_parser"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParserConfig()

        def get_required_resources(self):
            return None

    class TestIntentParser(IntentParser):
        unit_name = "test_intent_parser"
        config_type = TestIntentParserConfig

        def __init__(self, config):
            super(TestIntentParser, self).__init__(config)
            # Each sub-unit records its fitted state and training count.
            self.sub_unit_1 = dict(fitted=False, calls=0)
            self.sub_unit_2 = dict(fitted=False, calls=0)

        def fit(self, dataset, force_retrain):
            if force_retrain:
                # Forced: retrain both sub-units unconditionally.
                self.sub_unit_1["fitted"] = True
                self.sub_unit_1["calls"] += 1
                self.sub_unit_2["fitted"] = True
                self.sub_unit_2["calls"] += 1
            else:
                # Lazy: train only sub-units not yet fitted.
                if not self.sub_unit_1["fitted"]:
                    self.sub_unit_1["fitted"] = True
                    self.sub_unit_1["calls"] += 1
                if not self.sub_unit_2["fitted"]:
                    self.sub_unit_2["fitted"] = True
                    self.sub_unit_2["calls"] += 1
            return self

        @property
        def fitted(self):
            return self.sub_unit_1["fitted"] and \
                   self.sub_unit_2["fitted"]

        def parse(self, text, intents):
            return empty_result(text)

        def persist(self, path):
            path = Path(path)
            path.mkdir()
            with (path / "metadata.json").open(mode="w") as f:
                f.write(json_string({"unit_name": self.unit_name}))

        @classmethod
        def from_path(cls, path):
            cfg = cls.config_type()
            return cls(cfg)

    register_processing_unit(TestIntentParser)

    intent_parser_config = TestIntentParserConfig()
    nlu_engine_config = NLUEngineConfig([intent_parser_config])
    nlu_engine = SnipsNLUEngine(nlu_engine_config)
    intent_parser = TestIntentParser(intent_parser_config)
    # Pre-mark sub-unit 1 as already trained.
    intent_parser.sub_unit_1.update(dict(fitted=True, calls=0))
    nlu_engine.intent_parsers.append(intent_parser)

    # When
    nlu_engine.fit(SAMPLE_DATASET, force_retrain=False)

    # Then
    # Sub-unit 1 untouched (calls still 0), sub-unit 2 trained once.
    self.assertDictEqual(dict(fitted=True, calls=0),
                         intent_parser.sub_unit_1)
    self.assertDictEqual(dict(fitted=True, calls=1),
                         intent_parser.sub_unit_2)
def test_synonyms_should_not_collide_when_remapped_to_base_value(self):
    """Two synonyms that differ only by a non-ASCII character
    ("favorite" vs "favorïte") must resolve to their own distinct base
    values ("b" vs "a") without colliding.

    NOTE(review): another block in this file defines a method with this
    exact name -- confirm they belong to distinct test classes.
    """
    # Given
    dataset = {
        "intents": {
            "intent1": {
                "utterances": [
                    {
                        "data": [
                            {
                                "text": "value",
                                "entity": "entity1",
                                "slot_name": "slot1"
                            }
                        ]
                    }
                ]
            }
        },
        "entities": {
            "entity1": {
                "data": [
                    {
                        "value": "a",
                        "synonyms": [
                            "favorïte"
                        ]
                    },
                    {
                        "value": "b",
                        "synonyms": [
                            "favorite"
                        ]
                    }
                ],
                "use_synonyms": True,
                "automatically_extensible": False,
                "matching_strictness": 1.0
            }
        },
        "language": "en",
    }
    mocked_intent = intent_classification_result("intent1", 1.0)

    # pylint:disable=unused-variable
    @IntentParser.register("my_intent_parser", True)
    class MyIntentParser(MockIntentParser):
        def parse(self, text, intents=None, top_n=None):
            # Echo the whole input back as a single entity1 slot.
            slots = [
                unresolved_slot(match_range=(0, len(text)),
                                value=text,
                                entity="entity1",
                                slot_name="slot1")]
            return parsing_result(text, mocked_intent, slots)
    # pylint:enable=unused-variable

    config = NLUEngineConfig(["my_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    result1 = engine.parse("favorite")
    result2 = engine.parse("favorïte")

    # Then
    expected_slot1 = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 8
        },
        RES_RAW_VALUE: "favorite",
        RES_VALUE: {
            "kind": "Custom",
            "value": "b"
        },
        RES_ENTITY: "entity1",
        RES_SLOT_NAME: "slot1"
    }
    expected_slot2 = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 8
        },
        RES_RAW_VALUE: "favorïte",
        RES_VALUE: {
            "kind": "Custom",
            "value": "a"
        },
        RES_ENTITY: "entity1",
        RES_SLOT_NAME: "slot1"
    }
    expected_result1 = parsing_result("favorite", intent=mocked_intent,
                                      slots=[expected_slot1])
    expected_result2 = parsing_result("favorïte", intent=mocked_intent,
                                      slots=[expected_slot2])
    self.assertEqual(expected_result1, result1)
    self.assertEqual(expected_result2, result2)
def test_synonyms_should_not_collide_when_remapped_to_base_value(
        self, mocked_proba_parse):
    """Synonyms differing only by a non-ASCII character must resolve to
    distinct base values (probabilistic-parser mock variant).

    NOTE(review): this method takes a ``mocked_proba_parse`` argument,
    so it presumably carries a ``@mock.patch`` decorator not visible in
    this chunk -- confirm. Another block in this file defines a method
    with this exact name; confirm they belong to distinct test classes.
    """
    # Given
    dataset = {
        "intents": {
            "intent1": {
                "utterances": [{
                    "data": [{
                        "text": "value",
                        "entity": "entity1",
                        "slot_name": "slot1"
                    }]
                }]
            }
        },
        "entities": {
            "entity1": {
                "data": [{
                    "value": "a",
                    "synonyms": ["favorïte"]
                }, {
                    "value": "b",
                    "synonyms": ["favorite"]
                }],
                "use_synonyms": True,
                "automatically_extensible": False
            }
        },
        "language": "en",
    }

    mocked_proba_parser_intent = intent_classification_result(
        "intent1", 1.0)

    def mock_proba_parse(text, intents):
        # Echo the whole input back as a single entity1 slot.
        slots = [
            unresolved_slot(match_range=(0, len(text)), value=text,
                            entity="entity1", slot_name="slot1")
        ]
        return parsing_result(text, mocked_proba_parser_intent, slots)

    mocked_proba_parse.side_effect = mock_proba_parse

    config = NLUEngineConfig([ProbabilisticIntentParserConfig()])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    result1 = engine.parse("favorite")
    result2 = engine.parse("favorïte")

    # Then
    expected_slot1 = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 8
        },
        RES_RAW_VALUE: "favorite",
        RES_VALUE: {
            "kind": "Custom",
            "value": "b"
        },
        RES_ENTITY: "entity1",
        RES_SLOT_NAME: "slot1"
    }
    expected_slot2 = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 8
        },
        RES_RAW_VALUE: "favorïte",
        RES_VALUE: {
            "kind": "Custom",
            "value": "a"
        },
        RES_ENTITY: "entity1",
        RES_SLOT_NAME: "slot1"
    }
    expected_result1 = parsing_result("favorite",
                                      intent=mocked_proba_parser_intent,
                                      slots=[expected_slot1])
    expected_result2 = parsing_result("favorïte",
                                      intent=mocked_proba_parser_intent,
                                      slots=[expected_slot2])
    self.assertEqual(expected_result1, result1)
    self.assertEqual(expected_result2, result2)
def test_synonyms_should_point_to_base_value(self, mocked_proba_parse):
    """A parsed synonym must be resolved to its entity base value
    (probabilistic-parser mock variant).

    NOTE(review): this method takes a ``mocked_proba_parse`` argument,
    so it presumably carries a ``@mock.patch`` decorator not visible in
    this chunk -- confirm. Another block in this file defines a method
    with this exact name; confirm they belong to distinct test classes.
    """
    # Given
    dataset = {
        "intents": {
            "dummy_intent_1": {
                "utterances": [{
                    "data": [{
                        "text": "dummy_1",
                        "entity": "dummy_entity_1",
                        "slot_name": "dummy_slot_name"
                    }]
                }]
            }
        },
        "entities": {
            "dummy_entity_1": {
                "use_synonyms": True,
                "automatically_extensible": False,
                "data": [{
                    "value": "dummy1",
                    "synonyms": ["dummy1", "dummy1_bis"]
                }]
            }
        },
        "language": "en"
    }
    text = "dummy1_bis"
    mocked_proba_parser_intent = intent_classification_result(
        "dummy_intent_1", 1.0)
    mocked_proba_parser_slots = [
        unresolved_slot(match_range=(0, 10), value="dummy1_bis",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name")
    ]

    mocked_proba_parse.return_value = parsing_result(
        text, mocked_proba_parser_intent, mocked_proba_parser_slots)

    config = NLUEngineConfig([ProbabilisticIntentParserConfig()])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    result = engine.parse(text)

    # Then
    expected_slot = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 10
        },
        RES_RAW_VALUE: "dummy1_bis",
        RES_VALUE: {
            "kind": "Custom",
            "value": "dummy1"
        },
        RES_ENTITY: "dummy_entity_1",
        RES_SLOT_NAME: "dummy_slot_name"
    }
    expected_result = parsing_result(text,
                                     intent=mocked_proba_parser_intent,
                                     slots=[expected_slot])
    self.assertEqual(expected_result, result)
def test_should_use_parsers_sequentially(self):
    """When the first parser yields an empty result, the engine must
    fall back to the second parser (to_dict/from_dict API variant).

    NOTE(review): other blocks in this file define a method with this
    exact name -- confirm they belong to distinct test classes.
    """
    # Given
    input_text = "hello world"
    intent = intent_classification_result(intent_name='dummy_intent_1',
                                          probability=0.7)
    slots = [
        unresolved_slot(match_range=(6, 11), value='world',
                        entity='mocked_entity',
                        slot_name='mocked_slot_name')
    ]

    class TestIntentParser1Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser1"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser1Config()

    class TestIntentParser1(IntentParser):
        unit_name = "test_intent_parser1"
        config_type = TestIntentParser1Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return hasattr(self, '_fitted') and self._fitted

        def parse(self, text, intents):
            # First parser never finds anything.
            return empty_result(text)

        def to_dict(self):
            return {
                "unit_name": self.unit_name,
            }

        @classmethod
        def from_dict(cls, unit_dict):
            conf = cls.config_type()
            return TestIntentParser1(conf)

    class TestIntentParser2Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser2"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser2Config()

    class TestIntentParser2(IntentParser):
        unit_name = "test_intent_parser2"
        config_type = TestIntentParser2Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return hasattr(self, '_fitted') and self._fitted

        def parse(self, text, intents):
            # Second parser succeeds only on the expected input text.
            if text == input_text:
                return parsing_result(text, intent, slots)
            return empty_result(text)

        def to_dict(self):
            return {
                "unit_name": self.unit_name,
            }

        @classmethod
        def from_dict(cls, unit_dict):
            conf = cls.config_type()
            return TestIntentParser2(conf)

    register_processing_unit(TestIntentParser1)
    register_processing_unit(TestIntentParser2)

    mocked_dataset_metadata = {
        "language_code": "en",
        "entities": {
            "mocked_entity": {
                "automatically_extensible": True,
                "utterances": dict()
            }
        },
        "slot_name_mappings": {
            "dummy_intent_1": {
                "mocked_slot_name": "mocked_entity"
            }
        }
    }

    config = NLUEngineConfig(
        [TestIntentParser1Config(), TestIntentParser2Config()])
    engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
    # Override the metadata so slot resolution uses the mocked entity.
    # pylint:disable=protected-access
    engine._dataset_metadata = mocked_dataset_metadata
    # pylint:enable=protected-access

    # When
    parse = engine.parse(input_text)

    # Then
    expected_slots = [custom_slot(s) for s in slots]
    expected_parse = parsing_result(input_text, intent, expected_slots)
    self.assertDictEqual(expected_parse, parse)
def test_should_be_deserializable(self):
    """``SnipsNLUEngine.from_dict`` must restore the dataset metadata
    and the engine config from a serialized engine dict."""
    # When
    class TestIntentParser1Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser1"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser1Config()

    class TestIntentParser1(IntentParser):
        unit_name = "test_intent_parser1"
        config_type = TestIntentParser1Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return hasattr(self, '_fitted') and self._fitted

        def parse(self, text, intents):
            return empty_result(text)

        def to_dict(self):
            return {
                "unit_name": self.unit_name,
            }

        @classmethod
        def from_dict(cls, unit_dict):
            config = cls.config_type()
            return TestIntentParser1(config)

    class TestIntentParser2Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser2"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser2Config()

    class TestIntentParser2(IntentParser):
        unit_name = "test_intent_parser2"
        config_type = TestIntentParser2Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return hasattr(self, '_fitted') and self._fitted

        def parse(self, text, intents):
            return empty_result(text)

        def to_dict(self):
            return {
                "unit_name": self.unit_name,
            }

        @classmethod
        def from_dict(cls, unit_dict):
            config = cls.config_type()
            return TestIntentParser2(config)

    register_processing_unit(TestIntentParser1)
    register_processing_unit(TestIntentParser2)

    dataset_metadata = {
        "language_code": "en",
        "entities": {
            "Temperature": {
                "automatically_extensible": True,
                "utterances": {
                    "boiling": "hot",
                    "cold": "cold",
                    "hot": "hot",
                    "iced": "cold"
                }
            }
        },
        "slot_name_mappings": {
            "MakeCoffee": {
                "number_of_cups": "snips/number"
            },
            "MakeTea": {
                "beverage_temperature": "Temperature",
                "number_of_cups": "snips/number"
            }
        },
    }
    parser1_config = TestIntentParser1Config()
    parser2_config = TestIntentParser2Config()
    engine_config = NLUEngineConfig([parser1_config, parser2_config])
    engine_dict = {
        "unit_name": "nlu_engine",
        "dataset_metadata": dataset_metadata,
        "config": engine_config.to_dict(),
        "intent_parsers": [
            {
                "unit_name": "test_intent_parser1"
            },
            {
                "unit_name": "test_intent_parser2"
            },
        ],
        "model_version": snips_nlu.__model_version__,
        "training_package_version": snips_nlu.__version__
    }
    engine = SnipsNLUEngine.from_dict(engine_dict)

    # Then
    parser1_config = TestIntentParser1Config()
    parser2_config = TestIntentParser2Config()
    expected_engine_config = NLUEngineConfig(
        [parser1_config, parser2_config]).to_dict()
    # pylint:disable=protected-access
    self.assertDictEqual(engine._dataset_metadata, dataset_metadata)
    # pylint:enable=protected-access
    self.assertDictEqual(engine.config.to_dict(), expected_engine_config)
def test_should_use_parsers_sequentially(self):
    """When the first parser yields an empty result, the engine must
    fall back to the second parser (persist/from_path API variant).

    NOTE(review): other blocks in this file define a method with this
    exact name -- confirm they belong to distinct test classes.
    """
    # Given
    input_text = "hello world"
    intent = intent_classification_result(intent_name='dummy_intent_1',
                                          probability=0.7)
    slots = [
        unresolved_slot(match_range=(6, 11), value='world',
                        entity='mocked_entity',
                        slot_name='mocked_slot_name')
    ]

    class FirstIntentParserConfig(ProcessingUnitConfig):
        unit_name = "first_intent_parser"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return FirstIntentParserConfig()

        def get_required_resources(self):
            return None

    class FirstIntentParser(IntentParser):
        unit_name = "first_intent_parser"
        config_type = FirstIntentParserConfig

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return hasattr(self, '_fitted') and self._fitted

        def parse(self, text, intents):
            # First parser never finds anything.
            return empty_result(text)

        def persist(self, path):
            path = Path(path)
            path.mkdir()
            with (path / "metadata.json").open(mode="w") as f:
                f.write(json_string({"unit_name": self.unit_name}))

        @classmethod
        def from_path(cls, path):
            cfg = cls.config_type()
            return cls(cfg)

    class SecondIntentParserConfig(ProcessingUnitConfig):
        unit_name = "second_intent_parser"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return SecondIntentParserConfig()

        def get_required_resources(self):
            return None

    class SecondIntentParser(IntentParser):
        unit_name = "second_intent_parser"
        config_type = SecondIntentParserConfig

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return hasattr(self, '_fitted') and self._fitted

        def parse(self, text, intents):
            # Second parser succeeds only on the expected input text.
            if text == input_text:
                return parsing_result(text, intent, slots)
            return empty_result(text)

        def persist(self, path):
            path = Path(path)
            path.mkdir()
            with (path / "metadata.json").open(mode="w") as f:
                f.write(json_string({"unit_name": self.unit_name}))

        @classmethod
        def from_path(cls, path):
            cfg = cls.config_type()
            return cls(cfg)

    register_processing_unit(FirstIntentParser)
    register_processing_unit(SecondIntentParser)

    mocked_dataset_metadata = {
        "language_code": "en",
        "entities": {
            "mocked_entity": {
                "automatically_extensible": True,
                "utterances": dict()
            }
        },
        "slot_name_mappings": {
            "dummy_intent_1": {
                "mocked_slot_name": "mocked_entity"
            }
        }
    }

    config = NLUEngineConfig(
        [FirstIntentParserConfig(), SecondIntentParserConfig()])
    engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
    # Override the metadata so slot resolution uses the mocked entity.
    # pylint:disable=protected-access
    engine._dataset_metadata = mocked_dataset_metadata
    # pylint:enable=protected-access

    # When
    parse = engine.parse(input_text)

    # Then
    expected_slots = [custom_slot(s) for s in slots]
    expected_parse = parsing_result(input_text, intent, expected_slots)
    self.assertDictEqual(expected_parse, parse)