def parse(self, text, intents=None):
    """Parse *text* by running intent classification first, then the slot
    filler associated with the predicted intent.

    Args:
        text (str): input query
        intents (str or list of str): optional whitelist restricting which
            intents may be returned

    Returns:
        dict: the most likely intent along with its slots, formatted as in
        :func:`.parsing_result`

    Raises:
        NotTrained: if the parser has not been fitted yet
    """
    if not self.fitted:
        raise NotTrained("ProbabilisticIntentParser must be fitted")
    intents_filter = [intents] if isinstance(intents, str) else intents
    classification = self.intent_classifier.get_intent(text, intents_filter)
    if classification is None:
        return empty_result(text)
    filler = self.slot_fillers[classification[RES_INTENT_NAME]]
    return parsing_result(text, classification, filler.get_slots(text))
def parse(self, text, intents=None):
    """Parse *text*: classify the intent, then run the matching slot
    filler to extract slots.

    Args:
        text (str): input query
        intents (str or list of str): optional restriction of the intents
            considered during classification

    Returns:
        dict: the most likely intent and its slots
            (:func:`.parsing_result` format)

    Raises:
        NotTrained: if the parser has not been fitted
    """
    if not self.fitted:
        raise NotTrained("ProbabilisticIntentParser must be fitted")
    logger.debug("Probabilistic intent parser parsing '%s'...", text)
    intents_filter = [intents] if isinstance(intents, str) else intents
    classification = self.intent_classifier.get_intent(text, intents_filter)
    if classification is None:
        return empty_result(text)
    filler = self.slot_fillers[classification[RES_INTENT_NAME]]
    return parsing_result(text, classification, filler.get_slots(text))
def parse(self, text, intents=None, top_n=None):
    """Parse *text* through pattern matching; intent and slots are
    extracted simultaneously.

    Args:
        text (str): input query
        intents (str or list of str): optional restriction of the intents
            considered
        top_n (int, optional): when given, return a list of at most
            ``top_n`` likely intents instead of a single result; the list
            may be shorter (e.g. when ``intents`` filters candidates or
            ``top_n`` exceeds the number of intents)

    Returns:
        dict or list: :func:`.parsing_result` when ``top_n`` is None,
        otherwise a list of :func:`.extraction_result`

    Raises:
        NotTrained: when the intent parser is not fitted
    """
    if top_n is not None:
        return self._parse_top_intents(text, top_n=top_n, intents=intents)
    candidates = self._parse_top_intents(text, top_n=1, intents=intents)
    if not candidates:
        return empty_result(text, probability=1.0)
    best = candidates[0]
    return parsing_result(text, best[RES_INTENT], best[RES_SLOTS])
def test_should_serialize_results(self):
    """parsing_result output must be JSON serializable and well formed."""
    # Given
    text = "hello world"
    classified = intent_classification_result("world", 0.5)
    parsed_slots = [
        unresolved_slot([3, 5], "slot_value", "slot_entity", "slot_name")
    ]

    # When
    result = parsing_result(input=text, intent=classified,
                            slots=parsed_slots)

    # Then
    msg = "Result dict should be json serializable"
    with self.fail_if_exception(msg):
        json.dumps(result)

    expected_slot = {
        RES_MATCH_RANGE: {"start": 3, "end": 5},
        RES_ENTITY: 'slot_entity',
        RES_SLOT_NAME: 'slot_name',
        RES_VALUE: 'slot_value'
    }
    expected = {
        RES_INTENT: {RES_INTENT_NAME: 'world', RES_PROBA: 0.5},
        RES_SLOTS: [expected_slot],
        RES_INPUT: text
    }
    self.assertDictEqual(expected, result)
def parse(self, text, intents=None):
    """Parse *text* by trying each intent parser in order and returning
    the first non-empty result with its slots resolved.

    Args:
        text (str): input query
        intents (str or list of str): optional restriction of the intents
            considered

    Returns:
        dict: the most likely intent with resolved slots
            (:func:`.parsing_result` format)

    Raises:
        NotTrained: when the nlu engine is not fitted
        TypeError: when the input type is not unicode
    """
    logging.info("NLU engine parsing: '%s'...", text)
    if not isinstance(text, str):
        raise TypeError("Expected unicode but received: %s" % type(text))
    intents_filter = [intents] if isinstance(intents, str) else intents
    for intent_parser in self.intent_parsers:
        result = intent_parser.parse(text, intents_filter)
        if is_empty(result):
            continue
        slots = self.resolve_slots(text, result[RES_SLOTS])
        return parsing_result(text, intent=result[RES_INTENT], slots=slots)
    return empty_result(text)
def test_should_parse_slightly_ambiguous_utterances(self):
    """An utterance shared with another intent should resolve to the
    higher-scoring intent with a fractional probability."""
    # Given
    yaml_dataset = io.StringIO("""
---
type: intent
name: intent_1
utterances:
  - call tomorrow

---
type: intent
name: intent_2
utterances:
  - call [time:snips/datetime](today)""")
    dataset = Dataset.from_yaml_files("en", [yaml_dataset]).json
    parser = DeterministicIntentParser().fit(dataset)
    text = "call tomorrow"

    # When
    res = parser.parse(text)

    # Then
    expected_intent = intent_classification_result(
        intent_name="intent_1", probability=2. / 3.)
    expected_result = parsing_result(text, expected_intent, [])
    self.assertEqual(expected_result, res)
def _get_matching_result(self, text, processed_text, regex, intent,
                         builtin_entities_ranges_mapping=None):
    """Match *processed_text* against *regex* and, on success, build the
    parsing result for *intent*.

    Args:
        text (str): the original, unprocessed input
        processed_text (str): input after builtin-entity preprocessing
        regex: compiled pattern whose named groups map to slot names
        intent (str): intent associated with *regex*
        builtin_entities_ranges_mapping (dict, optional): maps ranges in
            *processed_text* back to ranges in the original *text*

    Returns:
        dict or None: :func:`.parsing_result` with probability 1.0, or
        None when the regex does not match
    """
    found_result = regex.match(processed_text)
    if found_result is None:
        return None
    # Deterministic match: report the intent with full confidence.
    parsed_intent = intent_classification_result(intent_name=intent,
                                                 probability=1.0)
    slots = []
    for group_name in found_result.groupdict():
        slot_name = self.group_names_to_slot_names[group_name]
        entity = self.slot_names_to_entities[intent][slot_name]
        rng = (found_result.start(group_name),
               found_result.end(group_name))
        if builtin_entities_ranges_mapping is not None:
            if rng in builtin_entities_ranges_mapping:
                # The group exactly covers a preprocessed builtin entity:
                # map it straight back to its range in the original text.
                rng = builtin_entities_ranges_mapping[rng]
            else:
                # Otherwise shift the range by the length delta introduced
                # by the preceding replacements.
                shift = _get_range_shift(
                    rng, builtin_entities_ranges_mapping)
                rng = {START: rng[0] + shift, END: rng[1] + shift}
        else:
            rng = {START: rng[0], END: rng[1]}
        # Slot value is always read from the original text.
        value = text[rng[START]:rng[END]]
        parsed_slot = unresolved_slot(
            match_range=rng, value=value, entity=entity,
            slot_name=slot_name)
        slots.append(parsed_slot)
    # Drop overlapping slots, then order slots by start position.
    parsed_slots = _deduplicate_overlapping_slots(
        slots, self.language)
    parsed_slots = sorted(parsed_slots,
                          key=lambda s: s[RES_MATCH_RANGE][START])
    return parsing_result(text, parsed_intent, parsed_slots)
def test_should_serialize_results(self):
    """parsing_result output must be JSON serializable and well formed."""
    # Given
    text = "hello world"
    classified = intent_classification_result("world", 0.5)
    parsed_slots = [
        unresolved_slot([3, 5], "slot_value", "slot_entity", "slot_name")
    ]

    # When
    result = parsing_result(input=text, intent=classified,
                            slots=parsed_slots)

    # Then
    msg = "Result dict should be json serializable"
    with self.fail_if_exception(msg):
        json.dumps(result)

    expected_slot = {
        RES_MATCH_RANGE: {"start": 3, "end": 5},
        RES_ENTITY: 'slot_entity',
        RES_SLOT_NAME: 'slot_name',
        RES_VALUE: 'slot_value'
    }
    expected = {
        RES_INTENT: {RES_INTENT_NAME: 'world', RES_PROBABILITY: 0.5},
        RES_SLOTS: [expected_slot],
        RES_INPUT: text
    }
    self.assertDictEqual(expected, result)
def parse(self, text, intents=None, top_n=None):
    """Performs intent parsing on the provided *text* by calling its
    intent parsers successively

    Args:
        text (str): input
        intents (str or list of str, optional): if provided, reduces the
            scope of intent parsing to the provided list of intents
        top_n (int, optional): when provided, this method will return a
            list of at most top_n most likely intents, instead of a single
            parsing result. Note that the returned list can contain less
            than ``top_n`` elements, for instance when the parameter
            ``intents`` is not None, or when ``top_n`` is greater than the
            total number of intents.

    Returns:
        dict or list: the most likely intent(s) along with the extracted
        slots. See :func:`.parsing_result` and :func:`.extraction_result`
        for the output format.

    Raises:
        NotTrained: when the nlu engine is not fitted
        InvalidInputError: when input type is not unicode
    """
    if not isinstance(text, str):
        raise InvalidInputError("Expected unicode but received: %s"
                                % type(text))
    # Normalize the intents filter to a set for O(1) membership tests.
    if isinstance(intents, str):
        intents = {intents}
    elif isinstance(intents, list):
        intents = set(intents)
    if top_n is None:
        none_proba = 0.0
        for parser in self.intent_parsers:
            res = parser.parse(text, intents)
            if is_empty(res):
                # Keep the None-intent probability reported by the last
                # parser; it is used if every parser comes back empty.
                none_proba = res[RES_INTENT][RES_PROBA]
                continue
            # First parser producing a non-empty result wins.
            resolved_slots = self._resolve_slots(text, res[RES_SLOTS])
            return parsing_result(text, intent=res[RES_INTENT],
                                  slots=resolved_slots)
        return empty_result(text, none_proba)

    # top_n mode: rank all intents, filter by the requested scope, then
    # extract slots for each retained intent.
    intents_results = self.get_intents(text)
    if intents is not None:
        intents_results = [
            res for res in intents_results
            if res[RES_INTENT_NAME] in intents
        ]
    intents_results = intents_results[:top_n]
    results = []
    for intent_res in intents_results:
        slots = self.get_slots(text, intent_res[RES_INTENT_NAME])
        results.append(extraction_result(intent_res, slots))
    return results
def parse(self, text, intents=None, top_n=None):
    """Mock parse: the whole input becomes one slot of entity1/slot1."""
    whole_range = (0, len(text))
    slot = unresolved_slot(match_range=whole_range, value=text,
                           entity="entity1", slot_name="slot1")
    return parsing_result(text, mocked_intent, [slot])
def mock_proba_parse(text, intents):
    """Mock parse: the whole input becomes one slot of entity1/slot1."""
    whole_range = (0, len(text))
    slot = unresolved_slot(match_range=whole_range, value=text,
                           entity="entity1", slot_name="slot1")
    return parsing_result(text, mocked_proba_parser_intent, [slot])
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text*

    Intent and slots are extracted simultaneously through pattern matching

    Args:
        text (str): input
        intents (str or list of str): if provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The matched intent, if any, along with the extracted slots.
        See :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the intent parser is not fitted
    """
    if not self.fitted:
        raise NotTrained("DeterministicIntentParser must be fitted")
    if isinstance(intents, str):
        intents = [intents]
    # Builtin entities are replaced by placeholders before matching;
    # ranges_mapping maps placeholder ranges back to ranges in *text*.
    ranges_mapping, processed_text = _replace_builtin_entities(
        text, self.language)
    for intent, regexes in iteritems(self.regexes_per_intent):
        if intents is not None and intent not in intents:
            continue
        for regex in regexes:
            match = regex.match(processed_text)
            if match is None:
                continue
            # First matching regex wins: deterministic, probability 1.
            parsed_intent = intent_classification_result(
                intent_name=intent, probability=1.0)
            slots = []
            for group_name in match.groupdict():
                slot_name = self.group_names_to_slot_names[group_name]
                entity = self.slot_names_to_entities[slot_name]
                rng = (match.start(group_name), match.end(group_name))
                value = match.group(group_name)
                if rng in ranges_mapping:
                    # Group covers a replaced builtin entity: map the
                    # range back and re-read the value from raw text.
                    rng = ranges_mapping[rng]
                    value = text[rng[START]:rng[END]]
                else:
                    rng = {START: rng[0], END: rng[1]}
                parsed_slot = unresolved_slot(match_range=rng,
                                              value=value,
                                              entity=entity,
                                              slot_name=slot_name)
                slots.append(parsed_slot)
            # Drop overlapping slots, then order by start position.
            parsed_slots = _deduplicate_overlapping_slots(
                slots, self.language)
            parsed_slots = sorted(parsed_slots,
                                  key=lambda s: s[RES_MATCH_RANGE][START])
            return parsing_result(text, parsed_intent, parsed_slots)
    return empty_result(text)
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text*

    Intent and slots are extracted simultaneously through pattern matching

    Args:
        text (str): input
        intents (str or list of str): if provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The matched intent, if any, along with the extracted slots.
        See :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the intent parser is not fitted
    """
    if not self.fitted:
        raise NotTrained("DeterministicIntentParser must be fitted")
    if isinstance(intents, str):
        intents = [intents]
    # Replace builtin entities with placeholders; ranges_mapping lets us
    # translate placeholder ranges back into positions in *text*.
    ranges_mapping, processed_text = _replace_builtin_entities(
        text, self.language)
    for intent, regexes in iteritems(self.regexes_per_intent):
        if intents is not None and intent not in intents:
            continue
        for regex in regexes:
            match = regex.match(processed_text)
            if match is None:
                continue
            # A regex match is treated as certain: probability 1.0.
            parsed_intent = intent_classification_result(
                intent_name=intent, probability=1.0)
            slots = []
            for group_name in match.groupdict():
                slot_name = self.group_names_to_slot_names[group_name]
                entity = self.slot_names_to_entities[slot_name]
                rng = (match.start(group_name), match.end(group_name))
                value = match.group(group_name)
                if rng in ranges_mapping:
                    # The group spans a replaced builtin entity: recover
                    # the original range and value from the raw text.
                    rng = ranges_mapping[rng]
                    value = text[rng[START]:rng[END]]
                else:
                    rng = {START: rng[0], END: rng[1]}
                parsed_slot = unresolved_slot(
                    match_range=rng, value=value, entity=entity,
                    slot_name=slot_name)
                slots.append(parsed_slot)
            # Remove overlaps, then sort slots by start offset.
            parsed_slots = _deduplicate_overlapping_slots(
                slots, self.language)
            parsed_slots = sorted(parsed_slots,
                                  key=lambda s: s[RES_MATCH_RANGE][START])
            return parsing_result(text, parsed_intent, parsed_slots)
    return empty_result(text)
def test_should_serialize_results_when_none_values(self):
    """parsing_result must pass None intent/slots through unchanged."""
    # Given
    text = "hello world"

    # When
    result = parsing_result(input=text, intent=None, slots=None)

    # Then
    expected = {RES_INTENT: None, RES_SLOTS: None, RES_INPUT: text}
    self.assertDictEqual(expected, result)
def test_should_serialize_results_when_none_values(self):
    """None intent and slots must be kept as-is in the result dict."""
    # Given
    query = "hello world"

    # When
    result = parsing_result(input=query, intent=None, slots=None)

    # Then
    self.assertDictEqual(
        {RES_INTENT: None, RES_SLOTS: None, RES_INPUT: query},
        result)
def parse(self, text, intents=None, top_n=None):
    """Performs intent parsing on the provided *text* by first classifying
    the intent and then using the corresponding slot filler to extract
    slots

    Args:
        text (str): input
        intents (str or list of str): if provided, reduces the scope of
            intent parsing to the provided list of intents
        top_n (int, optional): when provided, this method will return a
            list of at most top_n most likely intents, instead of a single
            parsing result. Note that the returned list can contain less
            than ``top_n`` elements, for instance when the parameter
            ``intents`` is not None, or when ``top_n`` is greater than the
            total number of intents.

    Returns:
        dict or list: the most likely intent(s) along with the extracted
        slots. See :func:`.parsing_result` and :func:`.extraction_result`
        for the output format.

    Raises:
        NotTrained: when the intent parser is not fitted
    """
    # Normalize the intents filter to a set: consistent with the nlu
    # engine's parse, and O(1) membership tests downstream. (The previous
    # list branch made a pointless ``list(intents)`` copy.)
    if isinstance(intents, str):
        intents = {intents}
    elif isinstance(intents, list):
        intents = set(intents)
    if top_n is None:
        intent_result = self.intent_classifier.get_intent(text, intents)
        intent_name = intent_result[RES_INTENT_NAME]
        if intent_name is not None:
            slots = self.slot_fillers[intent_name].get_slots(text)
        else:
            # No intent recognized: no slot filler to run.
            slots = []
        return parsing_result(text, intent_result, slots)

    # top_n mode: fill slots for each of the top ranked intents.
    results = []
    intents_results = self.intent_classifier.get_intents(text)
    for intent_result in intents_results[:top_n]:
        intent_name = intent_result[RES_INTENT_NAME]
        if intent_name is not None:
            slots = self.slot_fillers[intent_name].get_slots(text)
        else:
            slots = []
        results.append(extraction_result(intent_result, slots))
    return results
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text* by calling its
    intent parsers successively

    Args:
        text (str): input
        intents (str or list of str): if provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The most likely intent along with the extracted slots. See
        :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the nlu engine is not fitted
        TypeError: When input type is not unicode
    """
    logging.info("NLU engine parsing: '%s'...", text)
    if not isinstance(text, str):
        raise TypeError("Expected unicode but received: %s" % type(text))
    if not self.fitted:
        raise NotTrained("SnipsNLUEngine must be fitted")
    if isinstance(intents, str):
        intents = [intents]
    language = self._dataset_metadata["language_code"]
    entities = self._dataset_metadata["entities"]
    for parser in self.intent_parsers:
        res = parser.parse(text, intents)
        if is_empty(res):
            # Try the next parser when this one found nothing.
            continue
        slots = res[RES_SLOTS]
        # Resolution scope: only the builtin entity kinds actually present
        # among the extracted slots.
        scope = [
            s[RES_ENTITY] for s in slots
            if is_builtin_entity(s[RES_ENTITY])
        ]
        resolved_slots = resolve_slots(text, slots, entities, language,
                                       scope)
        return parsing_result(text, intent=res[RES_INTENT],
                              slots=resolved_slots)
    return empty_result(text)
def test_should_use_parsers_sequentially(self):
    """The engine must try its registered parsers in order and return the
    second parser's (resolved) result."""
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
  - hello [greeted:name](john)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    input_text = "hello snips"
    intent = intent_classification_result(intent_name='greeting1',
                                          probability=0.7)
    slots = [
        unresolved_slot(match_range=(6, 11), value='snips',
                        entity='name', slot_name='greeted')
    ]

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        # Inherits MockIntentParser behavior unchanged (presumably an
        # empty result — TODO confirm against MockIntentParser).
        pass

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def parse(self, text, intents=None, top_n=None):
            # Only answers for the expected input text.
            if text == input_text:
                return parsing_result(text, intent, slots)
            return empty_result(text, 1.0)
    # pylint:enable=unused-variable

    config = NLUEngineConfig(
        ["first_intent_parser", "second_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    parse = engine.parse(input_text)

    # Then
    expected_slots = [custom_slot(s) for s in slots]
    expected_parse = parsing_result(input_text, intent, expected_slots)
    self.assertDictEqual(expected_parse, parse)
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text* by calling its
    intent parsers successively

    Args:
        text (str): input
        intents (str or list of str): if provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The most likely intent along with the extracted slots. See
        :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the nlu engine is not fitted
        TypeError: When input type is not unicode
    """
    if not isinstance(text, str):
        raise TypeError("Expected unicode but received: %s" % type(text))
    if not self.fitted:
        raise NotTrained("SnipsNLUEngine must be fitted")
    if isinstance(intents, str):
        intents = [intents]
    language = self._dataset_metadata["language_code"]
    entities = self._dataset_metadata["entities"]
    for parser in self.intent_parsers:
        res = parser.parse(text, intents)
        if is_empty(res):
            # Fall through to the next parser on an empty result.
            continue
        slots = res[RES_SLOTS]
        # Resolve only the builtin entity kinds that actually appear in
        # the extracted slots.
        scope = [s[RES_ENTITY] for s in slots
                 if is_builtin_entity(s[RES_ENTITY])]
        resolved_slots = resolve_slots(text, slots, entities, language,
                                       scope)
        return parsing_result(text, intent=res[RES_INTENT],
                              slots=resolved_slots)
    return empty_result(text)
def parse(self, text, intents):
    """Return the canned result for the expected input, else empty."""
    if text != input_text:
        return empty_result(text)
    return parsing_result(text, intent, slots)
def test_synonyms_should_point_to_base_value(self):
    """A slot whose raw value is an entity synonym must resolve to the
    synonym's base value."""
    # Given
    dataset = {
        "intents": {
            "dummy_intent_1": {
                "utterances": [
                    {
                        "data": [
                            {
                                "text": "dummy_1",
                                "entity": "dummy_entity_1",
                                "slot_name": "dummy_slot_name"
                            }
                        ]
                    }
                ]
            }
        },
        "entities": {
            "dummy_entity_1": {
                "use_synonyms": True,
                "automatically_extensible": False,
                "data": [
                    {
                        "value": "dummy1",
                        "synonyms": [
                            "dummy1",
                            "dummy1_bis"
                        ]
                    }
                ],
                "matching_strictness": 1.0
            }
        },
        "language": "en"
    }
    mocked_intent = intent_classification_result("dummy_intent_1", 1.0)
    # The mocked parser returns the synonym "dummy1_bis" as raw value.
    mocked_slots = [
        unresolved_slot(match_range=(0, 10), value="dummy1_bis",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name")]

    # pylint:disable=unused-variable
    @IntentParser.register("my_intent_parser", True)
    class MyIntentParser(MockIntentParser):
        def parse(self, text, intents=None, top_n=None):
            return parsing_result(text, mocked_intent, mocked_slots)
    # pylint:enable=unused-variable

    input_ = "dummy1_bis"
    config = NLUEngineConfig(["my_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    result = engine.parse(input_)

    # Then
    # Raw value stays the synonym; resolved value is the base "dummy1".
    expected_slot = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 10
        },
        RES_RAW_VALUE: "dummy1_bis",
        RES_VALUE: {
            "kind": "Custom",
            "value": "dummy1"
        },
        RES_ENTITY: "dummy_entity_1",
        RES_SLOT_NAME: "dummy_slot_name"
    }
    expected_result = parsing_result(
        input_, mocked_intent, slots=[expected_slot])
    self.assertEqual(expected_result, result)
def parse(self, text, intents=None, top_n=None):
    """Answer only for the expected input; otherwise report no intent."""
    if text != input_text:
        return empty_result(text, 1.0)
    return parsing_result(text, intent, slots)
def test_synonyms_should_not_collide_when_remapped_to_base_value(self):
    """Two synonyms that differ only by a diacritic must resolve to their
    own distinct base values, not collide after normalization."""
    # Given
    dataset = {
        "intents": {
            "intent1": {
                "utterances": [
                    {
                        "data": [
                            {
                                "text": "value",
                                "entity": "entity1",
                                "slot_name": "slot1"
                            }
                        ]
                    }
                ]
            }
        },
        "entities": {
            "entity1": {
                # "favorïte" -> "a" and "favorite" -> "b": only the
                # diacritic distinguishes the two synonyms.
                "data": [
                    {
                        "value": "a",
                        "synonyms": [
                            "favorïte"
                        ]
                    },
                    {
                        "value": "b",
                        "synonyms": [
                            "favorite"
                        ]
                    }
                ],
                "use_synonyms": True,
                "automatically_extensible": False,
                "matching_strictness": 1.0
            }
        },
        "language": "en",
    }
    mocked_intent = intent_classification_result("intent1", 1.0)

    # pylint:disable=unused-variable
    @IntentParser.register("my_intent_parser", True)
    class MyIntentParser(MockIntentParser):
        def parse(self, text, intents=None, top_n=None):
            # Whole input becomes a single entity1 slot.
            slots = [
                unresolved_slot(match_range=(0, len(text)), value=text,
                                entity="entity1", slot_name="slot1")]
            return parsing_result(text, mocked_intent, slots)
    # pylint:enable=unused-variable

    config = NLUEngineConfig(["my_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    result1 = engine.parse("favorite")
    result2 = engine.parse("favorïte")

    # Then
    expected_slot1 = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 8
        },
        RES_RAW_VALUE: "favorite",
        RES_VALUE: {
            "kind": "Custom",
            "value": "b"
        },
        RES_ENTITY: "entity1",
        RES_SLOT_NAME: "slot1"
    }
    expected_slot2 = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 8
        },
        RES_RAW_VALUE: "favorïte",
        RES_VALUE: {
            "kind": "Custom",
            "value": "a"
        },
        RES_ENTITY: "entity1",
        RES_SLOT_NAME: "slot1"
    }
    expected_result1 = parsing_result("favorite", intent=mocked_intent,
                                      slots=[expected_slot1])
    expected_result2 = parsing_result("favorïte", intent=mocked_intent,
                                      slots=[expected_slot2])
    self.assertEqual(expected_result1, result1)
    self.assertEqual(expected_result2, result2)
def parse(self, text, intents=None, top_n=None):
    """Always return the pre-baked mocked result, whatever the input."""
    canned = parsing_result(text, mocked_intent, mocked_slots)
    return canned
def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                        mocked_crf_parse):
    """A slot for a non-extensible entity with an unknown value must be
    dropped during resolution, while the extensible entity's slot is
    kept."""
    # Given
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {
            "dummy_intent_1": {
                "utterances": [{
                    "data": [{
                        "text": "dummy_1",
                        "entity": "dummy_entity_1",
                        "slot_name": "dummy_slot_name"
                    }, {
                        "text": " dummy_2",
                        "entity": "dummy_entity_2",
                        "slot_name": "other_dummy_slot_name"
                    }]
                }]
            }
        },
        "entities": {
            # Closed entity: values outside its data must not match.
            "dummy_entity_1": {
                "use_synonyms": True,
                "automatically_extensible": False,
                "data": [{
                    "value": "dummy1",
                    "synonyms": ["dummy1", "dummy1_bis"]
                }, {
                    "value": "dummy2",
                    "synonyms": ["dummy2", "dummy2_bis"]
                }]
            },
            # Open entity: unseen values are accepted.
            "dummy_entity_2": {
                "use_synonyms": False,
                "automatically_extensible": True,
                "data": [{
                    "value": "dummy2",
                    "synonyms": ["dummy2"]
                }]
            }
        },
        "language": "en"
    }
    text = "dummy_3 dummy_4"
    mocked_crf_intent = intent_classification_result("dummy_intent_1", 1.0)
    mocked_crf_slots = [
        unresolved_slot(match_range=(0, 7), value="dummy_3",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name"),
        unresolved_slot(match_range=(8, 15), value="dummy_4",
                        entity="dummy_entity_2",
                        slot_name="other_dummy_slot_name")
    ]
    mocked_regex_parse.return_value = empty_result(text)
    mocked_crf_parse.return_value = parsing_result(text, mocked_crf_intent,
                                                   mocked_crf_slots)
    engine = SnipsNLUEngine()

    # When
    engine = engine.fit(dataset)
    result = engine.parse(text)

    # Then
    # Only the extensible entity's slot survives resolution.
    expected_slot = custom_slot(
        unresolved_slot(match_range=(8, 15), value="dummy_4",
                        entity="dummy_entity_2",
                        slot_name="other_dummy_slot_name"))
    expected_result = parsing_result(text, intent=mocked_crf_intent,
                                     slots=[expected_slot])
    self.assertEqual(expected_result, result)
def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                        mocked_crf_parse):
    """Resolution must drop the slot of the closed (non-extensible)
    entity whose value is unknown, keeping only the open entity's slot."""
    # Given
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {
            "dummy_intent_1": {
                "utterances": [
                    {
                        "data": [
                            {
                                "text": "dummy_1",
                                "entity": "dummy_entity_1",
                                "slot_name": "dummy_slot_name"
                            },
                            {
                                "text": " dummy_2",
                                "entity": "dummy_entity_2",
                                "slot_name": "other_dummy_slot_name"
                            }
                        ]
                    }
                ]
            }
        },
        "entities": {
            # Closed entity: not automatically extensible.
            "dummy_entity_1": {
                "use_synonyms": True,
                "automatically_extensible": False,
                "data": [
                    {
                        "value": "dummy1",
                        "synonyms": [
                            "dummy1",
                            "dummy1_bis"
                        ]
                    },
                    {
                        "value": "dummy2",
                        "synonyms": [
                            "dummy2",
                            "dummy2_bis"
                        ]
                    }
                ]
            },
            # Open entity: accepts values outside its data.
            "dummy_entity_2": {
                "use_synonyms": False,
                "automatically_extensible": True,
                "data": [
                    {
                        "value": "dummy2",
                        "synonyms": [
                            "dummy2"
                        ]
                    }
                ]
            }
        },
        "language": "en"
    }
    text = "dummy_3 dummy_4"
    mocked_crf_intent = intent_classification_result("dummy_intent_1", 1.0)
    mocked_crf_slots = [unresolved_slot(match_range=(0, 7),
                                        value="dummy_3",
                                        entity="dummy_entity_1",
                                        slot_name="dummy_slot_name"),
                        unresolved_slot(match_range=(8, 15),
                                        value="dummy_4",
                                        entity="dummy_entity_2",
                                        slot_name="other_dummy_slot_name")]
    mocked_regex_parse.return_value = empty_result(text)
    mocked_crf_parse.return_value = parsing_result(
        text, mocked_crf_intent, mocked_crf_slots)
    engine = SnipsNLUEngine()

    # When
    engine = engine.fit(dataset)
    result = engine.parse(text)

    # Then
    # Only the extensible entity's slot is expected in the output.
    expected_slot = custom_slot(unresolved_slot(
        match_range=(8, 15), value="dummy_4", entity="dummy_entity_2",
        slot_name="other_dummy_slot_name"))
    expected_result = parsing_result(text, intent=mocked_crf_intent,
                                     slots=[expected_slot])
    self.assertEqual(expected_result, result)
def test_synonyms_should_point_to_base_value(self, mocked_deter_parse,
                                             mocked_proba_parse):
    """With parsers mocked, a synonym raw value must be resolved to its
    base value by the engine."""
    # Given
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {
            "dummy_intent_1": {
                "utterances": [
                    {
                        "data": [
                            {
                                "text": "dummy_1",
                                "entity": "dummy_entity_1",
                                "slot_name": "dummy_slot_name"
                            }
                        ]
                    }
                ]
            }
        },
        "entities": {
            "dummy_entity_1": {
                "use_synonyms": True,
                "automatically_extensible": False,
                "data": [
                    {
                        "value": "dummy1",
                        "synonyms": [
                            "dummy1",
                            "dummy1_bis"
                        ]
                    }
                ]
            }
        },
        "language": "en"
    }
    text = "dummy1_bis"
    mocked_proba_parser_intent = intent_classification_result(
        "dummy_intent_1", 1.0)
    # Probabilistic parser mock returns the synonym as raw slot value.
    mocked_proba_parser_slots = [
        unresolved_slot(match_range=(0, 10), value="dummy1_bis",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name")]
    # Deterministic parser mock finds nothing, so the engine falls
    # through to the probabilistic parser.
    mocked_deter_parse.return_value = empty_result(text)
    mocked_proba_parse.return_value = parsing_result(
        text, mocked_proba_parser_intent, mocked_proba_parser_slots)
    engine = SnipsNLUEngine().fit(dataset)

    # When
    result = engine.parse(text)

    # Then
    # Raw value stays the synonym; resolved value is the base "dummy1".
    expected_slot = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 10
        },
        RES_RAW_VALUE: "dummy1_bis",
        RES_VALUE: {
            "kind": "Custom",
            "value": "dummy1"
        },
        RES_ENTITY: "dummy_entity_1",
        RES_SLOT_NAME: "dummy_slot_name"
    }
    expected_result = parsing_result(
        text, intent=mocked_proba_parser_intent, slots=[expected_slot])
    self.assertEqual(expected_result, result)
def test_should_use_parsers_sequentially(self):
    """The engine must try its configured parsers in order and return the
    second parser's result (resolved) when the first one is empty."""
    # Given
    input_text = "hello world"
    intent = intent_classification_result(intent_name='dummy_intent_1',
                                          probability=0.7)
    slots = [
        unresolved_slot(match_range=(6, 11), value='world',
                        entity='mocked_entity',
                        slot_name='mocked_slot_name')
    ]

    class FirstIntentParserConfig(ProcessingUnitConfig):
        unit_name = "first_intent_parser"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return FirstIntentParserConfig()

        def get_required_resources(self):
            return None

    class FirstIntentParser(IntentParser):
        # Always yields an empty result, forcing fallback to the second
        # parser.
        unit_name = "first_intent_parser"
        config_type = FirstIntentParserConfig

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return hasattr(self, '_fitted') and self._fitted

        def parse(self, text, intents):
            return empty_result(text)

        def persist(self, path):
            path = Path(path)
            path.mkdir()
            with (path / "metadata.json").open(mode="w") as f:
                f.write(json_string({"unit_name": self.unit_name}))

        @classmethod
        def from_path(cls, path):
            cfg = cls.config_type()
            return cls(cfg)

    class SecondIntentParserConfig(ProcessingUnitConfig):
        unit_name = "second_intent_parser"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return SecondIntentParserConfig()

        def get_required_resources(self):
            return None

    class SecondIntentParser(IntentParser):
        # Answers only for the expected input text.
        unit_name = "second_intent_parser"
        config_type = SecondIntentParserConfig

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return hasattr(self, '_fitted') and self._fitted

        def parse(self, text, intents):
            if text == input_text:
                return parsing_result(text, intent, slots)
            return empty_result(text)

        def persist(self, path):
            path = Path(path)
            path.mkdir()
            with (path / "metadata.json").open(mode="w") as f:
                f.write(json_string({"unit_name": self.unit_name}))

        @classmethod
        def from_path(cls, path):
            cfg = cls.config_type()
            return cls(cfg)

    register_processing_unit(FirstIntentParser)
    register_processing_unit(SecondIntentParser)

    # Metadata is patched directly so slot resolution sees the mocked
    # entity/slot mapping rather than SAMPLE_DATASET's.
    mocked_dataset_metadata = {
        "language_code": "en",
        "entities": {
            "mocked_entity": {
                "automatically_extensible": True,
                "utterances": dict()
            }
        },
        "slot_name_mappings": {
            "dummy_intent_1": {
                "mocked_slot_name": "mocked_entity"
            }
        }
    }

    config = NLUEngineConfig(
        [FirstIntentParserConfig(), SecondIntentParserConfig()])
    engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
    # pylint:disable=protected-access
    engine._dataset_metadata = mocked_dataset_metadata
    # pylint:enable=protected-access

    # When
    parse = engine.parse(input_text)

    # Then
    expected_slots = [custom_slot(s) for s in slots]
    expected_parse = parsing_result(input_text, intent, expected_slots)
    self.assertDictEqual(expected_parse, parse)
def parse(self, text, intents):
    """Return the canned parsing result for the expected input only."""
    matches_expected = (text == input_text)
    if matches_expected:
        return parsing_result(text, intent, slots)
    return empty_result(text)
def test_should_use_parsers_sequentially(self):
    """With the first parser returning empty results, the engine must use
    the second parser's result and resolve its slots."""
    # Given
    input_text = "hello world"
    intent = intent_classification_result(
        intent_name='dummy_intent_1', probability=0.7)
    slots = [unresolved_slot(match_range=(6, 11),
                             value='world',
                             entity='mocked_entity',
                             slot_name='mocked_slot_name')]

    class TestIntentParser1Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser1"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser1Config()

    class TestIntentParser1(IntentParser):
        # Always empty: the engine should fall through to parser 2.
        unit_name = "test_intent_parser1"
        config_type = TestIntentParser1Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return hasattr(self, '_fitted') and self._fitted

        def parse(self, text, intents):
            return empty_result(text)

        def to_dict(self):
            return {
                "unit_name": self.unit_name,
            }

        @classmethod
        def from_dict(cls, unit_dict):
            conf = cls.config_type()
            return TestIntentParser1(conf)

    class TestIntentParser2Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser2"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser2Config()

    class TestIntentParser2(IntentParser):
        # Answers only for the expected input text.
        unit_name = "test_intent_parser2"
        config_type = TestIntentParser2Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return hasattr(self, '_fitted') and self._fitted

        def parse(self, text, intents):
            if text == input_text:
                return parsing_result(text, intent, slots)
            return empty_result(text)

        def to_dict(self):
            return {
                "unit_name": self.unit_name,
            }

        @classmethod
        def from_dict(cls, unit_dict):
            conf = cls.config_type()
            return TestIntentParser2(conf)

    register_processing_unit(TestIntentParser1)
    register_processing_unit(TestIntentParser2)

    # Metadata patched so slot resolution uses the mocked entity mapping.
    mocked_dataset_metadata = {
        "language_code": "en",
        "entities": {
            "mocked_entity": {
                "automatically_extensible": True,
                "utterances": dict()
            }
        },
        "slot_name_mappings": {
            "dummy_intent_1": {
                "mocked_slot_name": "mocked_entity"
            }
        }
    }

    config = NLUEngineConfig([TestIntentParser1Config(),
                              TestIntentParser2Config()])
    engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
    # pylint:disable=protected-access
    engine._dataset_metadata = mocked_dataset_metadata
    # pylint:enable=protected-access

    # When
    parse = engine.parse(input_text)

    # Then
    expected_slots = [custom_slot(s) for s in slots]
    expected_parse = parsing_result(input_text, intent, expected_slots)
    self.assertDictEqual(expected_parse, parse)
def test_should_use_parsers_sequentially(self):
    """The engine must query its intent parsers in order, keeping the
    first non-empty result (produced here by the second parser)."""
    # Given
    input_text = "hello world"
    intent = intent_classification_result(
        intent_name='dummy_intent_1', probability=0.7)
    slots = [
        unresolved_slot(match_range=(6, 11),
                        value='world',
                        entity='mocked_entity',
                        slot_name='mocked_slot_name')
    ]

    class FirstParserConfig(ProcessingUnitConfig):
        unit_name = "test_intent_parser1"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return FirstParserConfig()

    class FirstParser(IntentParser):
        """Mock parser that never matches, so the engine must fall
        through to the next configured parser."""
        unit_name = "test_intent_parser1"
        config_type = FirstParserConfig

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return getattr(self, '_fitted', False)

        def parse(self, text, intents):
            return empty_result(text)

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, unit_dict):
            return FirstParser(cls.config_type())

    class SecondParserConfig(ProcessingUnitConfig):
        unit_name = "test_intent_parser2"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return SecondParserConfig()

    class SecondParser(IntentParser):
        """Mock parser that succeeds only on this test's input text."""
        unit_name = "test_intent_parser2"
        config_type = SecondParserConfig

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return getattr(self, '_fitted', False)

        def parse(self, text, intents):
            if text != input_text:
                return empty_result(text)
            return parsing_result(text, intent, slots)

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, unit_dict):
            return SecondParser(cls.config_type())

    # Both mock parsers must be registered so the engine can build them
    # from their unit names.
    register_processing_unit(FirstParser)
    register_processing_unit(SecondParser)

    mocked_dataset_metadata = {
        "language_code": "en",
        "entities": {
            "mocked_entity": {
                "automatically_extensible": True,
                "utterances": {}
            }
        },
        "slot_name_mappings": {
            "dummy_intent_1": {"mocked_slot_name": "mocked_entity"}
        }
    }
    engine_config = NLUEngineConfig(
        [FirstParserConfig(), SecondParserConfig()])
    engine = SnipsNLUEngine(engine_config).fit(SAMPLE_DATASET)
    # Patch the dataset metadata so slot resolution knows about the
    # mocked entity.
    # pylint:disable=protected-access
    engine._dataset_metadata = mocked_dataset_metadata
    # pylint:enable=protected-access

    # When
    parse = engine.parse(input_text)

    # Then
    expected_slots = [custom_slot(s) for s in slots]
    expected_parse = parsing_result(input_text, intent, expected_slots)
    self.assertDictEqual(expected_parse, parse)
def test_synonyms_should_not_collide_when_remapped_to_base_value(
        self, mocked_proba_parse):
    """Two synonyms that differ only by an accent ("favorite" vs
    "favorïte") must each resolve to their own entity base value."""
    # Given
    dataset = {
        "intents": {
            "intent1": {
                "utterances": [
                    {
                        "data": [
                            {
                                "text": "value",
                                "entity": "entity1",
                                "slot_name": "slot1"
                            }
                        ]
                    }
                ]
            }
        },
        "entities": {
            "entity1": {
                "data": [
                    {"value": "a", "synonyms": ["favorïte"]},
                    {"value": "b", "synonyms": ["favorite"]},
                ],
                "use_synonyms": True,
                "automatically_extensible": False
            }
        },
        "language": "en",
    }

    fake_intent = intent_classification_result("intent1", 1.0)

    def fake_parse(text, intents):
        # Mimic the probabilistic parser: the whole input is one slot.
        whole_input_slot = unresolved_slot(match_range=(0, len(text)),
                                           value=text,
                                           entity="entity1",
                                           slot_name="slot1")
        return parsing_result(text, fake_intent, [whole_input_slot])

    mocked_proba_parse.side_effect = fake_parse

    engine_config = NLUEngineConfig([ProbabilisticIntentParserConfig()])
    engine = SnipsNLUEngine(engine_config).fit(dataset)

    # When
    result1 = engine.parse("favorite")
    result2 = engine.parse("favorïte")

    # Then
    expected_slot1 = {
        RES_MATCH_RANGE: {"start": 0, "end": 8},
        RES_RAW_VALUE: "favorite",
        RES_VALUE: {"kind": "Custom", "value": "b"},
        RES_ENTITY: "entity1",
        RES_SLOT_NAME: "slot1"
    }
    expected_slot2 = {
        RES_MATCH_RANGE: {"start": 0, "end": 8},
        RES_RAW_VALUE: "favorïte",
        RES_VALUE: {"kind": "Custom", "value": "a"},
        RES_ENTITY: "entity1",
        RES_SLOT_NAME: "slot1"
    }
    expected_result1 = parsing_result(
        "favorite", intent=fake_intent, slots=[expected_slot1])
    expected_result2 = parsing_result(
        "favorïte", intent=fake_intent, slots=[expected_slot2])
    self.assertEqual(expected_result1, result1)
    self.assertEqual(expected_result2, result2)
def test_synonyms_should_point_to_base_value(self, mocked_deter_parse,
                                             mocked_proba_parse):
    """Check that a slot matched on a synonym ("dummy1_bis") is resolved
    to the entity entry's base value ("dummy1")."""
    # Given
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {
            "dummy_intent_1": {
                "utterances": [{
                    "data": [{
                        "text": "dummy_1",
                        "entity": "dummy_entity_1",
                        "slot_name": "dummy_slot_name"
                    }]
                }]
            }
        },
        "entities": {
            "dummy_entity_1": {
                "use_synonyms": True,
                "automatically_extensible": False,
                "data": [{
                    "value": "dummy1",
                    "synonyms": ["dummy1", "dummy1_bis"]
                }]
            }
        },
        "language": "en"
    }
    text = "dummy1_bis"
    mocked_proba_parser_intent = intent_classification_result(
        "dummy_intent_1", 1.0)
    mocked_proba_parser_slots = [
        unresolved_slot(match_range=(0, 10), value="dummy1_bis",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name")
    ]
    # The deterministic parser yields nothing, so the engine falls back
    # to the (mocked) probabilistic parser, which matches the synonym.
    mocked_deter_parse.return_value = empty_result(text)
    mocked_proba_parse.return_value = parsing_result(
        text, mocked_proba_parser_intent, mocked_proba_parser_slots)
    engine = SnipsNLUEngine().fit(dataset)

    # When
    result = engine.parse(text)

    # Then
    # The raw value keeps the matched synonym, while the resolved value
    # is the base value of the entity entry.
    expected_slot = {
        RES_MATCH_RANGE: {
            "start": 0,
            "end": 10
        },
        RES_RAW_VALUE: "dummy1_bis",
        RES_VALUE: {
            "kind": "Custom",
            "value": "dummy1"
        },
        RES_ENTITY: "dummy_entity_1",
        RES_SLOT_NAME: "dummy_slot_name"
    }
    expected_result = parsing_result(text,
                                     intent=mocked_proba_parser_intent,
                                     slots=[expected_slot])
    self.assertEqual(expected_result, result)
def test_should_be_deserializable(self):
    """A LookupIntentParser reloaded from disk must parse exactly as
    the serialized lookup table dictates."""
    # Given: a serialized lookup parser knowing two intents
    serialized_parser = {
        "config": {
            "unit_name": "lookup_intent_parser",
            "ignore_stop_words": True
        },
        "language_code": "en",
        # Keys are hashes of normalized utterances; values are
        # [intent index, slot indices].
        "map": {
            hash_str("make coffee"): [0, []],
            hash_str("prepare % snipsnumber % coffees"): [0, [0]],
            hash_str("% snipsnumber % teas at % snipstemperature %"):
                [1, [0, 1]],
        },
        "slots_names": ["nb_cups", "tea_temperature"],
        "intents_names": ["MakeCoffee", "MakeTea"],
        "entity_scopes": [
            {
                "entity_scope": {
                    "builtin": ["snips/number"],
                    "custom": [],
                },
                "intent_group": ["MakeCoffee"]
            },
            {
                "entity_scope": {
                    "builtin": ["snips/number", "snips/temperature"],
                    "custom": [],
                },
                "intent_group": ["MakeTea"]
            },
        ],
        "stop_words_whitelist": {}
    }
    self.tmp_file_path.mkdir()
    self.writeJsonContent(self.tmp_file_path / "intent_parser.json",
                          serialized_parser)
    self.writeJsonContent(self.tmp_file_path / "metadata.json",
                          {"unit_name": "lookup_intent_parser"})

    # When
    parser = LookupIntentParser.from_path(
        self.tmp_file_path,
        custom_entity_parser=EntityParserMock(),
        builtin_entity_parser=BuiltinEntityParser.build(language="en"),
        resources=self.get_resources("en"))
    res_make_coffee = parser.parse("make me a coffee")
    res_make_tea = parser.parse("two teas at 90°C please")

    # Then
    expected_result_coffee = parsing_result(
        input="make me a coffee",
        intent=intent_classification_result("MakeCoffee", 1.0),
        slots=[])
    expected_tea_slots = [
        {
            "entity": "snips/number",
            "range": {"start": 0, "end": 3},
            "slotName": "nb_cups",
            "value": "two"
        },
        {
            "entity": "snips/temperature",
            "range": {"start": 12, "end": 16},
            "slotName": "tea_temperature",
            "value": "90°C"
        },
    ]
    expected_result_tea = parsing_result(
        input="two teas at 90°C please",
        intent=intent_classification_result("MakeTea", 1.0),
        slots=expected_tea_slots)
    self.assertEqual(expected_result_coffee, res_make_coffee)
    self.assertEqual(expected_result_tea, res_make_tea)