def get_intents(self, text):
    """Return a hard-coded intent ranking, regardless of *text*.

    Args:
        text (str): Input (ignored)

    Returns:
        list: Classification results for "intent2" (0.6), "intent1" (0.2),
        the None intent (0.15) and "intent3" (0.05), in that order.
    """
    fixed_ranking = (
        ("intent2", 0.6),
        ("intent1", 0.2),
        (None, 0.15),
        ("intent3", 0.05),
    )
    return [intent_classification_result(name, proba)
            for name, proba in fixed_ranking]
def _get_intents(self, text, intents_filter):
    """Score the known intents for *text*, most probable first.

    Args:
        text (str): Input
        intents_filter (str or list of str): When not None, only the
            listed intents, plus the None intent, are kept in the scored
            output

    Returns:
        list: Intent classification results. When the text, the intent
        list or the featurizer is missing, the None intent gets 1.0 and
        every other intent 0.0. When a single intent is known it gets 1.0.
        Otherwise results are sorted by decreasing probability.
    """
    if isinstance(intents_filter, str):
        intents_filter = {intents_filter}
    elif isinstance(intents_filter, list):
        intents_filter = set(intents_filter)

    if not (text and self.intent_list and self.featurizer):
        # Nothing to classify with: everything goes to the None intent
        fallback = [intent_classification_result(None, 1.0)]
        for name in self.intent_list:
            if name is not None:
                fallback.append(intent_classification_result(name, 0.0))
        return fallback

    if len(self.intent_list) == 1:
        only_intent = self.intent_list[0]
        return [intent_classification_result(only_intent, 1.0)]

    features = self.featurizer.transform([text_to_utterance(text)])
    proba_vec = self._predict_proba(features)
    logger.debug(
        "%s",
        DifferedLoggingMessage(self.log_activation_weights, text, features))

    scored = []
    for name, proba in zip(self.intent_list, proba_vec[0]):
        filtered_out = (intents_filter is not None
                        and name is not None
                        and name not in intents_filter)
        if filtered_out:
            continue
        scored.append(intent_classification_result(name, proba))

    # Negated key rather than reverse=True, so ties keep their input order
    scored.sort(key=lambda res: -res[RES_PROBA])
    return scored
def test_should_get_intents(self):
    """Check the intent ranking produced by an engine with two parsers.

    Two mocked parsers return different probabilities for the same
    intents; the expected list carries, for each intent, the higher of the
    two mocked values, sorted by decreasing probability.
    """
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
  - hello

---
type: intent
name: greeting2
utterances:
  - how are you""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    input_text = "hello world"

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        def get_intents(self, text):
            return [
                intent_classification_result("greeting1", 0.5),
                intent_classification_result("greeting2", 0.3),
                intent_classification_result(None, 0.2),
            ]

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def get_intents(self, text):
            return [
                intent_classification_result("greeting2", 0.6),
                intent_classification_result("greeting1", 0.2),
                intent_classification_result(None, 0.1),
            ]

    # pylint:enable=unused-variable

    config = NLUEngineConfig(
        ["first_intent_parser", "second_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    res_intents = engine.get_intents(input_text)

    # Then
    expected_intents = [
        intent_classification_result("greeting2", 0.6),
        intent_classification_result("greeting1", 0.5),
        intent_classification_result(None, 0.2),
    ]
    self.assertListEqual(expected_intents, res_intents)
def test_should_parse_intent_with_ambivalent_words(self):
    """Parse an utterance where "daisy" appears as a plain word.

    In the training data "daisy" is both a flower name and a value of the
    "name" slot; the parsed text uses it as a flower and "emily" as the
    name, and only "emily" is expected as a slot.
    """
    # Given
    slots_dataset_stream = io.StringIO("""
---
type: intent
name: give_flower
utterances:
  - give a rose to [name](emily)
  - give a daisy to [name](tom)
  - give a tulip to [name](daisy)
""")
    dataset = Dataset.from_yaml_files("en", [slots_dataset_stream]).json
    parser = LookupIntentParser().fit(dataset)
    text = "give a daisy to emily"

    # When
    parsing = parser.parse(text)

    # Then
    expected_intent = intent_classification_result(
        intent_name="give_flower", probability=1.0)
    expected_slots = [
        {
            "entity": "name",
            "range": {"end": 21, "start": 16},
            "slotName": "name",
            "value": "emily",
        }
    ]
    self.assertDictEqual(expected_intent, parsing[RES_INTENT])
    self.assertListEqual(expected_slots, parsing[RES_SLOTS])
def test_should_serialize_results(self):
    """Check that a parsing result is JSON serializable and well-formed."""
    # Given
    input_ = "hello world"
    intent = intent_classification_result("world", 0.5)
    slot = unresolved_slot(
        [3, 5], "slot_value", "slot_entity", "slot_name")

    # When
    result = parsing_result(input=input_, intent=intent, slots=[slot])

    # Then
    with self.fail_if_exception("Result dict should be json serializable"):
        json.dumps(result)

    expected_intent = {RES_INTENT_NAME: 'world', RES_PROBABILITY: 0.5}
    expected_slot = {
        RES_MATCH_RANGE: {"start": 3, "end": 5},
        RES_ENTITY: 'slot_entity',
        RES_SLOT_NAME: 'slot_name',
        RES_VALUE: 'slot_value',
    }
    expected_result = {
        RES_INTENT: expected_intent,
        RES_SLOTS: [expected_slot],
        RES_INPUT: input_,
    }
    self.assertDictEqual(expected_result, result)
def test_should_parse_slightly_ambiguous_utterances(self):
    """Parse a text that is a literal utterance of one intent.

    "call tomorrow" is a verbatim utterance of intent_1, while intent_2
    has "call" followed by a datetime slot. The test expects intent_1 with
    probability 2/3 and no slots.
    """
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: intent_1
utterances:
  - call tomorrow

---
type: intent
name: intent_2
utterances:
  - call [time:snips/datetime](today)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    parser = DeterministicIntentParser().fit(dataset)
    text = "call tomorrow"

    # When
    res = parser.parse(text)

    # Then
    expected_intent = intent_classification_result(
        intent_name="intent_1", probability=2. / 3.)
    expected_result = parsing_result(text, expected_intent, [])
    self.assertEqual(expected_result, res)
def test_should_parse_intent(self):
    """Check that the intent whose utterance equals the text is returned.

    The two training utterances differ only by their last word; the text
    matches the utterance of intent2 and probability 1.0 is expected.
    """
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - foo bar baz

---
type: intent
name: intent2
utterances:
  - foo bar ban""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    parser = DeterministicIntentParser().fit(dataset)
    text = "foo bar ban"

    # When
    parsing = parser.parse(text)

    # Then
    expected_intent = intent_classification_result(
        intent_name="intent2", probability=1.0)
    self.assertEqual(expected_intent, parsing[RES_INTENT])
def test_should_serialize_results(self):
    """Check that a parsing result is JSON serializable and well-formed."""
    # Given
    input_ = "hello world"
    intent = intent_classification_result("world", 0.5)
    slot = unresolved_slot(
        [3, 5], "slot_value", "slot_entity", "slot_name")

    # When
    result = parsing_result(input=input_, intent=intent, slots=[slot])

    # Then
    with self.fail_if_exception("Result dict should be json serializable"):
        json.dumps(result)

    expected_intent = {RES_INTENT_NAME: 'world', RES_PROBA: 0.5}
    expected_slot = {
        RES_MATCH_RANGE: {"start": 3, "end": 5},
        RES_ENTITY: 'slot_entity',
        RES_SLOT_NAME: 'slot_name',
        RES_VALUE: 'slot_value',
    }
    expected_result = {
        RES_INTENT: expected_intent,
        RES_SLOTS: [expected_slot],
        RES_INPUT: input_,
    }
    self.assertDictEqual(expected_result, result)
def test_empty_vocabulary_should_fit_and_return_none_intent(
        self, mocked_build_training):
    """Fit on whitespace-only training utterances and expect the None intent.

    The training-data builder is mocked to return only " " utterances, so
    the classifier is fitted on text carrying no words.

    Args:
        mocked_build_training: Mock whose return value stands in for the
            (utterances, labels, intent_list) training triple
    """
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: dummy_intent_1
utterances:
  - "[dummy_slot_name:dummy_entity_1](...)"

---
type: entity
name: dummy_entity_1
automatically_extensible: true
use_synonyms: false
matching_strictness: 1.0
values:
  - ...
""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    text = " "
    noise_size = 6
    # One utterance for the intent, then `noise_size` noise utterances
    utterances = [text_to_utterance(text) for _ in range(1 + noise_size)]
    labels = [0] + [1] * noise_size
    intent_list = ["dummy_intent_1", None]
    mocked_build_training.return_value = utterances, labels, intent_list

    # When / Then
    intent_classifier = LogRegIntentClassifier().fit(dataset)
    intent = intent_classifier.get_intent("no intent there")
    self.assertEqual(intent_classification_result(None, 1.0), intent)
def test_should_get_none_intent_when_empty_input(self):
    """Check that an empty text is classified as the None intent (1.0)."""
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: my_first_intent
utterances:
  - how are you
  - hello how are you?
  - what's up

---
type: intent
name: my_second_intent
utterances:
  - what is the weather today ?
  - does it rain
  - will it rain tomorrow""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    classifier = LogRegIntentClassifier().fit(dataset)
    text = ""

    # When
    result = classifier.get_intent(text)

    # Then
    self.assertEqual(intent_classification_result(None, 1.0), result)
def _get_matching_result(self, text, processed_text, regex, intent,
                         builtin_entities_ranges_mapping=None):
    """Build a parsing result when *regex* matches *processed_text*.

    Args:
        text (str): Original input, used to read the slot values
        processed_text (str): Text the regex is matched against
        regex: Compiled pattern whose named groups identify the slots
        intent (str): Intent the pattern belongs to
        builtin_entities_ranges_mapping (dict, optional): Maps ranges found
            in *processed_text* to ranges in *text*. When None, the ranges
            of the match are used as they are.

    Returns:
        dict or None: None when the pattern does not match, otherwise the
        parsing result with probability 1.0 and the slots sorted by start
        position.
    """
    match = regex.match(processed_text)
    if match is None:
        return None

    parsed_intent = intent_classification_result(
        intent_name=intent, probability=1.0)

    found_slots = []
    for group_name in match.groupdict():
        slot_name = self.group_names_to_slot_names[group_name]
        entity = self.slot_names_to_entities[intent][slot_name]
        span = (match.start(group_name), match.end(group_name))

        if builtin_entities_ranges_mapping is None:
            rng = {START: span[0], END: span[1]}
        elif span in builtin_entities_ranges_mapping:
            rng = builtin_entities_ranges_mapping[span]
        else:
            # Not a mapped range itself: translate it by the shift that the
            # mapping yields for this position
            shift = _get_range_shift(span, builtin_entities_ranges_mapping)
            rng = {START: span[0] + shift, END: span[1] + shift}

        found_slots.append(unresolved_slot(
            match_range=rng,
            value=text[rng[START]:rng[END]],
            entity=entity,
            slot_name=slot_name))

    deduplicated = _deduplicate_overlapping_slots(found_slots, self.language)
    ordered_slots = sorted(
        deduplicated, key=lambda s: s[RES_MATCH_RANGE][START])
    return parsing_result(text, parsed_intent, ordered_slots)
def test_should_parse_top_intents(self):
    """Check top-n parsing when a single intent matches the text.

    Asking for the top 3 results on a text equal to an utterance of
    intent1 is expected to return only intent1, with probability 1.0 and
    no slots.
    """
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - hello world

---
type: intent
name: intent2
utterances:
  - foo bar""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    parser = DeterministicIntentParser().fit(dataset)
    text = "hello world"

    # When
    results = parser.parse(text, top_n=3)

    # Then
    expected_intent = intent_classification_result(
        intent_name="intent1", probability=1.0)
    expected_results = [extraction_result(expected_intent, [])]
    self.assertEqual(expected_results, results)
def parse(self, text, intents=None):
    """Extract the intent and its slots from *text* by pattern matching

    The patterns of each intent are tried in turn and the first one that
    matches determines both the intent and the slots.

    Args:
        text (str): Input
        intents (str or list of str): If provided, only the patterns of
            these intents are tried

    Returns:
        dict: The matched intent, if any, along with the extracted slots.
        See :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the intent parser is not fitted
    """
    if not self.fitted:
        raise NotTrained("DeterministicIntentParser must be fitted")

    if isinstance(intents, str):
        intents = [intents]

    ranges_mapping, processed_text = _replace_builtin_entities(
        text, self.language)

    for intent, regexes in iteritems(self.regexes_per_intent):
        if not (intents is None or intent in intents):
            continue
        for regex in regexes:
            match = regex.match(processed_text)
            if match is None:
                continue

            parsed_intent = intent_classification_result(
                intent_name=intent, probability=1.0)
            found_slots = []
            for group_name in match.groupdict():
                slot_name = self.group_names_to_slot_names[group_name]
                entity = self.slot_names_to_entities[slot_name]
                span = (match.start(group_name), match.end(group_name))
                if span in ranges_mapping:
                    # Mapped range: read the value from the original text
                    rng = ranges_mapping[span]
                    value = text[rng[START]:rng[END]]
                else:
                    rng = {START: span[0], END: span[1]}
                    value = match.group(group_name)
                found_slots.append(unresolved_slot(
                    match_range=rng, value=value, entity=entity,
                    slot_name=slot_name))

            deduplicated = _deduplicate_overlapping_slots(
                found_slots, self.language)
            ordered_slots = sorted(
                deduplicated, key=lambda s: s[RES_MATCH_RANGE][START])
            return parsing_result(text, parsed_intent, ordered_slots)

    return empty_result(text)
def get_intents(self, text):
    """Rank every intent for *text*, by decreasing probability

    Intents returned by the top-intents parsing come first; each remaining
    intent of the dataset is appended with probability 0.0, followed by the
    None intent. The returned list therefore has exactly one entry per
    intent of the dataset, plus one for the None intent.
    """
    parsed = self._parse_top_intents(
        text, top_n=len(self.regexes_per_intent))
    ranked = [result[RES_INTENT] for result in parsed]

    already_ranked = {entry[RES_INTENT_NAME] for entry in ranked}
    ranked.extend(
        intent_classification_result(name, 0.0)
        for name in self.regexes_per_intent
        if name not in already_ranked)

    # The None intent is not included in the regex patterns and is thus
    # never matched by the deterministic parser
    ranked.append(intent_classification_result(None, 0.0))
    return ranked
def get_intent(self, text, intents_filter=None):
    """Classify *text* and return its most likely intent

    Args:
        text (str): Input
        intents_filter (str or list of str): When defined, the most likely
            intent is searched among this list only, otherwise among all
            the intents known to the classifier

    Returns:
        dict or None: The most likely intent along with its probability or
        *None* if no intent was found

    Raises:
        NotTrained: When the intent classifier is not fitted
    """
    if not self.fitted:
        raise NotTrained('LogRegIntentClassifier must be fitted')

    if isinstance(intents_filter, str):
        intents_filter = [intents_filter]

    can_classify = (text and self.intent_list
                    and self.featurizer is not None
                    and self.classifier is not None)
    if not can_classify:
        return None

    if len(self.intent_list) == 1:
        only_intent = self.intent_list[0]
        if only_intent is None:
            return None
        return intent_classification_result(only_intent, 1.0)

    features = self.featurizer.transform([text_to_utterance(text)])
    proba_vec = self._predict_proba(features, intents_filter=intents_filter)
    ranked = sorted(
        zip(self.intent_list, proba_vec[0]), key=lambda pair: -pair[1])

    for name, proba in ranked:
        if name is None:
            # The None intent outranks every remaining candidate
            return None
        if intents_filter is None or name in intents_filter:
            return intent_classification_result(name, proba)
    return None
def parse(self, text, intents=None):
    """Extract the intent and its slots from *text* by pattern matching

    Each intent's patterns are tried in order; the first matching pattern
    yields the intent (with probability 1.0) and the slots.

    Args:
        text (str): Input
        intents (str or list of str): If provided, restricts the parsing
            to the patterns of these intents

    Returns:
        dict: The matched intent, if any, along with the extracted slots.
        See :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the intent parser is not fitted
    """
    if not self.fitted:
        raise NotTrained("DeterministicIntentParser must be fitted")

    if isinstance(intents, str):
        intents = [intents]

    ranges_mapping, processed_text = _replace_builtin_entities(
        text, self.language)

    for intent, patterns in iteritems(self.regexes_per_intent):
        if intents is not None and intent not in intents:
            continue
        for pattern in patterns:
            hit = pattern.match(processed_text)
            if hit is None:
                continue

            parsed_intent = intent_classification_result(
                intent_name=intent, probability=1.0)
            extracted = []
            for group in hit.groupdict():
                slot_name = self.group_names_to_slot_names[group]
                entity = self.slot_names_to_entities[slot_name]
                bounds = (hit.start(group), hit.end(group))
                mapped = bounds in ranges_mapping
                if mapped:
                    slot_range = ranges_mapping[bounds]
                else:
                    slot_range = {START: bounds[0], END: bounds[1]}
                # A mapped range refers to the original text; otherwise the
                # matched group itself is the value
                if mapped:
                    slot_value = text[slot_range[START]:slot_range[END]]
                else:
                    slot_value = hit.group(group)
                extracted.append(unresolved_slot(
                    match_range=slot_range, value=slot_value,
                    entity=entity, slot_name=slot_name))

            unique_slots = _deduplicate_overlapping_slots(
                extracted, self.language)
            return parsing_result(
                text, parsed_intent,
                sorted(unique_slots,
                       key=lambda s: s[RES_MATCH_RANGE][START]))

    return empty_result(text)
def test_should_parse_top_intents(self):
    """Check top-n parsing when two intents match the same text.

    "meeting tomorrow" is a verbatim utterance of intent1 and also fits
    intent2 with "tomorrow" as a datetime slot. Both are expected, each
    with probability 0.5.
    """
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - meeting tomorrow

---
type: intent
name: intent2
utterances:
  - meeting [time:snips/datetime](today)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    parser = DeterministicIntentParser().fit(dataset)
    text = "meeting tomorrow"

    # When
    results = parser.parse(text, top_n=3)

    # Then
    slot = {
        "entity": "snips/datetime",
        "range": {"end": 16, "start": 8},
        "slotName": "time",
        "value": "tomorrow",
    }
    expected_results = [
        extraction_result(
            intent_classification_result(
                intent_name="intent1", probability=0.5),
            []),
        extraction_result(
            intent_classification_result(
                intent_name="intent2", probability=0.5),
            [slot]),
    ]
    # Both results have the same probability, so compare them in a
    # deterministic order
    results = sorted(results, key=lambda r: r[RES_INTENT][RES_INTENT_NAME])
    self.assertEqual(expected_results, results)
def test_should_get_none_intent_when_empty_dataset(self):
    """Check that a classifier fitted on an empty dataset yields None (1.0)."""
    # Given
    classifier = LogRegIntentClassifier().fit(
        get_empty_dataset(LANGUAGE_EN))

    # When
    intent = classifier.get_intent("this is a dummy query")

    # Then
    self.assertEqual(intent, intent_classification_result(None, 1.0))
def test_should_parse_stop_words_slots(self):
    """Check that slot values made of stop words are still extracted.

    The parser is configured to ignore stop words, and "this" and "that"
    are declared as stop words while also being values of the
    "search_object" entity.
    """
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: search
utterances:
  - search
  - search [search_object](this)
  - search [search_object](a cat)

---
type: entity
name: search_object
values:
  - [this thing, that]
""")
    resources = self.get_resources("en")
    resources[STOP_WORDS] = {"a", "this", "that"}
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    parser_config = DeterministicIntentParserConfig(ignore_stop_words=True)
    parser = DeterministicIntentParser(
        config=parser_config, resources=resources)
    parser.fit(dataset)

    # When
    res_1 = parser.parse("search this")
    res_2 = parser.parse("search that")

    # Then
    expected_intent = intent_classification_result(
        intent_name="search", probability=1.0)
    expected_slots_1 = [
        unresolved_slot(match_range=(7, 11), value="this",
                        entity="search_object",
                        slot_name="search_object")
    ]
    expected_slots_2 = [
        unresolved_slot(match_range=(7, 11), value="that",
                        entity="search_object",
                        slot_name="search_object")
    ]
    self.assertEqual(expected_intent, res_1[RES_INTENT])
    self.assertEqual(expected_intent, res_2[RES_INTENT])
    self.assertListEqual(expected_slots_1, res_1[RES_SLOTS])
    self.assertListEqual(expected_slots_2, res_2[RES_SLOTS])
def test_should_get_intent(self):
    """Check the intent parsed from a query of the slots dataset."""
    # Given
    parser = DeterministicIntentParser().fit(
        validate_and_format_dataset(self.slots_dataset))
    text = ("this is a dummy_a query with another dummy_c at 10p.m. or "
            "at 12p.m.")

    # When
    parsing = parser.parse(text)

    # Then
    expected_intent = intent_classification_result(
        intent_name="dummy_intent_1", probability=1.0)
    self.assertEqual(expected_intent, parsing[RES_INTENT])
def test_should_parse_intent_with_stop_words(self, mock_get_stop_words):
    """Check intent parsing when stop words are ignored.

    Args:
        mock_get_stop_words: Mock whose return value is used as the set of
            stop words ("a" and "hey")
    """
    # Given
    mock_get_stop_words.return_value = {"a", "hey"}
    parser_config = DeterministicIntentParserConfig(ignore_stop_words=True)
    parser = DeterministicIntentParser(parser_config).fit(
        self.slots_dataset)
    text = ("Hey this is dummy_a query with another dummy_c at 10p.m. or "
            "at 12p.m.")

    # When
    parsing = parser.parse(text)

    # Then
    expected_intent = intent_classification_result(
        intent_name="dummy_intent_1", probability=1.0)
    self.assertEqual(expected_intent, parsing[RES_INTENT])
def test_should_use_parsers_sequentially(self):
    """Check that the engine returns the result of the second parser.

    The first registered parser is a plain MockIntentParser; the second
    one returns a fixed intent and slot for the input text. The engine
    output is expected to carry that intent with the slots resolved as
    custom slots.
    """
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
  - hello [greeted:name](john)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    input_text = "hello snips"
    intent = intent_classification_result(
        intent_name='greeting1', probability=0.7)
    slots = [
        unresolved_slot(match_range=(6, 11), value='snips', entity='name',
                        slot_name='greeted')
    ]

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        pass

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def parse(self, text, intents=None, top_n=None):
            if text == input_text:
                return parsing_result(text, intent, slots)
            return empty_result(text, 1.0)

    # pylint:enable=unused-variable

    config = NLUEngineConfig(
        ["first_intent_parser", "second_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    parse = engine.parse(input_text)

    # Then
    expected_slots = [custom_slot(s) for s in slots]
    expected_parse = parsing_result(input_text, intent, expected_slots)
    self.assertDictEqual(expected_parse, parse)
def test_should_parse_intent_after_deserialization(self):
    """Check that a persisted and reloaded parser still finds the intent."""
    # Given
    dataset = self.slots_dataset
    shared = self.get_shared_data(dataset)
    DeterministicIntentParser(**shared).fit(dataset).persist(
        self.tmp_file_path)
    reloaded_parser = DeterministicIntentParser.from_path(
        self.tmp_file_path, **shared)
    text = ("this is a dummy_a query with another dummy_c at 10p.m. or "
            "at 12p.m.")

    # When
    parsing = reloaded_parser.parse(text)

    # Then
    expected_intent = intent_classification_result(
        intent_name="dummy_intent_1", probability=1.0)
    self.assertEqual(expected_intent, parsing[RES_INTENT])
def test_should_parse_intent_with_duplicated_slot_names(self):
    """Check parsing when the same slot name occurs twice in an utterance.

    Both occurrences of the "number" slot are expected in the output, each
    with its own range.
    """
    # Given
    slots_dataset_stream = io.StringIO("""
---
type: intent
name: math_operation
slots:
  - name: number
    entity: snips/number
utterances:
  - what is [number](one) plus [number](one)""")
    dataset = Dataset.from_yaml_files("en", [slots_dataset_stream]).json
    parser = DeterministicIntentParser().fit(dataset)
    text = "what is one plus one"

    # When
    parsing = parser.parse(text)

    # Then
    expected_intent = intent_classification_result(
        intent_name="math_operation", probability=1.0)
    expected_slots = [
        {
            "entity": "snips/number",
            "range": {"end": 11, "start": 8},
            "slotName": "number",
            "value": "one",
        },
        {
            "entity": "snips/number",
            "range": {"end": 20, "start": 17},
            "slotName": "number",
            "value": "one",
        },
    ]
    self.assertDictEqual(expected_intent, parsing[RES_INTENT])
    self.assertListEqual(expected_slots, parsing[RES_SLOTS])
def test_should_get_intent_after_deserialization(self):
    """Check that a persisted and reloaded parser still finds the intent.

    The reloaded parser is given a freshly built builtin entity parser and
    the custom entity parser of the original parser.
    """
    # Given
    dataset = validate_and_format_dataset(self.slots_dataset)
    fitted_parser = DeterministicIntentParser().fit(dataset)
    custom_entity_parser = fitted_parser.custom_entity_parser
    fitted_parser.persist(self.tmp_file_path)
    reloaded_parser = DeterministicIntentParser.from_path(
        self.tmp_file_path,
        builtin_entity_parser=BuiltinEntityParser.build(language="en"),
        custom_entity_parser=custom_entity_parser)
    text = ("this is a dummy_a query with another dummy_c at 10p.m. or "
            "at 12p.m.")

    # When
    parsing = reloaded_parser.parse(text)

    # Then
    expected_intent = intent_classification_result(
        intent_name="dummy_intent_1", probability=1.0)
    self.assertEqual(expected_intent, parsing[RES_INTENT])
def _parse_map_output(self, text, output, entities, intents):
    """Convert a map output into the parser's result format

    Args:
        text (str): Input the slot values are read from
        output (tuple): Pair made of an intent id and a sequence of slot
            ids
        entities (list): Matched entities, one per slot id, each carrying
            its match range and its entity kind
        intents (list of str or None): When not None, intents outside
            this list are discarded

    Returns:
        dict or None: None when the intent is filtered out, otherwise the
        extraction result with probability 1.0 and one slot per entity.
    """
    intent_id, slot_ids = output
    intent_name = self._intents_names[intent_id]
    if not (intents is None or intent_name in intents):
        return None

    parsed_intent = intent_classification_result(
        intent_name=intent_name, probability=1.0)

    # assert invariant
    assert len(slot_ids) == len(entities)

    slots = []
    for slot_id, matched_entity in zip(slot_ids, entities):
        slot_name = self._slots_names[slot_id]
        start = matched_entity[RES_MATCH_RANGE][START]
        end = matched_entity[RES_MATCH_RANGE][END]
        slots.append(unresolved_slot(
            [start, end], text[start:end], matched_entity[ENTITY_KIND],
            slot_name))
    return extraction_result(parsed_intent, slots)
def test_should_use_parsers_sequentially(self):
    """Check that the engine returns the result of the second parser.

    Two parsers are declared and registered locally: the first always
    returns an empty result, the second returns a fixed intent and slot
    for the input text. The engine output is expected to carry that intent
    with the slots resolved as custom slots.
    """
    # Given
    input_text = "hello world"
    intent = intent_classification_result(
        intent_name='dummy_intent_1', probability=0.7)
    slots = [
        unresolved_slot(match_range=(6, 11), value='world',
                        entity='mocked_entity',
                        slot_name='mocked_slot_name')
    ]

    class TestIntentParser1Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser1"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser1Config()

    class TestIntentParser1(IntentParser):
        unit_name = "test_intent_parser1"
        config_type = TestIntentParser1Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return getattr(self, '_fitted', False)

        def parse(self, text, intents):
            return empty_result(text)

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, unit_dict):
            return TestIntentParser1(cls.config_type())

    class TestIntentParser2Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser2"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser2Config()

    class TestIntentParser2(IntentParser):
        unit_name = "test_intent_parser2"
        config_type = TestIntentParser2Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            return getattr(self, '_fitted', False)

        def parse(self, text, intents):
            if text != input_text:
                return empty_result(text)
            return parsing_result(text, intent, slots)

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, unit_dict):
            return TestIntentParser2(cls.config_type())

    register_processing_unit(TestIntentParser1)
    register_processing_unit(TestIntentParser2)

    mocked_entities = {
        "mocked_entity": {
            "automatically_extensible": True,
            "utterances": {},
        }
    }
    mocked_slot_mappings = {
        "dummy_intent_1": {"mocked_slot_name": "mocked_entity"}
    }
    mocked_dataset_metadata = {
        "language_code": "en",
        "entities": mocked_entities,
        "slot_name_mappings": mocked_slot_mappings,
    }

    config = NLUEngineConfig(
        [TestIntentParser1Config(), TestIntentParser2Config()])
    engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)

    # pylint:disable=protected-access
    engine._dataset_metadata = mocked_dataset_metadata
    # pylint:enable=protected-access

    # When
    actual_parse = engine.parse(input_text)

    # Then
    expected_parse = parsing_result(
        input_text, intent, [custom_slot(s) for s in slots])
    self.assertDictEqual(expected_parse, actual_parse)
def test_synonyms_should_point_to_base_value(self, mocked_deter_parse,
                                             mocked_proba_parse):
    """Check that a synonym slot value is resolved to its reference value.

    Args:
        mocked_deter_parse: Mock set up to return an empty result
        mocked_proba_parse: Mock set up to return the intent and a slot
            whose value is the synonym "dummy1_bis"
    """
    # Given
    training_utterance = {
        "data": [{
            "text": "dummy_1",
            "entity": "dummy_entity_1",
            "slot_name": "dummy_slot_name"
        }]
    }
    entity = {
        "use_synonyms": True,
        "automatically_extensible": False,
        "data": [{
            "value": "dummy1",
            "synonyms": ["dummy1", "dummy1_bis"]
        }]
    }
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {
            "dummy_intent_1": {"utterances": [training_utterance]}
        },
        "entities": {"dummy_entity_1": entity},
        "language": "en"
    }

    text = "dummy1_bis"
    mocked_proba_parser_intent = intent_classification_result(
        "dummy_intent_1", 1.0)
    mocked_proba_parser_slots = [
        unresolved_slot(match_range=(0, 10), value="dummy1_bis",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name")
    ]

    mocked_deter_parse.return_value = empty_result(text)
    mocked_proba_parse.return_value = parsing_result(
        text, mocked_proba_parser_intent, mocked_proba_parser_slots)

    engine = SnipsNLUEngine().fit(dataset)

    # When
    result = engine.parse(text)

    # Then
    expected_slot = {
        RES_MATCH_RANGE: {"start": 0, "end": 10},
        RES_RAW_VALUE: "dummy1_bis",
        RES_VALUE: {"kind": "Custom", "value": "dummy1"},
        RES_ENTITY: "dummy_entity_1",
        RES_SLOT_NAME: "dummy_slot_name"
    }
    expected_result = parsing_result(
        text, intent=mocked_proba_parser_intent, slots=[expected_slot])
    self.assertEqual(expected_result, result)
def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                        mocked_crf_parse):
    """Check slot resolution for extensible and non-extensible entities.

    The mocked parser returns two slots whose values are unknown to the
    dataset. Only the slot of the automatically extensible entity
    ("dummy_entity_2") is expected in the engine output.

    Args:
        mocked_regex_parse: Mock set up to return an empty result
        mocked_crf_parse: Mock set up to return the intent and both slots
    """
    # Given
    training_utterance = {
        "data": [{
            "text": "dummy_1",
            "entity": "dummy_entity_1",
            "slot_name": "dummy_slot_name"
        }, {
            "text": " dummy_2",
            "entity": "dummy_entity_2",
            "slot_name": "other_dummy_slot_name"
        }]
    }
    closed_entity = {
        "use_synonyms": True,
        "automatically_extensible": False,
        "data": [{
            "value": "dummy1",
            "synonyms": ["dummy1", "dummy1_bis"]
        }, {
            "value": "dummy2",
            "synonyms": ["dummy2", "dummy2_bis"]
        }]
    }
    extensible_entity = {
        "use_synonyms": False,
        "automatically_extensible": True,
        "data": [{
            "value": "dummy2",
            "synonyms": ["dummy2"]
        }]
    }
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {
            "dummy_intent_1": {"utterances": [training_utterance]}
        },
        "entities": {
            "dummy_entity_1": closed_entity,
            "dummy_entity_2": extensible_entity
        },
        "language": "en"
    }

    text = "dummy_3 dummy_4"
    mocked_crf_intent = intent_classification_result("dummy_intent_1", 1.0)
    mocked_crf_slots = [
        unresolved_slot(match_range=(0, 7), value="dummy_3",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name"),
        unresolved_slot(match_range=(8, 15), value="dummy_4",
                        entity="dummy_entity_2",
                        slot_name="other_dummy_slot_name")
    ]

    mocked_regex_parse.return_value = empty_result(text)
    mocked_crf_parse.return_value = parsing_result(
        text, mocked_crf_intent, mocked_crf_slots)

    engine = SnipsNLUEngine()

    # When
    engine = engine.fit(dataset)
    result = engine.parse(text)

    # Then
    kept_slot = unresolved_slot(match_range=(8, 15), value="dummy_4",
                                entity="dummy_entity_2",
                                slot_name="other_dummy_slot_name")
    expected_result = parsing_result(
        text, intent=mocked_crf_intent, slots=[custom_slot(kept_slot)])
    self.assertEqual(expected_result, result)
def test_should_use_parsers_sequentially(self):
    """Check that the engine falls through to the second parser.

    The first locally declared parser always returns an empty result; the
    second returns a fixed intent and slot for the input text. The engine
    output is expected to carry that intent with the slots resolved as
    custom slots.
    """
    # Given
    input_text = "hello world"
    intent = intent_classification_result(
        intent_name='dummy_intent_1', probability=0.7)
    mocked_slot = unresolved_slot(
        match_range=(6, 11), value='world', entity='mocked_entity',
        slot_name='mocked_slot_name')
    slots = [mocked_slot]

    class TestIntentParser1Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser1"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser1Config()

    class TestIntentParser1(IntentParser):
        unit_name = "test_intent_parser1"
        config_type = TestIntentParser1Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            if not hasattr(self, '_fitted'):
                return False
            return self._fitted

        def parse(self, text, intents):
            return empty_result(text)

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, unit_dict):
            parser_config = cls.config_type()
            return TestIntentParser1(parser_config)

    class TestIntentParser2Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser2"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser2Config()

    class TestIntentParser2(IntentParser):
        unit_name = "test_intent_parser2"
        config_type = TestIntentParser2Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            if not hasattr(self, '_fitted'):
                return False
            return self._fitted

        def parse(self, text, intents):
            known_input = text == input_text
            if known_input:
                return parsing_result(text, intent, slots)
            return empty_result(text)

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, unit_dict):
            parser_config = cls.config_type()
            return TestIntentParser2(parser_config)

    for parser_cls in (TestIntentParser1, TestIntentParser2):
        register_processing_unit(parser_cls)

    mocked_dataset_metadata = {
        "language_code": "en",
        "entities": {
            "mocked_entity": {
                "automatically_extensible": True,
                "utterances": {}
            }
        },
        "slot_name_mappings": {
            "dummy_intent_1": {
                "mocked_slot_name": "mocked_entity"
            }
        }
    }

    parser_configs = [TestIntentParser1Config(), TestIntentParser2Config()]
    engine = SnipsNLUEngine(NLUEngineConfig(parser_configs)).fit(
        SAMPLE_DATASET)

    # pylint:disable=protected-access
    engine._dataset_metadata = mocked_dataset_metadata
    # pylint:enable=protected-access

    # When
    parse = engine.parse(input_text)

    # Then
    resolved_slots = [custom_slot(s) for s in slots]
    self.assertDictEqual(
        parsing_result(input_text, intent, resolved_slots), parse)
def get_intent(self, text, intents_filter):
    """Return a mocked intent chosen from keywords found in *text*.

    Args:
        text (str): Input
        intents_filter: Unused

    Returns:
        dict: "MakeTea" when the text contains "tea", else "MakeCoffee"
        when it contains "coffee", else the None intent; always with
        probability 1.0.
    """
    # "tea" is checked first, so it wins when both keywords are present
    for keyword, intent_name in (("tea", "MakeTea"),
                                 ("coffee", "MakeCoffee")):
        if keyword in text:
            return intent_classification_result(intent_name, 1.0)
    return intent_classification_result(None, 1.0)
def test_synonyms_should_point_to_base_value(self, mocked_deter_parse,
                                             mocked_proba_parse):
    """Check that a synonym slot value is resolved to its reference value.

    Args:
        mocked_deter_parse: Mock set up to return an empty result
        mocked_proba_parse: Mock set up to return the intent and a slot
            whose value is the synonym "dummy1_bis"
    """
    # Given
    intents = {
        "dummy_intent_1": {
            "utterances": [
                {
                    "data": [
                        {
                            "text": "dummy_1",
                            "entity": "dummy_entity_1",
                            "slot_name": "dummy_slot_name"
                        }
                    ]
                }
            ]
        }
    }
    entities = {
        "dummy_entity_1": {
            "use_synonyms": True,
            "automatically_extensible": False,
            "data": [
                {
                    "value": "dummy1",
                    "synonyms": ["dummy1", "dummy1_bis"]
                }
            ]
        }
    }
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": intents,
        "entities": entities,
        "language": "en"
    }

    text = "dummy1_bis"
    mocked_proba_parser_intent = intent_classification_result(
        "dummy_intent_1", 1.0)
    synonym_slot = unresolved_slot(
        match_range=(0, 10), value="dummy1_bis", entity="dummy_entity_1",
        slot_name="dummy_slot_name")
    mocked_proba_parser_slots = [synonym_slot]

    mocked_deter_parse.return_value = empty_result(text)
    mocked_proba_parse.return_value = parsing_result(
        text, mocked_proba_parser_intent, mocked_proba_parser_slots)

    engine = SnipsNLUEngine().fit(dataset)

    # When
    result = engine.parse(text)

    # Then
    expected_slot = {
        RES_MATCH_RANGE: {"start": 0, "end": 10},
        RES_RAW_VALUE: "dummy1_bis",
        RES_VALUE: {"kind": "Custom", "value": "dummy1"},
        RES_ENTITY: "dummy_entity_1",
        RES_SLOT_NAME: "dummy_slot_name"
    }
    expected_result = parsing_result(
        text, intent=mocked_proba_parser_intent, slots=[expected_slot])
    self.assertEqual(expected_result, result)
def test_synonyms_should_point_to_base_value(self):
    """Check that a synonym slot value is resolved to its reference value.

    A locally registered parser returns a slot whose value is the synonym
    "dummy1_bis"; the engine output is expected to expose "dummy1" as the
    resolved value while keeping the synonym as raw value.
    """
    # Given
    training_utterance = {
        "data": [
            {
                "text": "dummy_1",
                "entity": "dummy_entity_1",
                "slot_name": "dummy_slot_name"
            }
        ]
    }
    entity = {
        "use_synonyms": True,
        "automatically_extensible": False,
        "data": [
            {
                "value": "dummy1",
                "synonyms": ["dummy1", "dummy1_bis"]
            }
        ],
        "matching_strictness": 1.0
    }
    dataset = {
        "intents": {
            "dummy_intent_1": {"utterances": [training_utterance]}
        },
        "entities": {"dummy_entity_1": entity},
        "language": "en"
    }

    mocked_intent = intent_classification_result("dummy_intent_1", 1.0)
    mocked_slots = [
        unresolved_slot(match_range=(0, 10), value="dummy1_bis",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name")
    ]

    # pylint:disable=unused-variable
    @IntentParser.register("my_intent_parser", True)
    class MyIntentParser(MockIntentParser):
        def parse(self, text, intents=None, top_n=None):
            return parsing_result(text, mocked_intent, mocked_slots)

    # pylint:enable=unused-variable

    input_ = "dummy1_bis"
    engine = SnipsNLUEngine(
        NLUEngineConfig(["my_intent_parser"])).fit(dataset)

    # When
    result = engine.parse(input_)

    # Then
    expected_slot = {
        RES_MATCH_RANGE: {"start": 0, "end": 10},
        RES_RAW_VALUE: "dummy1_bis",
        RES_VALUE: {"kind": "Custom", "value": "dummy1"},
        RES_ENTITY: "dummy_entity_1",
        RES_SLOT_NAME: "dummy_slot_name"
    }
    expected_result = parsing_result(
        input_, mocked_intent, slots=[expected_slot])
    self.assertEqual(expected_result, result)
def test_should_parse_top_intents(self):
    # Two registered mock parsers disagree on intent scores and slots.
    # The expectations below pair each intent with the highest score either
    # parser reported and with the first non-empty slot list in parser
    # order; the `intents` filter keeps the None intent in the output.

    # Given
    query = "foo bar ban"
    yaml_dataset = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - foo [slot1:entity1](bak)
---
type: intent
name: intent2
utterances:
  - '[slot2:entity2](foo) baz'
---
type: intent
name: intent3
utterances:
  - foo bap""")
    dataset = Dataset.from_yaml_files("en", [yaml_dataset]).json

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        def get_intents(self, text):
            scores = [
                ("intent1", 0.5),
                ("intent2", 0.3),
                (None, 0.15),
                ("intent3", 0.05),
            ]
            return [intent_classification_result(name, proba)
                    for name, proba in scores]

        def get_slots(self, text, intent):
            # Only intent2 yields a slot from this parser.
            if intent != "intent2":
                return []
            return [unresolved_slot((0, 3), "foo", "entity2", "slot2")]

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def get_intents(self, text):
            scores = [
                ("intent2", 0.6),
                ("intent1", 0.2),
                (None, 0.15),
                ("intent3", 0.05),
            ]
            return [intent_classification_result(name, proba)
                    for name, proba in scores]

        def get_slots(self, text, intent):
            slots = []
            if intent == "intent1":
                slots.append(
                    unresolved_slot((0, 3), "foo", "entity1", "slot1"))
            elif intent == "intent2":
                slots.append(
                    unresolved_slot((8, 11), "ban", "entity2", "slot2"))
            return slots

    # pylint:enable=unused-variable

    engine_config = NLUEngineConfig(
        ["first_intent_parser", "second_intent_parser"])
    nlu_engine = SnipsNLUEngine(engine_config).fit(dataset)

    # When
    results = nlu_engine.parse(query, top_n=3)
    results_with_filter = nlu_engine.parse(
        query, intents=["intent1", "intent3"], top_n=3)

    # Then
    def expected(intent_name, proba, *slot_specs):
        # Builds one extraction result; every spec holds the positional
        # arguments of unresolved_slot.
        intent = intent_classification_result(intent_name, proba)
        slots = [custom_slot(unresolved_slot(*spec)) for spec in slot_specs]
        return extraction_result(intent, slots)

    expected_results = [
        expected("intent2", 0.6, ((0, 3), "foo", "entity2", "slot2")),
        expected("intent1", 0.5, ((0, 3), "foo", "entity1", "slot1")),
        expected(None, 0.15),
    ]
    expected_results_with_filter = [
        expected("intent1", 0.5, ((0, 3), "foo", "entity1", "slot1")),
        expected(None, 0.15),
        expected("intent3", 0.05),
    ]
    self.assertListEqual(expected_results, results)
    self.assertListEqual(expected_results_with_filter, results_with_filter)
def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                        mocked_crf_parse):
    # The mocked parse returns two slots whose values appear in neither
    # entity's data. Only the slot of "dummy_entity_2" is expected in the
    # engine output -- presumably because that entity is automatically
    # extensible while "dummy_entity_1" is not (confirm against the engine).

    # Given
    utterance = {
        "data": [
            {
                "text": "dummy_1",
                "entity": "dummy_entity_1",
                "slot_name": "dummy_slot_name"
            },
            {
                "text": " dummy_2",
                "entity": "dummy_entity_2",
                "slot_name": "other_dummy_slot_name"
            }
        ]
    }
    closed_entity = {
        "use_synonyms": True,
        "automatically_extensible": False,
        "data": [
            {
                "value": "dummy1",
                "synonyms": ["dummy1", "dummy1_bis"]
            },
            {
                "value": "dummy2",
                "synonyms": ["dummy2", "dummy2_bis"]
            }
        ]
    }
    extensible_entity = {
        "use_synonyms": False,
        "automatically_extensible": True,
        "data": [
            {
                "value": "dummy2",
                "synonyms": ["dummy2"]
            }
        ]
    }
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {"dummy_intent_1": {"utterances": [utterance]}},
        "entities": {
            "dummy_entity_1": closed_entity,
            "dummy_entity_2": extensible_entity
        },
        "language": "en"
    }

    query = "dummy_3 dummy_4"
    canned_intent = intent_classification_result("dummy_intent_1", 1.0)
    first_slot = unresolved_slot(
        match_range=(0, 7),
        value="dummy_3",
        entity="dummy_entity_1",
        slot_name="dummy_slot_name",
    )
    second_slot = unresolved_slot(
        match_range=(8, 15),
        value="dummy_4",
        entity="dummy_entity_2",
        slot_name="other_dummy_slot_name",
    )
    # The first mock yields an empty result, the second one the canned
    # intent with both unresolved slots.
    mocked_regex_parse.return_value = empty_result(query)
    mocked_crf_parse.return_value = parsing_result(
        query, canned_intent, [first_slot, second_slot])
    engine = SnipsNLUEngine()

    # When
    engine = engine.fit(dataset)
    result = engine.parse(query)

    # Then
    kept_slot = custom_slot(
        unresolved_slot(
            match_range=(8, 15),
            value="dummy_4",
            entity="dummy_entity_2",
            slot_name="other_dummy_slot_name",
        )
    )
    expected = parsing_result(
        query, intent=canned_intent, slots=[kept_slot])
    self.assertEqual(expected, result)