def test_training_should_be_reproducible(self):
    """Check that fitting twice with the same seed persists identically.

    Two parsers are built with the same ``random_state`` and fitted on the
    same dataset; the sha256 directory hashes of their persisted output
    must be equal.
    """
    # Given
    random_state = 42
    # The YAML documents stay at column 0 inside the literal: YAML is
    # indentation-sensitive, so they must not follow the method's indent.
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    # When
    parser1 = DeterministicIntentParser(random_state=random_state)
    parser1.fit(dataset)

    parser2 = DeterministicIntentParser(random_state=random_state)
    parser2.fit(dataset)

    # Then
    with temp_dir() as tmp_dir:
        dir_parser1 = tmp_dir / "parser1"
        dir_parser2 = tmp_dir / "parser2"
        parser1.persist(dir_parser1)
        parser2.persist(dir_parser2)
        # Hash while the temporary directory still exists.
        hash1 = dirhash(str(dir_parser1), 'sha256')
        hash2 = dirhash(str(dir_parser2), 'sha256')
        self.assertEqual(hash1, hash2)
def test_should_parse_stop_words_slots(self):
    """Check that slot values which are stop words are still extracted.

    The parser is configured with ``ignore_stop_words=True`` and the
    resources declare "a", "this" and "that" as stop words; parsing
    "search this" and "search that" must still return the
    ``search_object`` slot with the stop word as its value.
    """
    # Given
    # The YAML documents stay at column 0 inside the literal: YAML is
    # indentation-sensitive, so they must not follow the method's indent.
    dataset_stream = io.StringIO("""
---
type: intent
name: search
utterances:
- search
- search [search_object](this)
- search [search_object](a cat)
---
type: entity
name: search_object
values:
- [this thing, that]
""")

    resources = self.get_resources("en")
    resources[STOP_WORDS] = {"a", "this", "that"}
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    parser_config = DeterministicIntentParserConfig(ignore_stop_words=True)
    parser = DeterministicIntentParser(
        config=parser_config, resources=resources)
    parser.fit(dataset)

    # When
    res_1 = parser.parse("search this")
    res_2 = parser.parse("search that")

    # Then
    expected_intent = intent_classification_result(
        intent_name="search", probability=1.0)
    expected_slots_1 = [
        unresolved_slot(match_range=(7, 11), value="this",
                        entity="search_object",
                        slot_name="search_object")
    ]
    expected_slots_2 = [
        unresolved_slot(match_range=(7, 11), value="that",
                        entity="search_object",
                        slot_name="search_object")
    ]
    self.assertEqual(expected_intent, res_1[RES_INTENT])
    self.assertEqual(expected_intent, res_2[RES_INTENT])
    self.assertListEqual(expected_slots_1, res_1[RES_SLOTS])
    self.assertListEqual(expected_slots_2, res_2[RES_SLOTS])
def test_should_fit_and_parse_with_non_ascii_tags(self):
    """Fit and parse a dataset whose entity and slot names are non-ASCII.

    Ten single-chunk utterances ("string0" .. "string9") are tagged with
    a non-ASCII entity and slot name; after fitting, parsing "string0"
    must return the intent and one slot covering the whole input.
    """
    # Given
    utterances = []
    for idx in range(10):
        chunk = {
            TEXT: "string%s" % idx,
            ENTITY: "non_ascìi_entïty",
            SLOT_NAME: "non_ascìi_slöt",
        }
        utterances.append({DATA: [chunk]})

    # When
    entity_spec = {
        "use_synonyms": False,
        "automatically_extensible": True,
        "matching_strictness": 1.0,
        "data": [],
    }
    naughty_dataset = validate_and_format_dataset({
        "intents": {
            "naughty_intent": {"utterances": utterances},
        },
        "entities": {
            "non_ascìi_entïty": entity_spec,
        },
        "language": "en",
    })

    # Then
    with self.fail_if_exception("Exception raised"):
        parser = DeterministicIntentParser()
        parser.fit(naughty_dataset)
        parsing = parser.parse("string0")

        expected_slots = [{
            'entity': 'non_ascìi_entïty',
            'range': {"start": 0, "end": 7},
            'slotName': u'non_ascìi_slöt',
            'value': u'string0',
        }]
        self.assertEqual(
            "naughty_intent", parsing[RES_INTENT][RES_INTENT_NAME])
        self.assertListEqual(expected_slots, parsing[RES_SLOTS])