def test_should_fit_and_parse_empty_intent(self):
    # Smoke test: fitting on an intent whose only utterance is blank and
    # then parsing a text must complete without raising.
    # Given
    blank_utterance = {"data": [{"text": " "}]}
    dataset = {
        "intents": {
            "dummy_intent": {"utterances": [blank_utterance]}
        },
        "language": "en",
        "entities": {},
    }
    shared = self.get_shared_data(dataset)
    slot_filler = CRFSlotFiller(**shared)

    # When
    slot_filler.fit(dataset, "dummy_intent")
    slot_filler.get_slots("ya")
def test_should_not_get_slots_when_not_fitted(self):
    # A slot filler that was never fitted must report itself as not fitted
    # and raise NotTrained when asked for slots.
    # Given
    unfitted_filler = CRFSlotFiller()

    # When / Then
    self.assertFalse(unfitted_filler.fitted)
    with self.assertRaises(NotTrained):
        unfitted_filler.get_slots("foobar")
def test_should_parse_naughty_strings(self):
    # Every line of the naughty-strings resource file is fed to a fitted
    # slot filler; any exception fails the test.
    # Given
    dataset = validate_and_format_dataset(SAMPLE_DATASET)
    resource_file = TEST_PATH / "resources" / "naughty_strings.txt"
    with resource_file.open(encoding='utf8') as handle:
        raw_lines = handle.readlines()
    naughty_strings = [raw.strip("\n") for raw in raw_lines]

    # When
    slot_filler = CRFSlotFiller().fit(dataset, "dummy_intent_1")

    # Then
    for naughty in naughty_strings:
        with self.fail_if_exception("Naughty string crashes"):
            slot_filler.get_slots(naughty)
def test_should_get_slots(self):
    # Trains a slot filler on a two-utterance "MakeTea" intent and checks
    # that the number of cups is extracted from a new sentence.
    # Given
    # The YAML document needs one construct per line ("---", each key,
    # each "- utterance"); keep the line breaks inside the literal.
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me [number_of_cups:snips/number](five) cups of tea
- please I want [number_of_cups](two) cups of tea""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    shared = self.get_shared_data(dataset)
    # Fixed random state so that training is reproducible across runs.
    shared[RANDOM_STATE] = 42
    slot_filler = CRFSlotFiller(**shared)
    intent = "MakeTea"
    slot_filler.fit(dataset, intent)

    # When
    slots = slot_filler.get_slots("make me two cups of tea")

    # Then
    expected_slots = [
        unresolved_slot(
            match_range={START: 8, END: 11},
            value='two',
            entity='snips/number',
            slot_name='number_of_cups',
        )
    ]
    self.assertListEqual(slots, expected_slots)
def test_should_not_use_crf_when_dataset_with_no_slots(self):
    # When the training intent contains no slot at all, parsing must
    # return an empty list without ever calling compute_features.
    # Given
    slotless_utterance = {
        "data": [{"text": "This is an utterance without slots"}]
    }
    dataset = {
        "language": "en",
        "intents": {
            "intent1": {"utterances": [slotless_utterance]}
        },
        "entities": {},
    }
    shared = self.get_shared_data(dataset)
    slot_filler = CRFSlotFiller(**shared)
    # Replace feature computation with a mock so any call is recorded.
    features_spy = MagicMock()
    slot_filler.compute_features = features_spy

    # When
    slot_filler.fit(dataset, "intent1")
    slots = slot_filler.get_slots("This is an utterance without slots")

    # Then
    features_spy.assert_not_called()
    self.assertListEqual([], slots)
def test_should_get_builtin_slots(self):
    # Fits on the weather dataset and expects both a builtin datetime slot
    # and a custom location slot in the parsed sentence.
    # Given
    dataset = validate_and_format_dataset(WEATHER_DATASET)
    slot_filler = CRFSlotFiller(CRFSlotFillerConfig(random_seed=42))
    slot_filler.fit(dataset, "SearchWeatherForecast")

    # When
    text = "Give me the weather at 9p.m. in Paris"
    slots = slot_filler.get_slots(text)

    # Then
    datetime_slot = unresolved_slot(
        match_range={START: 20, END: 28},
        value='at 9p.m.',
        entity='snips/datetime',
        slot_name='datetime',
    )
    location_slot = unresolved_slot(
        match_range={START: 32, END: 37},
        value='Paris',
        entity='weather_location',
        slot_name='location',
    )
    self.assertListEqual([datetime_slot, location_slot], slots)
def test_should_get_sub_builtin_slots(self):
    # Trains on a "PlanBreak" intent whose two slots ("start" and "end")
    # share the snips/datetime entity, and checks that both are extracted
    # separately from a new sentence.
    # Given
    # The YAML document needs one construct per line; the first utterance
    # is a single-quoted scalar folded over two lines (the line break is
    # read as a single space).
    dataset_stream = io.StringIO("""
---
type: intent
name: PlanBreak
utterances:
- 'I want to leave from [start:snips/datetime](tomorrow) until
  [end:snips/datetime](next thursday)'
- find me something from [start](9am) to [end](12pm)
- I need a break from [start](2pm) until [end](4pm)
- Can you suggest something from [start](april 4th) until [end](april 6th) ?
- Book me a trip from [start](this friday) to [end](next tuesday)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    # Fixed seed so that training is reproducible across runs.
    config = CRFSlotFillerConfig(random_seed=42)
    intent = "PlanBreak"
    slot_filler = CRFSlotFiller(config, **self.get_shared_data(dataset))
    slot_filler.fit(dataset, intent)

    # When
    slots = slot_filler.get_slots("Find me a plan from 5pm to 6pm")

    # Then
    expected_slots = [
        unresolved_slot(
            match_range={START: 20, END: 23},
            value="5pm",
            entity="snips/datetime",
            slot_name="start",
        ),
        unresolved_slot(
            match_range={START: 27, END: 30},
            value="6pm",
            entity="snips/datetime",
            slot_name="end",
        ),
    ]
    self.assertListEqual(expected_slots, slots)
def test_should_get_builtin_slots(self):
    # Trains on a "GetWeather" intent mixing a builtin entity
    # (snips/datetime) and a custom one (weather_location), then checks
    # that both slots are extracted from a new sentence.
    # Given
    # The YAML document needs one construct per line ("---", each key,
    # each "- utterance"); keep the line breaks inside the literal.
    dataset_stream = io.StringIO("""
---
type: intent
name: GetWeather
utterances:
- what is the weather [datetime:snips/datetime](at 9pm)
- what's the weather in [location:weather_location](berlin)
- What's the weather in [location](tokyo) [datetime](this weekend)?
- Can you tell me the weather [datetime] please ?
- what is the weather forecast [datetime] in [location](paris)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    # Fixed seed so that training is reproducible across runs.
    config = CRFSlotFillerConfig(random_seed=42)
    intent = "GetWeather"
    slot_filler = CRFSlotFiller(config, **self.get_shared_data(dataset))
    slot_filler.fit(dataset, intent)

    # When
    slots = slot_filler.get_slots("Give me the weather at 9pm in Paris")

    # Then
    expected_slots = [
        unresolved_slot(
            match_range={START: 20, END: 26},
            value='at 9pm',
            entity='snips/datetime',
            slot_name='datetime',
        ),
        unresolved_slot(
            match_range={START: 30, END: 35},
            value='Paris',
            entity='weather_location',
            slot_name='location',
        ),
    ]
    self.assertListEqual(expected_slots, slots)
def test_should_parse_naughty_strings(self):
    # Fits a slot filler on a minimal one-utterance intent, then feeds it
    # every line of the naughty-strings resource file; any exception
    # raised while parsing fails the test.
    # Given
    # The YAML document needs one construct per line ("---", each key,
    # each "- utterance"); keep the line breaks inside the literal.
    dataset_stream = io.StringIO("""
---
type: intent
name: my_intent
utterances:
- this is [entity1](my first entity)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    naughty_strings_path = TEST_PATH / "resources" / "naughty_strings.txt"
    with naughty_strings_path.open(encoding='utf8') as f:
        # Strip only the newline: other whitespace may be part of the
        # string under test.
        naughty_strings = [line.strip("\n") for line in f]

    # When
    shared = self.get_shared_data(dataset)
    slot_filler = CRFSlotFiller(**shared).fit(dataset, "my_intent")

    # Then
    for s in naughty_strings:
        with self.fail_if_exception("Naughty string crashes"):
            slot_filler.get_slots(s)
def test_should_fit_and_parse_with_non_ascii_tags(self):
    # Entity and slot names containing non-ASCII characters must survive
    # both fitting and parsing, and come back unchanged in the result.
    # Given
    utterances = [
        {
            DATA: [{
                TEXT: "string%s" % index,
                ENTITY: "non_ascìi_entïty",
                SLOT_NAME: "non_ascìi_slöt",
            }]
        }
        for index in range(10)
    ]
    raw_dataset = {
        "intents": {
            "naughty_intent": {"utterances": utterances}
        },
        "entities": {
            "non_ascìi_entïty": {
                "use_synonyms": False,
                "automatically_extensible": True,
                "data": [],
            }
        },
        "language": "en",
        "snips_nlu_version": "0.0.1",
    }
    naughty_dataset = validate_and_format_dataset(raw_dataset)

    # When
    with self.fail_if_exception("Naughty string make NLU crash"):
        slot_filler = CRFSlotFiller()
        slot_filler.fit(naughty_dataset, "naughty_intent")
        slots = slot_filler.get_slots("string0")

    # Then
    expected_slot = {
        "entity": "non_ascìi_entïty",
        "range": {"start": 0, "end": 7},
        "slotName": u"non_ascìi_slöt",
        "value": u"string0",
    }
    self.assertListEqual([expected_slot], slots)
def test_should_get_slots(self):
    # Fits on the beverage dataset and expects the number of cups to be
    # extracted from a "MakeTea" sentence.
    # Given
    dataset = validate_and_format_dataset(BEVERAGE_DATASET)
    seeded_config = CRFSlotFillerConfig(random_seed=42)
    slot_filler = CRFSlotFiller(seeded_config)
    slot_filler.fit(dataset, "MakeTea")

    # When
    slots = slot_filler.get_slots("make me two cups of tea")

    # Then
    cups_slot = unresolved_slot(
        match_range={START: 8, END: 11},
        value='two',
        entity='snips/number',
        slot_name='number_of_cups',
    )
    self.assertListEqual(slots, [cups_slot])