def test_should_not_parse_when_not_fitted(self):
    """Check that an unfitted parser reports ``fitted`` as false and that
    calling ``parse`` on it raises ``NotTrained``."""
    # Given
    unfitted_parser = LookupIntentParser()

    # When / Then
    self.assertFalse(unfitted_parser.fitted)
    self.assertRaises(NotTrained, unfitted_parser.parse, "foobar")
def test_should_parse_intent(self):
    """Check that a text equal to a training utterance of ``intent2`` is
    classified as ``intent2`` with probability 1.0."""
    # Given
    yaml_stream = io.StringIO(
        """
---
type: intent
name: intent1
utterances:
  - foo bar baz

---
type: intent
name: intent2
utterances:
  - foo bar ban""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    fitted_parser = LookupIntentParser().fit(dataset)

    # When
    result = fitted_parser.parse("foo bar ban")

    # Then
    self.assertEqual(
        intent_classification_result(
            intent_name="intent2", probability=1.0),
        result[RES_INTENT])
def test_should_ignore_very_ambiguous_utterances(self):
    """Check that "call tomorrow" yields an empty result (probability 1.0)
    when ``intent_1`` uses an ``event_type`` entity containing "call" and
    ``intent_2`` uses "call" followed by a builtin datetime slot."""
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: intent_1
utterances:
  - "[event_type](meeting) tomorrow"

---
type: intent
name: intent_2
utterances:
  - call [time:snips/datetime](today)

---
type: entity
name: event_type
values:
  - call
  - diner""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    fitted_parser = LookupIntentParser().fit(dataset)
    query = "call tomorrow"

    # When
    result = fitted_parser.parse(query)

    # Then
    self.assertEqual(empty_result(query, 1.0), result)
def test_should_parse_slightly_ambiguous_utterances(self):
    """Check that "call tomorrow" is resolved to ``intent_1`` with
    probability 2/3 and no slots when ``intent_2`` defines "call" followed
    by a builtin datetime slot."""
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: intent_1
utterances:
  - call tomorrow

---
type: intent
name: intent_2
utterances:
  - call [time:snips/datetime](today)""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    fitted_parser = LookupIntentParser().fit(dataset)
    query = "call tomorrow"

    # When
    result = fitted_parser.parse(query)

    # Then
    intent = intent_classification_result(
        intent_name="intent_1", probability=2. / 3.)
    self.assertEqual(parsing_result(query, intent, []), result)
def test_should_parse_intent_with_ambivalent_words(self):
    """Check parsing when "daisy" appears both as plain text and as a
    ``name`` slot value in the training utterances: the query must resolve
    to ``give_flower`` with "emily" as the only slot."""
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: give_flower
utterances:
  - give a rose to [name](emily)
  - give a daisy to [name](tom)
  - give a tulip to [name](daisy)
""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    fitted_parser = LookupIntentParser().fit(dataset)

    # When
    result = fitted_parser.parse("give a daisy to emily")

    # Then
    intent = intent_classification_result(
        intent_name="give_flower", probability=1.0)
    name_slot = {
        "entity": "name",
        "range": {"end": 21, "start": 16},
        "slotName": "name",
        "value": "emily"
    }
    self.assertDictEqual(intent, result[RES_INTENT])
    self.assertListEqual([name_slot], result[RES_SLOTS])
def test_should_parse_naughty_strings(self):
    """Check that parsing every line of ``resources/naughty_strings.txt``
    with a fitted parser does not raise."""
    # Given
    yaml_stream = io.StringIO(
        """
---
type: intent
name: my_intent
utterances:
- this is [slot1:entity1](my first entity)
- this is [slot2:entity2](second_entity)""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    strings_file = TEST_PATH / "resources" / "naughty_strings.txt"
    with strings_file.open(encoding="utf8") as f:
        # Only the newline is stripped; other whitespace is part of the
        # string under test.
        naughty_strings = [raw_line.strip("\n") for raw_line in f]

    # When
    fitted_parser = LookupIntentParser().fit(dataset)

    # Then
    for naughty in naughty_strings:
        with self.fail_if_exception("Exception raised"):
            fitted_parser.parse(naughty)
def test_should_parse_stop_words_slots(self):
    """Check that slot values which are themselves stop words ("this",
    "that") are still extracted when ``ignore_stop_words`` is enabled."""
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: search
utterances:
  - search
  - search [search_object](this)
  - search [search_object](a cat)

---
type: entity
name: search_object
values:
  - [this thing, that]
""")
    # Work on a copy so the stop words override does not leak into the
    # resources returned by ``get_resources``.
    custom_resources = deepcopy(self.get_resources("en"))
    custom_resources[STOP_WORDS] = {"a", "this", "that"}
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    config = LookupIntentParserConfig(ignore_stop_words=True)
    parser = LookupIntentParser(config=config, resources=custom_resources)
    parser.fit(dataset)

    # When
    result_this = parser.parse("search this")
    result_that = parser.parse("search that")

    # Then
    intent = intent_classification_result(
        intent_name="search", probability=1.0)
    slots_this = [
        unresolved_slot(match_range=(7, 11), value="this",
                        entity="search_object",
                        slot_name="search_object")
    ]
    slots_that = [
        unresolved_slot(match_range=(7, 11), value="that",
                        entity="search_object",
                        slot_name="search_object")
    ]
    self.assertEqual(intent, result_this[RES_INTENT])
    self.assertEqual(intent, result_that[RES_INTENT])
    self.assertListEqual(slots_this, result_this[RES_SLOTS])
    self.assertListEqual(slots_that, result_that[RES_SLOTS])
def test_should_parse_intent_with_stop_words(self, mock_get_stop_words):
    """Check that a query with an extra leading stop word ("Hey") is
    classified as ``dummy_intent_1`` when stop words are ignored.

    NOTE(review): ``mock_get_stop_words`` is presumably injected by a
    ``patch`` decorator that is not visible in this excerpt -- confirm.
    """
    # Given
    mock_get_stop_words.return_value = {"a", "hey"}
    config = LookupIntentParserConfig(ignore_stop_words=True)
    fitted_parser = LookupIntentParser(config).fit(self.slots_dataset)
    query = ("Hey this is dummy_a query with another dummy_c at 10p.m. "
             "or at 12p.m.")

    # When
    result = fitted_parser.parse(query)

    # Then
    self.assertEqual(
        intent_classification_result(
            intent_name="dummy_intent_1", probability=1.0),
        result[RES_INTENT])
def test_should_fit_and_parse_with_non_ascii_tags(self):
    """Check that fitting and parsing work when the entity and slot names
    contain non-ASCII characters."""
    # Given
    utterances = [
        {
            DATA: [
                {
                    TEXT: "string%s" % index,
                    ENTITY: "non_ascìi_entïty",
                    SLOT_NAME: "non_ascìi_slöt",
                }
            ]
        }
        for index in range(10)
    ]

    # When
    entity_config = {
        "use_synonyms": False,
        "automatically_extensible": True,
        "matching_strictness": 1.0,
        "data": [],
    }
    naughty_dataset = {
        "intents": {"naughty_intent": {"utterances": utterances}},
        "entities": {"non_ascìi_entïty": entity_config},
        "language": "en",
    }

    # Then
    with self.fail_if_exception("Exception raised"):
        fitted_parser = LookupIntentParser().fit(naughty_dataset)
        result = fitted_parser.parse("string0")

        expected_slot = {
            "entity": "non_ascìi_entïty",
            "range": {"start": 0, "end": 7},
            "slotName": "non_ascìi_slöt",
            "value": "string0",
        }
        self.assertEqual(
            "naughty_intent", result[RES_INTENT][RES_INTENT_NAME])
        self.assertListEqual([expected_slot], result[RES_SLOTS])
def test_should_parse_intent_with_duplicated_slot_names(self):
    """Check that two occurrences of the same slot name (``number``) in a
    single utterance are both returned, each with its own range."""
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: math_operation
slots:
  - name: number
    entity: snips/number
utterances:
  - what is [number](one) plus [number](one)""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    fitted_parser = LookupIntentParser().fit(dataset)

    # When
    result = fitted_parser.parse("what is one plus one")

    # Then
    intent = intent_classification_result(
        intent_name="math_operation", probability=1.0)
    first_number = {
        "entity": "snips/number",
        "range": {"end": 11, "start": 8},
        "slotName": "number",
        "value": "one"
    }
    second_number = {
        "entity": "snips/number",
        "range": {"end": 20, "start": 17},
        "slotName": "number",
        "value": "one"
    }
    self.assertDictEqual(intent, result[RES_INTENT])
    self.assertListEqual([first_number, second_number], result[RES_SLOTS])
def test_should_parse_intent_with_filter(self):
    """Check that restricting parsing to ``intent1`` yields an empty result
    for a text that only matches ``intent2``."""
    # Given
    yaml_stream = io.StringIO(
        """
---
type: intent
name: intent1
utterances:
  - foo bar baz

---
type: intent
name: intent2
utterances:
  - foo bar ban""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    fitted_parser = LookupIntentParser().fit(dataset)
    query = "foo bar ban"

    # When
    result = fitted_parser.parse(query, intents=["intent1"])

    # Then
    self.assertEqual(empty_result(query, 1.0), result)
def test_should_ignore_completely_ambiguous_utterances(self):
    """Check that an utterance declared identically in two intents yields
    an empty result with probability 1.0."""
    # Given
    yaml_stream = io.StringIO(
        """
---
type: intent
name: dummy_intent_1
utterances:
  - Hello world

---
type: intent
name: dummy_intent_2
utterances:
  - Hello world""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    fitted_parser = LookupIntentParser().fit(dataset)
    query = "Hello world"

    # When
    result = fitted_parser.parse(query)

    # Then
    self.assertEqual(empty_result(query, 1.0), result)
def test_should_parse_slots(self):
    """Check the slots extracted from several texts against
    ``self.slots_dataset``.

    Each case pairs an input text with the exact list of unresolved slots
    (range, value, entity, slot name) that ``parse`` must return. The
    cases cover plain text, extra punctuation, leading/trailing
    whitespace, and a slot value containing a double space.
    """
    # Given
    dataset = self.slots_dataset
    parser = LookupIntentParser().fit(dataset)
    texts = [
        (
            "this is a dummy a query with another dummy_c at 10p.m. or at"
            " 12p.m.",
            [
                unresolved_slot(
                    match_range=(10, 17),
                    value="dummy a",
                    entity="dummy_entity_1",
                    slot_name="dummy_slot_name",
                ),
                unresolved_slot(
                    match_range=(37, 44),
                    value="dummy_c",
                    entity="dummy_entity_2",
                    slot_name="dummy_slot_name2",
                ),
                unresolved_slot(
                    match_range=(45, 54),
                    value="at 10p.m.",
                    entity="snips/datetime",
                    slot_name="startTime",
                ),
                unresolved_slot(
                    match_range=(58, 67),
                    value="at 12p.m.",
                    entity="snips/datetime",
                    slot_name="startTime",
                ),
            ],
        ),
        (
            "this, is,, a, dummy a query with another dummy_c at 10pm or "
            "at 12p.m.",
            [
                unresolved_slot(
                    match_range=(14, 21),
                    value="dummy a",
                    entity="dummy_entity_1",
                    slot_name="dummy_slot_name",
                ),
                unresolved_slot(
                    match_range=(41, 48),
                    value="dummy_c",
                    entity="dummy_entity_2",
                    slot_name="dummy_slot_name2",
                ),
                unresolved_slot(
                    match_range=(49, 56),
                    value="at 10pm",
                    entity="snips/datetime",
                    slot_name="startTime",
                ),
                unresolved_slot(
                    match_range=(60, 69),
                    value="at 12p.m.",
                    entity="snips/datetime",
                    slot_name="startTime",
                ),
            ],
        ),
        (
            "this is a dummy b",
            [
                unresolved_slot(
                    match_range=(10, 17),
                    value="dummy b",
                    entity="dummy_entity_1",
                    slot_name="dummy_slot_name",
                )
            ],
        ),
        (
            " this is a dummy b ",
            [
                unresolved_slot(
                    match_range=(11, 18),
                    value="dummy b",
                    entity="dummy_entity_1",
                    slot_name="dummy_slot_name",
                )
            ],
        ),
        (
            # The double space inside "dummy  a" is intentional: the
            # expected range (21, 29) spans 8 characters, so the raw
            # value must keep both spaces.
            " at 8am ’ there is a dummy  a",
            [
                unresolved_slot(
                    match_range=(1, 7),
                    value="at 8am",
                    entity="snips/datetime",
                    slot_name="startTime",
                ),
                unresolved_slot(
                    match_range=(21, 29),
                    value="dummy  a",
                    entity="dummy_entity_1",
                    slot_name="dummy_slot_name",
                ),
            ],
        ),
    ]

    for text, expected_slots in texts:
        # When
        parsing = parser.parse(text)

        # Then
        self.assertListEqual(expected_slots, parsing[RES_SLOTS])
def test_should_parse_top_intents(self):
    """Check the ranked results of ``parse(text, top_n=3)`` for "meeting
    tomorrow": ``intent2`` first, then ``intent1``, then ``intent3``, with
    probabilities equal to the normalized weights 1, 1/2 and 1/3."""
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - meeting [time:snips/datetime](today)

---
type: intent
name: intent2
utterances:
  - meeting tomorrow

---
type: intent
name: intent3
utterances:
  - "[event_type](call) [time:snips/datetime](at 9pm)"

---
type: entity
name: event_type
values:
  - meeting
  - feedback session""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    fitted_parser = LookupIntentParser().fit(dataset)

    # When
    results = fitted_parser.parse("meeting tomorrow", top_n=3)

    # Then
    time_slot = {
        "entity": "snips/datetime",
        "range": {"end": 16, "start": 8},
        "slotName": "time",
        "value": "tomorrow"
    }
    event_slot = {
        "entity": "event_type",
        "range": {"end": 7, "start": 0},
        "slotName": "event_type",
        "value": "meeting"
    }
    # Unnormalized weights expected by this test for each intent.
    w1 = 1. / 2.
    w2 = 1.
    w3 = 1. / 3.
    # Summation order is kept as w1 + w2 + w3 so the float result is
    # unchanged.
    w_total = w1 + w2 + w3

    def ranked(intent_name, weight, slots):
        # One expected entry of the top-n list, with normalized probability.
        return extraction_result(
            intent_classification_result(
                intent_name=intent_name, probability=weight / w_total),
            slots=slots)

    expected_results = [
        ranked("intent2", w2, []),
        ranked("intent1", w1, [time_slot]),
        ranked("intent3", w3, [event_slot, time_slot]),
    ]
    self.assertEqual(expected_results, results)