Beispiel #1
0
    def test_should_get_builtin_slots(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: GetWeather
utterances:
- what is the weather [datetime:snips/datetime](at 9pm)
- what's the weather in [location:weather_location](berlin)
- What's the weather in [location](tokyo) [datetime](this weekend)?
- Can you tell me the weather [datetime] please ?
- what is the weather forecast [datetime] in [location](paris)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        config = CRFSlotFillerConfig(random_seed=42)
        intent = "GetWeather"
        slot_filler = CRFSlotFiller(config, **self.get_shared_data(dataset))
        slot_filler.fit(dataset, intent)

        # When
        slots = slot_filler.get_slots("Give me the weather at 9pm in Paris")

        # Then
        expected_slots = [
            unresolved_slot(match_range={START: 20, END: 26},
                            value='at 9pm',
                            entity='snips/datetime',
                            slot_name='datetime'),
            unresolved_slot(match_range={START: 30, END: 35},
                            value='Paris',
                            entity='weather_location',
                            slot_name='location')
        ]
        self.assertListEqual(expected_slots, slots)
    def test_should_get_builtin_slots(self):
        # Given
        # A CRF slot filler fitted on the validated weather dataset
        dataset = validate_and_format_dataset(WEATHER_DATASET)
        filler_config = CRFSlotFillerConfig(random_seed=42)
        intent_name = "SearchWeatherForecast"
        crf_filler = CRFSlotFiller(filler_config)
        crf_filler.fit(dataset, intent_name)
        query = "Give me the weather at 9p.m. in Paris"

        # When
        slots = crf_filler.get_slots(query)

        # Then
        # Both slots are expected, in their order of appearance in the query
        datetime_slot = unresolved_slot(
            match_range={START: 20, END: 28},
            value='at 9p.m.',
            entity='snips/datetime',
            slot_name='datetime')
        location_slot = unresolved_slot(
            match_range={START: 32, END: 37},
            value='Paris',
            entity='weather_location',
            slot_name='location')
        expected_slots = [datetime_slot, location_slot]
        self.assertListEqual(expected_slots, slots)
Beispiel #3
0
    def test_should_get_sub_builtin_slots(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: PlanBreak
utterances:
- 'I want to leave from [start:snips/datetime](tomorrow) until 
  [end:snips/datetime](next thursday)'
- find me something from [start](9am) to [end](12pm)
- I need a break from [start](2pm) until [end](4pm)
- Can you suggest something from [start](april 4th) until [end](april 6th) ?
- Book me a trip from [start](this friday) to [end](next tuesday)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        config = CRFSlotFillerConfig(random_seed=42)
        intent = "PlanBreak"
        slot_filler = CRFSlotFiller(config,
                                    **self.get_shared_data(dataset))
        slot_filler.fit(dataset, intent)

        # When
        slots = slot_filler.get_slots("Find me a plan from 5pm to 6pm")

        # Then
        expected_slots = [
            unresolved_slot(match_range={START: 20, END: 23},
                            value="5pm",
                            entity="snips/datetime",
                            slot_name="start"),
            unresolved_slot(match_range={START: 27, END: 30},
                            value="6pm",
                            entity="snips/datetime",
                            slot_name="end")
        ]
        self.assertListEqual(expected_slots, slots)
    def test_should_get_slots_with_keywords_slot_filler(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: SetLightColor
utterances:
- set the light to [color](blue) in the [room](kitchen)
- please make the lights [color](red) in the [room](bathroom)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        intent = "SetLightColor"
        slot_filler = KeywordSlotFiller().fit(dataset, intent)

        # When
        slots = slot_filler.get_slots("I want red lights in the kitchen now")

        # Then
        expected_slots = [
            unresolved_slot(match_range={
                START: 7,
                END: 10
            },
                            value="red",
                            entity="color",
                            slot_name="color"),
            unresolved_slot(match_range={
                START: 25,
                END: 32
            },
                            value="kitchen",
                            entity="room",
                            slot_name="room")
        ]
        self.assertListEqual(slots, expected_slots)
Beispiel #5
0
 def get_slots(self, text, intent):
     """Returns hard-coded slots which only depend on *intent*

     Args:
         text (str): Input text, not used to build the slots
         intent (str): Name of the intent for which slots are requested

     Returns:
         list: A single "slot1" slot for "intent1", a single "slot2" slot
         for "intent2", and an empty list for any other intent.
     """
     slots = []
     if intent == "intent1":
         slots = [unresolved_slot((0, 3), "foo", "entity1", "slot1")]
     elif intent == "intent2":
         slots = [unresolved_slot((8, 11), "ban", "entity2", "slot2")]
     return slots
    def test_should_parse_stop_words_slots(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: search
utterances:
  - search
  - search [search_object](this)
  - search [search_object](a cat)
  
---
type: entity
name: search_object
values:
  - [this thing, that]
  """)

        resources = self.get_resources("en")
        resources[STOP_WORDS] = {"a", "this", "that"}
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser_config = DeterministicIntentParserConfig(ignore_stop_words=True)
        parser = DeterministicIntentParser(config=parser_config,
                                           resources=resources)
        parser.fit(dataset)

        # When
        res_1 = parser.parse("search this")
        res_2 = parser.parse("search that")

        # Then
        expected_intent = intent_classification_result(intent_name="search",
                                                       probability=1.0)
        expected_slots_1 = [
            unresolved_slot(match_range=(7, 11),
                            value="this",
                            entity="search_object",
                            slot_name="search_object")
        ]
        expected_slots_2 = [
            unresolved_slot(match_range=(7, 11),
                            value="that",
                            entity="search_object",
                            slot_name="search_object")
        ]
        self.assertEqual(expected_intent, res_1[RES_INTENT])
        self.assertEqual(expected_intent, res_2[RES_INTENT])
        self.assertListEqual(expected_slots_1, res_1[RES_SLOTS])
        self.assertListEqual(expected_slots_2, res_2[RES_SLOTS])
Beispiel #7
0
    def test_should_parse_after_deserialization(self):
        # Given
        # An engine fitted on the beverage dataset
        trained_engine = SnipsNLUEngine().fit(BEVERAGE_DATASET)
        input_ = "Give me 3 cups of hot tea please"

        # When
        # The engine goes through a dict round trip before parsing
        serialized_engine = trained_engine.to_dict()
        restored_engine = SnipsNLUEngine.from_dict(serialized_engine)
        result = restored_engine.parse(input_)

        # Then
        # The dict representation must be dumpable to json
        msg = "SnipsNLUEngine dict should be json serializable to utf-8"
        with self.fail_if_exception(msg):
            json.dumps(serialized_engine).encode("utf-8")
        number_slot = resolved_slot(
            {START: 8, END: 9}, '3', {'kind': 'Number', 'value': 3.0},
            'snips/number', 'number_of_cups')
        temperature_slot = custom_slot(unresolved_slot(
            {START: 18, END: 21}, 'hot', 'Temperature',
            'beverage_temperature'))
        expected_slots = [number_slot, temperature_slot]
        self.assertEqual(result[RES_INPUT], input_)
        self.assertEqual(result[RES_INTENT][RES_INTENT_NAME], 'MakeTea')
        self.assertListEqual(result[RES_SLOTS], expected_slots)
Beispiel #8
0
    def test_should_parse_after_deserialization(self):
        # Given
        input_ = "Give me 3 cups of hot tea please"
        fitted_engine = SnipsNLUEngine().fit(BEVERAGE_DATASET)

        # When
        # Serialize to a dict, rebuild an engine from it and parse with
        # the rebuilt engine
        as_dict = fitted_engine.to_dict()
        reloaded_engine = SnipsNLUEngine.from_dict(as_dict)
        result = reloaded_engine.parse(input_)

        # Then
        msg = "SnipsNLUEngine dict should be json serializable to utf-8"
        with self.fail_if_exception(msg):
            json.dumps(as_dict).encode("utf-8")

        cups_range = {START: 8, END: 9}
        cups_resolved_value = {'kind': 'Number', 'value': 3.0}
        temperature_range = {START: 18, END: 21}
        expected_slots = [
            resolved_slot(cups_range, '3', cups_resolved_value,
                          'snips/number', 'number_of_cups'),
            custom_slot(
                unresolved_slot(temperature_range, 'hot', 'Temperature',
                                'beverage_temperature')),
        ]
        self.assertEqual(result[RES_INPUT], input_)
        self.assertEqual(result[RES_INTENT][RES_INTENT_NAME], 'MakeTea')
        self.assertListEqual(result[RES_SLOTS], expected_slots)
Beispiel #9
0
    def test_should_serialize_results(self):
        # Given
        input_ = "hello world"
        intent = intent_classification_result("world", 0.5)
        slot = unresolved_slot(
            [3, 5], "slot_value", "slot_entity", "slot_name")
        slots = [slot]

        # When
        result = parsing_result(input=input_, intent=intent, slots=slots)

        # Then
        msg = "Result dict should be json serializable"
        with self.fail_if_exception(msg):
            json.dumps(result)

        # The range given as a list is expected back as a start/end dict
        expected_intent = {RES_INTENT_NAME: 'world', RES_PROBABILITY: 0.5}
        expected_slot = {
            RES_MATCH_RANGE: {"start": 3, "end": 5},
            RES_ENTITY: 'slot_entity',
            RES_SLOT_NAME: 'slot_name',
            RES_VALUE: 'slot_value',
        }
        expected_result = {
            RES_INTENT: expected_intent,
            RES_SLOTS: [expected_slot],
            RES_INPUT: input_,
        }
        self.assertDictEqual(expected_result, result)
 def _get_matching_result(self, text, processed_text, regex, intent,
                          builtin_entities_ranges_mapping=None):
     """Builds the parsing result obtained by matching *regex*

     Args:
         text (str): Input from which the slot values are sliced
         processed_text (str): Text on which *regex* is matched
         regex: Compiled pattern, whose named groups are keys of
             ``self.group_names_to_slot_names``
         intent (str): Intent name attached to the result, with a
             probability of 1.0
         builtin_entities_ranges_mapping (dict, optional): Maps
             ``(start, end)`` tuples of matched groups to ranges indexed
             by START and END. When a group range is not in the mapping,
             it is shifted by the value of ``_get_range_shift``.
             Presumably converts positions in *processed_text* into
             positions in *text* -- to be confirmed against the callers.

     Returns:
         dict or None: None when *regex* does not match, otherwise the
         output of :func:`.parsing_result` with deduplicated slots sorted
         by start position.
     """
     match = regex.match(processed_text)
     if match is None:
         return None

     ranges_mapping = builtin_entities_ranges_mapping
     parsed_intent = intent_classification_result(intent_name=intent,
                                                  probability=1.0)

     def _slot_start(slot):
         return slot[RES_MATCH_RANGE][START]

     slots = []
     for group in match.groupdict():
         slot_name = self.group_names_to_slot_names[group]
         entity = self.slot_names_to_entities[intent][slot_name]
         span = (match.start(group), match.end(group))
         if ranges_mapping is None:
             # No mapping: the matched span is used as is
             slot_range = {START: span[0], END: span[1]}
         elif span in ranges_mapping:
             slot_range = ranges_mapping[span]
         else:
             shift = _get_range_shift(span, ranges_mapping)
             slot_range = {START: span[0] + shift, END: span[1] + shift}
         # The value is always read from the original text
         slot_value = text[slot_range[START]:slot_range[END]]
         slots.append(unresolved_slot(match_range=slot_range,
                                      value=slot_value,
                                      entity=entity,
                                      slot_name=slot_name))

     deduplicated = _deduplicate_overlapping_slots(slots, self.language)
     ordered_slots = sorted(deduplicated, key=_slot_start)
     return parsing_result(text, parsed_intent, ordered_slots)
    def test_should_get_slots(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me [number_of_cups:snips/number](five) cups of tea
- please I want [number_of_cups](two) cups of tea""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        shared[RANDOM_STATE] = 42
        slot_filler = CRFSlotFiller(**shared)
        intent = "MakeTea"
        slot_filler.fit(dataset, intent)

        # When
        slots = slot_filler.get_slots("make me two cups of tea")

        # Then
        expected_slots = [
            unresolved_slot(match_range={
                START: 8,
                END: 11
            },
                            value='two',
                            entity='snips/number',
                            slot_name='number_of_cups')
        ]
        self.assertListEqual(slots, expected_slots)
    def test_should_parse_with_filter(self):
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - "[slot1:entity1](foo) bar"

---
type: intent
name: intent2
utterances:
  - foo bar [slot2:entity2](baz)

---
type: intent
name: intent3
utterances:
  - foz for [slot3:entity3](baz)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        shared[RANDOM_STATE] = 42
        parser = ProbabilisticIntentParser(**shared)
        parser.fit(dataset)
        text = "foo bar baz"

        # When
        result = parser.parse(text, intents=["intent1", "intent3"])

        # Then
        expected_slots = [unresolved_slot((0, 3), "foo", "entity1", "slot1")]

        self.assertEqual("intent1", result[RES_INTENT][RES_INTENT_NAME])
        self.assertEqual(expected_slots, result[RES_SLOTS])
Beispiel #13
0
    def test_should_parse_after_deserialization_from_dir(self):
        # Given
        # An engine fitted on the beverage dataset
        fitted_engine = SnipsNLUEngine().fit(BEVERAGE_DATASET)
        input_ = "Give me 3 cups of hot tea please"

        # When
        # The engine is persisted, then loaded back from the same path
        fitted_engine.persist(self.tmp_file_path)
        loaded_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
        result = loaded_engine.parse(input_)

        # Then
        number_slot = resolved_slot(
            {START: 8, END: 9}, "3", {"kind": "Number", "value": 3.0},
            "snips/number", "number_of_cups")
        temperature_slot = custom_slot(unresolved_slot(
            {START: 18, END: 21}, "hot", "Temperature",
            "beverage_temperature"))
        expected_slots = [number_slot, temperature_slot]
        self.assertEqual(result[RES_INPUT], input_)
        self.assertEqual(result[RES_INTENT][RES_INTENT_NAME], "MakeTea")
        self.assertListEqual(result[RES_SLOTS], expected_slots)
    def test_should_be_serializable_into_bytearray(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me [number_of_cups:snips/number](one) cup of tea
- i want [number_of_cups] cups of tea please
- can you prepare [number_of_cups] cups of tea ?""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        slot_filler = CRFSlotFiller(**shared).fit(dataset, "MakeTea")

        # When
        slot_filler_bytes = slot_filler.to_byte_array()
        loaded_slot_filler = CRFSlotFiller.from_byte_array(
            slot_filler_bytes, **shared)
        slots = loaded_slot_filler.get_slots("make me two cups of tea")

        # Then
        expected_slots = [
            unresolved_slot(match_range={
                START: 8,
                END: 11
            },
                            value='two',
                            entity='snips/number',
                            slot_name='number_of_cups')
        ]
        self.assertListEqual(expected_slots, slots)
    def test_should_get_slots_after_deserialization(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me [number_of_cups:snips/number](one) cup of tea
- i want [number_of_cups] cups of tea please
- can you prepare [number_of_cups] cups of tea ?""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        intent = "MakeTea"
        shared = self.get_shared_data(dataset)
        shared[RANDOM_STATE] = 42
        slot_filler = CRFSlotFiller(**shared)
        slot_filler.fit(dataset, intent)
        slot_filler.persist(self.tmp_file_path)

        deserialized_slot_filler = CRFSlotFiller.from_path(
            self.tmp_file_path, **shared)

        # When
        slots = deserialized_slot_filler.get_slots("make me two cups of tea")

        # Then
        expected_slots = [
            unresolved_slot(match_range={
                START: 8,
                END: 11
            },
                            value='two',
                            entity='snips/number',
                            slot_name='number_of_cups')
        ]
        self.assertListEqual(expected_slots, slots)
    def test_should_serialize_results(self):
        # Given
        input_ = "hello world"
        intent = intent_classification_result("world", 0.5)
        slots = [
            unresolved_slot(
                [3, 5], "slot_value", "slot_entity", "slot_name"),
        ]

        # When
        result = parsing_result(input=input_, intent=intent, slots=slots)

        # Then
        msg = "Result dict should be json serializable"
        with self.fail_if_exception(msg):
            json.dumps(result)

        # The range given as a list is expected back as a start/end dict
        expected_intent = {RES_INTENT_NAME: 'world', RES_PROBA: 0.5}
        expected_range = {"start": 3, "end": 5}
        expected_slot = {
            RES_MATCH_RANGE: expected_range,
            RES_ENTITY: 'slot_entity',
            RES_SLOT_NAME: 'slot_name',
            RES_VALUE: 'slot_value',
        }
        expected_result = {
            RES_INTENT: expected_intent,
            RES_SLOTS: [expected_slot],
            RES_INPUT: input_,
        }
        self.assertDictEqual(expected_result, result)
Beispiel #17
0
    def test_should_be_serializable_into_bytearray(self):
        # Given
        # A fitted slot filler, whose entity parsers are kept aside so that
        # they can be handed to the deserialized filler
        fitted_filler = CRFSlotFiller().fit(BEVERAGE_DATASET, "MakeTea")
        builtin_entity_parser = fitted_filler.builtin_entity_parser
        custom_entity_parser = fitted_filler.custom_entity_parser

        # When
        filler_bytes = fitted_filler.to_byte_array()
        loaded_filler = CRFSlotFiller.from_byte_array(
            filler_bytes,
            builtin_entity_parser=builtin_entity_parser,
            custom_entity_parser=custom_entity_parser)
        slots = loaded_filler.get_slots("make me two cups of tea")

        # Then
        cups_slot = unresolved_slot(
            match_range={START: 8, END: 11},
            value='two',
            entity='snips/number',
            slot_name='number_of_cups')
        expected_slots = [cups_slot]
        self.assertListEqual(expected_slots, slots)
Beispiel #18
0
    def test_should_get_slots_after_deserialization(self):
        # Given
        # A fitted CRF slot filler persisted to the temporary path
        dataset = BEVERAGE_DATASET
        filler_config = CRFSlotFillerConfig(random_seed=42)
        intent_name = "MakeTea"
        original_filler = CRFSlotFiller(filler_config)
        original_filler.fit(dataset, intent_name)
        original_filler.persist(self.tmp_file_path)

        # The entity parsers of the original filler are handed to the
        # deserialized one
        parsers = {
            "custom_entity_parser": original_filler.custom_entity_parser,
            "builtin_entity_parser": original_filler.builtin_entity_parser,
        }
        loaded_filler = CRFSlotFiller.from_path(
            self.tmp_file_path, **parsers)
        query = "make me two cups of tea"

        # When
        slots = loaded_filler.get_slots(query)

        # Then
        cups_slot = unresolved_slot(
            match_range={START: 8, END: 11},
            value='two',
            entity='snips/number',
            slot_name='number_of_cups')
        expected_slots = [cups_slot]
        self.assertListEqual(expected_slots, slots)
Beispiel #19
0
 def mock_proba_parse(text, intents):
     """Mocked parsing which tags the whole *text* as a single slot

     Args:
         text (str): Input, entirely used as the value of the "slot1" slot
         intents: Not used by this mock

     Returns:
         dict: The output of :func:`.parsing_result` built with
         ``mocked_proba_parser_intent`` and the single slot.
     """
     whole_text_slot = unresolved_slot(
         match_range=(0, len(text)),
         value=text,
         entity="entity1",
         slot_name="slot1")
     return parsing_result(
         text, mocked_proba_parser_intent, [whole_text_slot])
    def test_engine_with_keyword_slot_filler_should_be_serializable(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: SetLightColor
utterances:
- set the light to [color](blue) in the [room](kitchen)
- please make the lights [color](red) in the [room](bathroom)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        intent = "SetLightColor"
        slot_filler_config = {
            "unit_name": "keyword_slot_filler",
            "lowercase": True
        }
        parser_config = ProbabilisticIntentParserConfig(
            slot_filler_config=slot_filler_config)
        engine_config = NLUEngineConfig([parser_config])
        engine = SnipsNLUEngine(engine_config).fit(dataset, intent)
        engine.persist(self.tmp_file_path)
        text = "I want Red lights in the kitchen now"

        # When
        loaded_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
        res = loaded_engine.parse(text)

        # Then
        expected_slots = [
            custom_slot(
                unresolved_slot(match_range={
                    START: 7,
                    END: 10
                },
                                value="Red",
                                entity="color",
                                slot_name="color"), "red"),
            custom_slot(
                unresolved_slot(match_range={
                    START: 25,
                    END: 32
                },
                                value="kitchen",
                                entity="room",
                                slot_name="room"))
        ]
        self.assertListEqual(expected_slots, res["slots"])
Beispiel #21
0
 def parse(self, text, intents=None, top_n=None):
     """Mocked parsing which tags the whole *text* as a single slot

     Args:
         text (str): Input, entirely used as the value of the "slot1" slot
         intents: Not used by this mock
         top_n: Not used by this mock

     Returns:
         dict: The output of :func:`.parsing_result` built with
         ``mocked_intent`` and the single slot.
     """
     whole_text_slot = unresolved_slot(
         match_range=(0, len(text)),
         value=text,
         entity="entity1",
         slot_name="slot1")
     return parsing_result(text, mocked_intent, [whole_text_slot])
Beispiel #22
0
def tags_to_slots(text, tokens, tags, tagging_scheme, intent_slots_mapping):
    """Converts tagged tokens into unresolved slots

    Args:
        text (str): Input from which the slot values are sliced
        tokens: Tokens forwarded to ``tags_to_preslots``
        tags: Tags forwarded to ``tags_to_preslots``
        tagging_scheme: Tagging scheme forwarded to ``tags_to_preslots``
        intent_slots_mapping (dict): Maps each slot name to its entity

    Returns:
        list: One unresolved slot per item returned by
        ``tags_to_preslots``, in the same order.
    """
    slots = []
    for preslot in tags_to_preslots(tokens, tags, tagging_scheme):
        slot_range = preslot[RANGE]
        slot_name = preslot[SLOT_NAME]
        # The slot value is the chunk of text covered by the range
        slot_value = text[slot_range[START]:slot_range[END]]
        slots.append(
            unresolved_slot(match_range=slot_range,
                            value=slot_value,
                            entity=intent_slots_mapping[slot_name],
                            slot_name=slot_name))
    return slots
    def parse(self, text, intents=None):
        """Parses *text* by matching it against the regexes of each intent

        The first matching regex provides both the intent and the slots,
        the latter being read from the named groups of the match.

        Args:
            text (str): Input
            intents (str or list of str): When provided, only the regexes
                of these intents are tried

        Returns:
            dict: The matched intent, if any, along with the extracted slots.
            See :func:`.parsing_result` for the output format.

        Raises:
            NotTrained: When the intent parser is not fitted
        """
        if not self.fitted:
            raise NotTrained("DeterministicIntentParser must be fitted")

        # A single intent name is handled as a one-element list
        if isinstance(intents, str):
            intents = [intents]

        ranges_mapping, processed_text = _replace_builtin_entities(
            text, self.language)

        def _slot_start(slot):
            return slot[RES_MATCH_RANGE][START]

        for intent_name, patterns in iteritems(self.regexes_per_intent):
            in_scope = intents is None or intent_name in intents
            if not in_scope:
                continue
            for pattern in patterns:
                found = pattern.match(processed_text)
                if found is None:
                    continue
                parsed_intent = intent_classification_result(
                    intent_name=intent_name, probability=1.0)
                slots = []
                for group in found.groupdict():
                    slot_name = self.group_names_to_slot_names[group]
                    entity = self.slot_names_to_entities[slot_name]
                    span = (found.start(group), found.end(group))
                    if span in ranges_mapping:
                        # Mapped range: the value is read from the
                        # original text rather than from the match
                        slot_range = ranges_mapping[span]
                        slot_value = text[
                            slot_range[START]:slot_range[END]]
                    else:
                        slot_range = {START: span[0], END: span[1]}
                        slot_value = found.group(group)
                    slots.append(unresolved_slot(match_range=slot_range,
                                                 value=slot_value,
                                                 entity=entity,
                                                 slot_name=slot_name))
                deduplicated = _deduplicate_overlapping_slots(
                    slots, self.language)
                ordered_slots = sorted(deduplicated, key=_slot_start)
                return parsing_result(text, parsed_intent, ordered_slots)
        return empty_result(text)
    def parse(self, text, intents=None):
        """Extracts an intent and its slots from *text* with regexes

        Regexes are tried intent by intent, and the first one matching
        the processed text determines the whole result.

        Args:
            text (str): Input
            intents (str or list of str): Optional restriction of the
                intents whose regexes are tried

        Returns:
            dict: The matched intent, if any, along with the extracted slots.
            See :func:`.parsing_result` for the output format.

        Raises:
            NotTrained: When the intent parser is not fitted
        """
        if not self.fitted:
            raise NotTrained("DeterministicIntentParser must be fitted")

        # Accept a bare intent name as well as a list of names
        if isinstance(intents, str):
            intents = [intents]

        ranges_mapping, processed_text = _replace_builtin_entities(
            text, self.language)

        for candidate, candidate_regexes in iteritems(
                self.regexes_per_intent):
            if not (intents is None or candidate in intents):
                continue
            for candidate_regex in candidate_regexes:
                matched = candidate_regex.match(processed_text)
                if matched is None:
                    continue
                parsed_intent = intent_classification_result(
                    intent_name=candidate, probability=1.0)
                slots = []
                for name in matched.groupdict():
                    slot_name = self.group_names_to_slot_names[name]
                    entity = self.slot_names_to_entities[slot_name]
                    group_span = (matched.start(name), matched.end(name))
                    if group_span not in ranges_mapping:
                        match_range = {START: group_span[0],
                                       END: group_span[1]}
                        value = matched.group(name)
                    else:
                        # Mapped range: slice the value out of the
                        # original text
                        match_range = ranges_mapping[group_span]
                        value = text[match_range[START]:match_range[END]]
                    slots.append(
                        unresolved_slot(match_range=match_range,
                                        value=value,
                                        entity=entity,
                                        slot_name=slot_name))
                parsed_slots = sorted(
                    _deduplicate_overlapping_slots(slots, self.language),
                    key=lambda slot: slot[RES_MATCH_RANGE][START])
                return parsing_result(text, parsed_intent, parsed_slots)
        return empty_result(text)
Beispiel #25
0
 def get_slots(self, text):
     """Extracts slots by looking up each token in ``self.slots_keywords``

     Args:
         text (str): Input, tokenized with ``self.language``

     Returns:
         list: One unresolved slot per token whose value is a key of
         ``self.slots_keywords``, in token order. The token value is
         lowercased before the lookup when the "lowercase" config entry
         is truthy, but the slot always carries the token value as is.
     """
     slots = []
     for token in tokenize(text, self.language):
         keyword = token.value
         if self.config.get("lowercase", False):
             keyword = keyword.lower()
         if keyword not in self.slots_keywords:
             continue
         # Each keyword entry holds the entity first, then the slot name
         keyword_entry = self.slots_keywords[keyword]
         slots.append(
             unresolved_slot((token.start, token.end), token.value,
                             keyword_entry[0], keyword_entry[1]))
     return slots
Beispiel #26
0
    def test_should_parse_after_deserialization_from_dir(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
- i want [number_of_cups] cups of [beverage_temperature](boiling hot) tea pls
- can you prepare [number_of_cups] cup of [beverage_temperature](cold) tea ?

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee
- can you prepare [number_of_cups] cup of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        engine = SnipsNLUEngine(**shared).fit(dataset)
        text = "Give me 3 cups of hot tea please"

        # When
        engine.persist(self.tmp_file_path)
        deserialized_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
        result = deserialized_engine.parse(text)

        # Then
        expected_slots = [
            resolved_slot({
                START: 8,
                END: 9
            }, "3", {
                "kind": "Number",
                "value": 3.0
            }, "snips/number", "number_of_cups"),
            custom_slot(
                unresolved_slot({
                    START: 18,
                    END: 21
                }, "hot", "Temperature", "beverage_temperature"))
        ]
        self.assertEqual(result[RES_INPUT], text)
        self.assertEqual(result[RES_INTENT][RES_INTENT_NAME], "MakeTea")
        self.assertListEqual(result[RES_SLOTS], expected_slots)
    def test_should_get_slots(self):
        # Given
        # A CRF slot filler fitted on the validated beverage dataset
        dataset = validate_and_format_dataset(BEVERAGE_DATASET)
        filler_config = CRFSlotFillerConfig(random_seed=42)
        intent_name = "MakeTea"
        crf_filler = CRFSlotFiller(filler_config)
        crf_filler.fit(dataset, intent_name)
        query = "make me two cups of tea"

        # When
        slots = crf_filler.get_slots(query)

        # Then
        cups_slot = unresolved_slot(
            match_range={START: 8, END: 11},
            value='two',
            entity='snips/number',
            slot_name='number_of_cups')
        expected_slots = [cups_slot]
        self.assertListEqual(slots, expected_slots)
Beispiel #28
0
    def test_should_use_parsers_sequentially(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- hello [greeted:name](john)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        input_text = "hello snips"
        intent = intent_classification_result(intent_name='greeting1',
                                              probability=0.7)
        slots = [
            unresolved_slot(match_range=(6, 11),
                            value='snips',
                            entity='name',
                            slot_name='greeted')
        ]

        # pylint:disable=unused-variable
        @IntentParser.register("first_intent_parser", True)
        class FirstIntentParser(MockIntentParser):
            pass

        @IntentParser.register("second_intent_parser", True)
        class SecondIntentParser(MockIntentParser):
            def parse(self, text, intents=None, top_n=None):
                if text == input_text:
                    return parsing_result(text, intent, slots)
                return empty_result(text, 1.0)

        # pylint:enable=unused-variable

        config = NLUEngineConfig(
            ["first_intent_parser", "second_intent_parser"])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        parse = engine.parse(input_text)

        # Then
        expected_slots = [custom_slot(s) for s in slots]
        expected_parse = parsing_result(input_text, intent, expected_slots)
        self.assertDictEqual(expected_parse, parse)
    def test_should_deduplicate_overlapping_slots(self):
        # Three slots with ranges [9, 16], [10, 18] and [17, 23] form an
        # overlapping chain; the expectation keeps only the last of them
        # together with the two slots that overlap nothing.

        def make_slot(start, end, value, entity, name):
            # Builds a fresh slot (and a fresh range list) on every call so
            # that inputs and expectations never share objects.
            return unresolved_slot([start, end], value, entity, name)

        # Given
        language = LANGUAGE_EN
        candidates = [
            make_slot(3, 7, "non_overlapping1", "e", "s1"),
            make_slot(9, 16, "aaaaaaa", "e1", "s2"),
            make_slot(10, 18, "bbbbbbbb", "e1", "s3"),
            make_slot(17, 23, "b cccc", "e1", "s4"),
            make_slot(50, 60, "non_overlapping2", "e", "s5"),
        ]

        # When
        kept = _deduplicate_overlapping_slots(candidates, language)

        # Then
        expected = [
            make_slot(3, 7, "non_overlapping1", "e", "s1"),
            make_slot(17, 23, "b cccc", "e1", "s4"),
            make_slot(50, 60, "non_overlapping2", "e", "s5"),
        ]
        self.assertSequenceEqual(kept, expected)
Beispiel #30
0
    def test_should_get_slots(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: greeting
utterances:
- hello [greeted:name](john)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        input_text = "hello snips"
        greeting_intent = "greeting"
        expected_slots = [
            unresolved_slot(match_range=(6, 11),
                            value="snips",
                            entity="name",
                            slot_name="greeted")
        ]

        # pylint:disable=unused-variable
        @IntentParser.register("first_intent_parser", True)
        class FirstIntentParser(MockIntentParser):
            pass

        @IntentParser.register("second_intent_parser", True)
        class SecondIntentParser(MockIntentParser):
            def get_slots(self, text, intent):
                if text == input_text and intent == greeting_intent:
                    return expected_slots
                return []

        # pylint:enable=unused-variable

        config = NLUEngineConfig(
            ["first_intent_parser", "second_intent_parser"])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        res_slots = engine.get_slots(input_text, greeting_intent)

        # Then
        expected_slots = [custom_slot(s) for s in expected_slots]
        self.assertListEqual(expected_slots, res_slots)
Beispiel #31
0
    def _parse_map_output(self, text, output, entities, intents):
        """Turn a map lookup output into the parser's result format.

        Args:
            text: input string; each slot value is the substring of ``text``
                delimited by the corresponding entity's match range.
            output: pair ``(intent_id, slot_ids)`` whose members index
                ``self._intents_names`` and ``self._slots_names``.
            entities: matched entities, one per slot id, each exposing
                ``RES_MATCH_RANGE`` (with ``START``/``END``) and
                ``ENTITY_KIND``.
            intents: optional collection of accepted intent names; ``None``
                disables the filtering.

        Returns:
            ``None`` when the intent is filtered out by ``intents``, otherwise
            the value of ``extraction_result`` built from the intent (with
            probability 1.0) and one unresolved slot per entity.
        """
        intent_id, slot_ids = output
        intent_name = self._intents_names[intent_id]
        intent_is_allowed = intents is None or intent_name in intents
        if not intent_is_allowed:
            return None

        intent_result = intent_classification_result(
            intent_name=intent_name, probability=1.0)

        # assert invariant: exactly one slot id per matched entity
        assert len(slot_ids) == len(entities)

        def build_slot(slot_id, entity):
            # Same lookup order as a plain loop: slot name, range bounds,
            # value slice, then entity kind.
            name = self._slots_names[slot_id]
            start = entity[RES_MATCH_RANGE][START]
            end = entity[RES_MATCH_RANGE][END]
            value = text[start:end]
            kind = entity[ENTITY_KIND]
            return unresolved_slot([start, end], value, kind, name)

        slots = [build_slot(slot_id, entity)
                 for slot_id, entity in zip(slot_ids, entities)]
        return extraction_result(intent_result, slots)
Beispiel #32
0
    def test_should_parse_with_filter(self):
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - "[slot1:entity1](foo) bar"

---
type: intent
name: intent2
utterances:
  - foo bar [slot2:entity2](baz)

---
type: intent
name: intent3
utterances:
  - foz for [slot3:entity3](baz)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        classifier_config = LogRegIntentClassifierConfig(random_seed=42)
        slot_filler_config = CRFSlotFillerConfig(random_seed=42)
        parser_config = ProbabilisticIntentParserConfig(
            classifier_config, slot_filler_config)
        parser = ProbabilisticIntentParser(parser_config)
        parser.fit(dataset)
        text = "foo bar baz"

        # When
        result = parser.parse(text, intents=["intent1", "intent3"])

        # Then
        expected_slots = [unresolved_slot((0, 3), "foo", "entity1", "slot1")]

        self.assertEqual("intent1", result[RES_INTENT][RES_INTENT_NAME])
        self.assertEqual(expected_slots, result[RES_SLOTS])
    def test_should_deduplicate_overlapping_slots(self):
        # The range [0, 8] ("kid loco") overlaps both [0, 3] ("kid") and
        # [4, 8] ("loco"); the expectation keeps the wider slot and the
        # non-overlapping "song" slot.

        def make_slot(start, end, value, entity, name):
            # Builds a fresh slot (and a fresh range list) on every call so
            # that inputs and expectations never share objects.
            return unresolved_slot([start, end], value, entity, name)

        # Given
        language = LANGUAGE_EN
        candidates = [
            make_slot(0, 3, "kid", "e", "s1"),
            make_slot(4, 8, "loco", "e1", "s2"),
            make_slot(0, 8, "kid loco", "e1", "s3"),
            make_slot(9, 13, "song", "e2", "s4"),
        ]

        # When
        kept = _deduplicate_overlapping_slots(candidates, language)

        # Then
        expected = [
            make_slot(0, 8, "kid loco", "e1", "s3"),
            make_slot(9, 13, "song", "e2", "s4"),
        ]
        self.assertSequenceEqual(kept, expected)
Beispiel #34
0
    def test_bilou_tags_to_slots(self):
        # Table-driven check of tags_to_slots with TaggingScheme.BILOU: every
        # case pairs a text and its per-token tags with the slots expected
        # back. The last two cases use tag sequences that are not
        # well-formed (a trailing "inside" tag, a leading "last" tag).

        # Given
        language = LANGUAGE_EN
        slot_name = "animal"
        intent_slots_mapping = {"animal": "animal"}

        # Tag names for the single slot under test.
        begin = BEGINNING_PREFIX + slot_name
        inside = INSIDE_PREFIX + slot_name
        last = LAST_PREFIX + slot_name
        unit = UNIT_PREFIX + slot_name

        def animal(start, end, value):
            # Expected slot covering text[start:end]; the entity and the
            # slot name are both "animal".
            return unresolved_slot(match_range=(start, end), value=value,
                                   entity=slot_name, slot_name=slot_name)

        cases = [
            {
                "text": "",
                "tags": [],
                "expected_slots": [],
            },
            {
                "text": "nothing here",
                "tags": [OUTSIDE, OUTSIDE],
                "expected_slots": [],
            },
            {
                "text": "i am a blue bird",
                "tags": [OUTSIDE, OUTSIDE, OUTSIDE, begin, last],
                "expected_slots": [animal(7, 16, "blue bird")],
            },
            {
                "text": "i am a bird",
                "tags": [OUTSIDE, OUTSIDE, OUTSIDE, unit],
                "expected_slots": [animal(7, 11, "bird")],
            },
            {
                "text": "bird",
                "tags": [unit],
                "expected_slots": [animal(0, 4, "bird")],
            },
            {
                "text": "blue bird",
                "tags": [begin, last],
                "expected_slots": [animal(0, 9, "blue bird")],
            },
            {
                "text": "light blue bird blue bird",
                "tags": [begin, inside, last, begin, last],
                "expected_slots": [
                    animal(0, 15, "light blue bird"),
                    animal(16, 25, "blue bird"),
                ],
            },
            {
                "text": "bird birdy",
                "tags": [unit, unit],
                "expected_slots": [
                    animal(0, 4, "bird"),
                    animal(5, 10, "birdy"),
                ],
            },
            {
                "text": "light bird bird blue bird",
                "tags": [begin, inside, unit, begin, inside],
                "expected_slots": [
                    animal(0, 10, "light bird"),
                    animal(11, 15, "bird"),
                    animal(16, 25, "blue bird"),
                ],
            },
            {
                "text": "bird bird bird",
                "tags": [last, begin, unit],
                "expected_slots": [
                    animal(0, 4, "bird"),
                    animal(5, 9, "bird"),
                    animal(10, 14, "bird"),
                ],
            },
        ]

        for case in cases:
            sentence = case["text"]
            # When
            slots = tags_to_slots(
                sentence, tokenize(sentence, language), case["tags"],
                TaggingScheme.BILOU, intent_slots_mapping)
            # Then
            self.assertEqual(slots, case["expected_slots"])
Beispiel #35
0
    def test_should_use_parsers_sequentially(self):
        # Registers two stub intent parsers: the first always returns an
        # empty result, the second returns a canned result for the test
        # input. The test checks that the engine output equals the canned
        # result, with each slot passed through custom_slot.

        # Given
        input_text = "hello world"
        canned_intent = intent_classification_result(
            intent_name='dummy_intent_1', probability=0.7)
        canned_slots = [
            unresolved_slot(match_range=(6, 11),
                            value='world',
                            entity='mocked_entity',
                            slot_name='mocked_slot_name')
        ]

        class TestIntentParser1Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser1"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return cls()

        class TestIntentParser1(IntentParser):
            unit_name = "test_intent_parser1"
            config_type = TestIntentParser1Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                # False until fit() has set the flag.
                return getattr(self, '_fitted', False)

            def parse(self, text, intents):
                # Never recognizes anything.
                return empty_result(text)

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, unit_dict):
                return cls(cls.config_type())

        class TestIntentParser2Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser2"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return cls()

        class TestIntentParser2(IntentParser):
            unit_name = "test_intent_parser2"
            config_type = TestIntentParser2Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                # False until fit() has set the flag.
                return getattr(self, '_fitted', False)

            def parse(self, text, intents):
                # Only the exact test input yields the canned result.
                if text != input_text:
                    return empty_result(text)
                return parsing_result(text, canned_intent, canned_slots)

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, unit_dict):
                return cls(cls.config_type())

        register_processing_unit(TestIntentParser1)
        register_processing_unit(TestIntentParser2)

        # Metadata describing the entity and slot used by the canned result;
        # it replaces the metadata the engine has after fitting.
        mocked_entity = {
            "automatically_extensible": True,
            "utterances": {}
        }
        mocked_dataset_metadata = {
            "language_code": "en",
            "entities": {"mocked_entity": mocked_entity},
            "slot_name_mappings": {
                "dummy_intent_1": {"mocked_slot_name": "mocked_entity"}
            }
        }

        engine_config = NLUEngineConfig(
            [TestIntentParser1Config(), TestIntentParser2Config()])
        engine = SnipsNLUEngine(engine_config).fit(SAMPLE_DATASET)
        # pylint:disable=protected-access
        engine._dataset_metadata = mocked_dataset_metadata
        # pylint:enable=protected-access

        # When
        actual_parse = engine.parse(input_text)

        # Then
        expected_parse = parsing_result(
            input_text, canned_intent,
            [custom_slot(s) for s in canned_slots])
        self.assertDictEqual(expected_parse, actual_parse)
Beispiel #36
0
    def test_synonyms_should_point_to_base_value(self, mocked_deter_parse,
                                                 mocked_proba_parse):
        # The dataset declares "dummy1_bis" as a synonym of "dummy1". A
        # mocked parse result carries a slot with the raw value "dummy1_bis";
        # the engine output is expected to resolve it to "dummy1".
        # NOTE(review): the two mock arguments are presumably injected by
        # patch decorators outside this block - confirm.

        # Given
        utterance = {
            "data": [
                {
                    "text": "dummy_1",
                    "entity": "dummy_entity_1",
                    "slot_name": "dummy_slot_name"
                }
            ]
        }
        entity = {
            "use_synonyms": True,
            "automatically_extensible": False,
            "data": [
                {
                    "value": "dummy1",
                    "synonyms": ["dummy1", "dummy1_bis"]
                }
            ]
        }
        dataset = {
            "snips_nlu_version": "1.1.1",
            "intents": {"dummy_intent_1": {"utterances": [utterance]}},
            "entities": {"dummy_entity_1": entity},
            "language": "en"
        }

        text = "dummy1_bis"
        proba_intent = intent_classification_result("dummy_intent_1", 1.0)
        proba_slots = [
            unresolved_slot(match_range=(0, 10), value="dummy1_bis",
                            entity="dummy_entity_1",
                            slot_name="dummy_slot_name")
        ]

        mocked_deter_parse.return_value = empty_result(text)
        mocked_proba_parse.return_value = parsing_result(
            text, proba_intent, proba_slots)

        engine = SnipsNLUEngine().fit(dataset)

        # When
        result = engine.parse(text)

        # Then
        # The raw value stays "dummy1_bis"; the resolved value is "dummy1".
        expected_slot = {
            RES_MATCH_RANGE: {"start": 0, "end": 10},
            RES_RAW_VALUE: "dummy1_bis",
            RES_VALUE: {"kind": "Custom", "value": "dummy1"},
            RES_ENTITY: "dummy_entity_1",
            RES_SLOT_NAME: "dummy_slot_name"
        }
        expected_result = parsing_result(
            text, intent=proba_intent, slots=[expected_slot])
        self.assertEqual(expected_result, result)
Beispiel #37
0
    def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                            mocked_crf_parse):
        # A mocked parse result carries two slots whose values appear in
        # neither entity's data. Only the slot of dummy_entity_2 (declared
        # automatically extensible) is expected in the engine output; the
        # slot of dummy_entity_1 (not extensible) is expected to be absent.
        # NOTE(review): the two mock arguments are presumably injected by
        # patch decorators outside this block - confirm.

        # Given
        utterance = {
            "data": [
                {
                    "text": "dummy_1",
                    "entity": "dummy_entity_1",
                    "slot_name": "dummy_slot_name"
                },
                {
                    "text": " dummy_2",
                    "entity": "dummy_entity_2",
                    "slot_name": "other_dummy_slot_name"
                }
            ]
        }
        closed_entity = {
            "use_synonyms": True,
            "automatically_extensible": False,
            "data": [
                {
                    "value": "dummy1",
                    "synonyms": ["dummy1", "dummy1_bis"]
                },
                {
                    "value": "dummy2",
                    "synonyms": ["dummy2", "dummy2_bis"]
                }
            ]
        }
        extensible_entity = {
            "use_synonyms": False,
            "automatically_extensible": True,
            "data": [
                {
                    "value": "dummy2",
                    "synonyms": ["dummy2"]
                }
            ]
        }
        dataset = {
            "snips_nlu_version": "1.1.1",
            "intents": {"dummy_intent_1": {"utterances": [utterance]}},
            "entities": {
                "dummy_entity_1": closed_entity,
                "dummy_entity_2": extensible_entity
            },
            "language": "en"
        }

        text = "dummy_3 dummy_4"
        crf_intent = intent_classification_result("dummy_intent_1", 1.0)
        crf_slots = [
            unresolved_slot(match_range=(0, 7),
                            value="dummy_3",
                            entity="dummy_entity_1",
                            slot_name="dummy_slot_name"),
            unresolved_slot(match_range=(8, 15),
                            value="dummy_4",
                            entity="dummy_entity_2",
                            slot_name="other_dummy_slot_name")
        ]

        mocked_regex_parse.return_value = empty_result(text)
        mocked_crf_parse.return_value = parsing_result(
            text, crf_intent, crf_slots)

        engine = SnipsNLUEngine()

        # When
        engine = engine.fit(dataset)
        result = engine.parse(text)

        # Then
        surviving_slot = custom_slot(unresolved_slot(
            match_range=(8, 15), value="dummy_4", entity="dummy_entity_2",
            slot_name="other_dummy_slot_name"))
        expected_result = parsing_result(text, intent=crf_intent,
                                         slots=[surviving_slot])
        self.assertEqual(expected_result, result)