def get_intents(self, text):
     """Return a fixed, hard-coded list of intent results (*text* is unused)"""
     canned_scores = (
         ("intent2", 0.6),
         ("intent1", 0.2),
         (None, 0.15),
         ("intent3", 0.05),
     )
     return [intent_classification_result(name, proba)
             for name, proba in canned_scores]
    def _get_intents(self, text, intents_filter):
        """Classify *text* and return the results sorted by decreasing
        ``RES_PROBA`` value

        Args:
            text (str): Input
            intents_filter (str, list of str or None): When not None, only
                the listed intents and the None intent are kept in the output

        Returns:
            list: The classification results of the retained intents
        """
        if isinstance(intents_filter, str):
            intents_filter = {intents_filter}
        elif isinstance(intents_filter, list):
            intents_filter = set(intents_filter)

        if not (text and self.intent_list and self.featurizer):
            # Nothing to classify with: the None intent gets all the mass and
            # every named intent gets a zero score
            fallback = [intent_classification_result(None, 1.0)]
            fallback.extend(
                [intent_classification_result(name, 0.0)
                 for name in self.intent_list if name is not None])
            return fallback

        if len(self.intent_list) == 1:
            only_intent = self.intent_list[0]
            return [intent_classification_result(only_intent, 1.0)]

        features = self.featurizer.transform([text_to_utterance(text)])
        proba_vec = self._predict_proba(features)
        logger.debug(
            "%s",
            DifferedLoggingMessage(
                self.log_activation_weights, text, features))

        def _is_kept(name):
            # The None intent is never filtered out
            return (intents_filter is None or name is None
                    or name in intents_filter)

        ranked = [
            intent_classification_result(name, proba)
            for name, proba in zip(self.intent_list, proba_vec[0])
            if _is_kept(name)]
        ranked.sort(key=lambda res: -res[RES_PROBA])
        return ranked
    def test_should_get_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- hello

---
type: intent
name: greeting2
utterances:
- how are you""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        input_text = "hello world"

        # pylint:disable=unused-variable
        @IntentParser.register("first_intent_parser", True)
        class FirstIntentParser(MockIntentParser):
            def get_intents(self, text):
                return [
                    intent_classification_result("greeting1", 0.5),
                    intent_classification_result("greeting2", 0.3),
                    intent_classification_result(None, 0.2)
                ]

        @IntentParser.register("second_intent_parser", True)
        class SecondIntentParser(MockIntentParser):
            def get_intents(self, text):
                return [
                    intent_classification_result("greeting2", 0.6),
                    intent_classification_result("greeting1", 0.2),
                    intent_classification_result(None, 0.1)
                ]

        # pylint:enable=unused-variable
        config = NLUEngineConfig(["first_intent_parser",
                                  "second_intent_parser"])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        res_intents = engine.get_intents(input_text)

        # Then
        expected_intents = [
            intent_classification_result("greeting2", 0.6),
            intent_classification_result("greeting1", 0.5),
            intent_classification_result(None, 0.2)
        ]
        self.assertListEqual(expected_intents, res_intents)
Beispiel #4
0
    def test_should_parse_intent_with_ambivalent_words(self):
        # Given
        slots_dataset_stream = io.StringIO("""
---
type: intent
name: give_flower
utterances:
  - give a rose to [name](emily)
  - give a daisy to [name](tom)
  - give a tulip to [name](daisy)
  """)
        dataset = Dataset.from_yaml_files("en",
                                          [slots_dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "give a daisy to emily"

        # When
        parsing = parser.parse(text)

        # Then
        expected_intent = intent_classification_result(
            intent_name="give_flower", probability=1.0)
        expected_slots = [
            {
                "entity": "name",
                "range": {"end": 21, "start": 16},
                "slotName": "name",
                "value": "emily"
            }
        ]

        self.assertDictEqual(expected_intent, parsing[RES_INTENT])
        self.assertListEqual(expected_slots, parsing[RES_SLOTS])
Beispiel #5
0
    def test_should_serialize_results(self):
        # The parsing result must survive json.dumps and match the expected
        # dict layout key for key.

        # Given
        query = "hello world"
        intent = intent_classification_result("world", 0.5)
        slot = unresolved_slot(
            [3, 5], "slot_value", "slot_entity", "slot_name")

        # When
        result = parsing_result(input=query, intent=intent, slots=[slot])

        # Then
        with self.fail_if_exception("Result dict should be json serializable"):
            json.dumps(result)

        expected_intent = {RES_INTENT_NAME: 'world', RES_PROBABILITY: 0.5}
        expected_slot = {
            RES_MATCH_RANGE: {"start": 3, "end": 5},
            RES_ENTITY: 'slot_entity',
            RES_SLOT_NAME: 'slot_name',
            RES_VALUE: 'slot_value',
        }
        expected_result = {
            RES_INTENT: expected_intent,
            RES_SLOTS: [expected_slot],
            RES_INPUT: query,
        }
        self.assertDictEqual(expected_result, result)
    def test_should_parse_slightly_ambiguous_utterances(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent_1
utterances:
  - call tomorrow

---
type: intent
name: intent_2
utterances:
  - call [time:snips/datetime](today)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = DeterministicIntentParser().fit(dataset)
        text = "call tomorrow"

        # When
        res = parser.parse(text)

        # Then
        expected_intent = intent_classification_result(intent_name="intent_1",
                                                       probability=2. / 3.)
        expected_result = parsing_result(text, expected_intent, [])
        self.assertEqual(expected_result, res)
    def test_should_parse_intent(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - foo bar baz

---
type: intent
name: intent2
utterances:
  - foo bar ban""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = DeterministicIntentParser().fit(dataset)
        text = "foo bar ban"

        # When
        parsing = parser.parse(text)

        # Then
        probability = 1.0
        expected_intent = intent_classification_result(intent_name="intent2",
                                                       probability=probability)

        self.assertEqual(expected_intent, parsing[RES_INTENT])
    def test_should_serialize_results(self):
        # Checks that a parsing result can be dumped to JSON and that it has
        # exactly the expected keys and values.

        # Given
        text = "hello world"
        parsed_intent = intent_classification_result("world", 0.5)
        parsed_slots = [
            unresolved_slot([3, 5], "slot_value", "slot_entity", "slot_name")
        ]

        # When
        result = parsing_result(
            input=text, intent=parsed_intent, slots=parsed_slots)

        # Then
        failure_msg = "Result dict should be json serializable"
        with self.fail_if_exception(failure_msg):
            json.dumps(result)

        expected_range = {"start": 3, "end": 5}
        expected_result = {
            RES_INTENT: {RES_INTENT_NAME: 'world', RES_PROBA: 0.5},
            RES_SLOTS: [{
                RES_MATCH_RANGE: expected_range,
                RES_ENTITY: 'slot_entity',
                RES_SLOT_NAME: 'slot_name',
                RES_VALUE: 'slot_value'
            }],
            RES_INPUT: text
        }
        self.assertDictEqual(expected_result, result)
Beispiel #9
0
    def test_empty_vocabulary_should_fit_and_return_none_intent(
            self, mocked_build_training):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: dummy_intent_1
utterances:
  - "[dummy_slot_name:dummy_entity_1](...)"
  
---
type: entity
name: dummy_entity_1
automatically_extensible: true
use_synonyms: false
matching_strictness: 1.0
values:
  - ...
""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        text = " "
        noise_size = 6
        utterances = [text] + [text] * noise_size
        utterances = [text_to_utterance(t) for t in utterances]
        labels = [0] + [1] * noise_size
        intent_list = ["dummy_intent_1", None]
        mocked_build_training.return_value = utterances, labels, intent_list

        # When / Then
        intent_classifier = LogRegIntentClassifier().fit(dataset)
        intent = intent_classifier.get_intent("no intent there")
        self.assertEqual(intent_classification_result(None, 1.0), intent)
Beispiel #10
0
    def test_should_get_none_intent_when_empty_input(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: my_first_intent
utterances:
- how are you
- hello how are you?
- what's up

---
type: intent
name: my_second_intent
utterances:
- what is the weather today ?
- does it rain
- will it rain tomorrow""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        classifier = LogRegIntentClassifier().fit(dataset)
        text = ""

        # When
        result = classifier.get_intent(text)

        # Then
        self.assertEqual(intent_classification_result(None, 1.0), result)
 def _get_matching_result(self, text, processed_text, regex, intent,
                          builtin_entities_ranges_mapping=None):
     """Build the parsing result of *regex* applied to *processed_text*

     Returns None when the regex does not match. Otherwise each named group
     of the match becomes a slot whose value is sliced out of *text*, and
     the intent is reported with probability 1.0.
     """
     match = regex.match(processed_text)
     if match is None:
         return None

     parsed_intent = intent_classification_result(intent_name=intent,
                                                  probability=1.0)
     mapping = builtin_entities_ranges_mapping
     slots = []
     for group in match.groupdict():
         slot_name = self.group_names_to_slot_names[group]
         entity = self.slot_names_to_entities[intent][slot_name]
         span = (match.start(group), match.end(group))
         if mapping is None:
             match_range = {START: span[0], END: span[1]}
         elif span in mapping:
             match_range = mapping[span]
         else:
             # No exact entry for this span: shift both bounds by the
             # offset computed from the mapping
             offset = _get_range_shift(span, mapping)
             match_range = {START: span[0] + offset,
                            END: span[1] + offset}
         slot_value = text[match_range[START]:match_range[END]]
         slots.append(unresolved_slot(
             match_range=match_range, value=slot_value, entity=entity,
             slot_name=slot_name))

     deduplicated = _deduplicate_overlapping_slots(slots, self.language)
     ordered_slots = sorted(deduplicated,
                            key=lambda s: s[RES_MATCH_RANGE][START])
     return parsing_result(text, parsed_intent, ordered_slots)
    def test_should_parse_top_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - hello world
  
---
type: intent
name: intent2
utterances:
  - foo bar""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = DeterministicIntentParser().fit(dataset)
        text = "hello world"

        # When
        results = parser.parse(text, top_n=3)

        # Then
        expected_intent = intent_classification_result(intent_name="intent1",
                                                       probability=1.0)
        expected_results = [extraction_result(expected_intent, [])]
        self.assertEqual(expected_results, results)
    def parse(self, text, intents=None):
        """Matches *text* against the fitted patterns

        The first pattern that matches gives both the intent and the slots,
        the latter being read from the named groups of the match

        Args:
            text (str): Input
            intents (str or list of str): If provided, patterns of any other
                intent are skipped

        Returns:
            dict: The matched intent, if any, along with the extracted slots.
            See :func:`.parsing_result` for the output format.

        Raises:
            NotTrained: When the intent parser is not fitted
        """
        if not self.fitted:
            raise NotTrained("DeterministicIntentParser must be fitted")

        if isinstance(intents, str):
            intents = [intents]

        ranges_mapping, processed_text = _replace_builtin_entities(
            text, self.language)

        for intent_name, patterns in iteritems(self.regexes_per_intent):
            if not (intents is None or intent_name in intents):
                continue
            for pattern in patterns:
                found = pattern.match(processed_text)
                if found is None:
                    continue
                parsed_intent = intent_classification_result(
                    intent_name=intent_name, probability=1.0)
                slots = []
                for group in found.groupdict():
                    slot_name = self.group_names_to_slot_names[group]
                    entity = self.slot_names_to_entities[slot_name]
                    span = (found.start(group), found.end(group))
                    if span in ranges_mapping:
                        # A mapped range is used to slice the value out of
                        # the original text
                        match_range = ranges_mapping[span]
                        value = text[match_range[START]:match_range[END]]
                    else:
                        match_range = {START: span[0], END: span[1]}
                        value = found.group(group)
                    slots.append(unresolved_slot(match_range=match_range,
                                                 value=value,
                                                 entity=entity,
                                                 slot_name=slot_name))
                deduplicated = _deduplicate_overlapping_slots(
                    slots, self.language)
                ordered_slots = sorted(
                    deduplicated, key=lambda s: s[RES_MATCH_RANGE][START])
                return parsing_result(text, parsed_intent, ordered_slots)
        return empty_result(text)
Beispiel #14
0
    def get_intents(self, text):
        """Returns one result per intent, matched intents first

        The output starts with the intents returned by the top-intents
        parsing, followed by every remaining intent with a 0.0 probability,
        and ends with the None intent.
        """
        max_results = len(self.regexes_per_intent)
        parsed = self._parse_top_intents(text, top_n=max_results)
        results = [item[RES_INTENT] for item in parsed]
        already_matched = {res[RES_INTENT_NAME] for res in results}
        results += [intent_classification_result(name, 0.0)
                    for name in self.regexes_per_intent
                    if name not in already_matched]

        # The regex patterns never include the None intent, so the
        # deterministic parser cannot match it: it always gets a 0.0 score
        results.append(intent_classification_result(None, 0.0))
        return results
Beispiel #15
0
    def get_intent(self, text, intents_filter=None):
        """Classifies *text* and returns the best intent, if any

        Args:
            text (str): Input
            intents_filter (str or list of str): When defined, only intents
                in this list can be returned, otherwise any intent of the
                dataset can

        Returns:
            dict or None: The most likely intent along with its probability or
            *None* if no intent was found

        Raises:
            NotTrained: When the intent classifier is not fitted

        """
        if not self.fitted:
            raise NotTrained('LogRegIntentClassifier must be fitted')

        if isinstance(intents_filter, str):
            intents_filter = [intents_filter]

        nothing_to_classify = (
            not text or not self.intent_list
            or self.featurizer is None or self.classifier is None)
        if nothing_to_classify:
            return None

        if len(self.intent_list) == 1:
            only_intent = self.intent_list[0]
            if only_intent is None:
                return None
            return intent_classification_result(only_intent, 1.0)

        features = self.featurizer.transform([text_to_utterance(text)])
        proba_vec = self._predict_proba(
            features, intents_filter=intents_filter)
        ranked = list(zip(self.intent_list, proba_vec[0]))
        ranked.sort(key=lambda pair: -pair[1])
        for name, proba in ranked:
            # Reaching the None intent before any allowed intent means that
            # no intent was found
            if name is None:
                return None
            if intents_filter is None or name in intents_filter:
                return intent_classification_result(name, proba)
        return None
    def parse(self, text, intents=None):
        """Runs the fitted regex patterns on *text*

        Intent and slots both come from the first pattern that matches: the
        intent is the one owning the pattern and the slots are its named
        groups

        Args:
            text (str): Input
            intents (str or list of str): If provided, only patterns of these
                intents are tried

        Returns:
            dict: The matched intent, if any, along with the extracted slots.
            See :func:`.parsing_result` for the output format.

        Raises:
            NotTrained: When the intent parser is not fitted
        """
        if not self.fitted:
            raise NotTrained("DeterministicIntentParser must be fitted")

        if isinstance(intents, str):
            intents = [intents]

        ranges_mapping, processed_text = _replace_builtin_entities(
            text, self.language)

        def _by_start(slot):
            return slot[RES_MATCH_RANGE][START]

        for candidate, candidate_regexes in iteritems(
                self.regexes_per_intent):
            if intents is not None and candidate not in intents:
                continue
            for candidate_regex in candidate_regexes:
                hit = candidate_regex.match(processed_text)
                if hit is None:
                    continue
                parsed_intent = intent_classification_result(
                    intent_name=candidate, probability=1.0)
                slots = []
                for name in hit.groupdict():
                    slot_name = self.group_names_to_slot_names[name]
                    entity = self.slot_names_to_entities[slot_name]
                    bounds = (hit.start(name), hit.end(name))
                    if bounds in ranges_mapping:
                        # Mapped bounds: the value is read from the original
                        # text rather than from the match
                        slot_range = ranges_mapping[bounds]
                        slot_value = text[slot_range[START]:slot_range[END]]
                    else:
                        slot_range = {START: bounds[0], END: bounds[1]}
                        slot_value = hit.group(name)
                    slots.append(unresolved_slot(
                        match_range=slot_range, value=slot_value,
                        entity=entity, slot_name=slot_name))
                unique_slots = _deduplicate_overlapping_slots(
                    slots, self.language)
                return parsing_result(
                    text, parsed_intent, sorted(unique_slots, key=_by_start))
        return empty_result(text)
    def test_should_parse_top_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - meeting tomorrow
  
---
type: intent
name: intent2
utterances:
  - meeting [time:snips/datetime](today)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = DeterministicIntentParser().fit(dataset)
        text = "meeting tomorrow"

        # When
        results = parser.parse(text, top_n=3)

        # Then
        slot = {
            "entity": "snips/datetime",
            "range": {
                "end": 16,
                "start": 8
            },
            "slotName": "time",
            "value": "tomorrow"
        }
        expected_results = [
            extraction_result(
                intent_classification_result(intent_name="intent1",
                                             probability=0.5), []),
            extraction_result(
                intent_classification_result(intent_name="intent2",
                                             probability=0.5), [slot])
        ]
        results = sorted(results, key=lambda r: r[RES_INTENT][RES_INTENT_NAME])
        self.assertEqual(expected_results, results)
Beispiel #18
0
    def test_should_get_none_intent_when_empty_dataset(self):
        # A classifier fitted on an empty dataset must answer with the None
        # intent and probability 1.0.

        # Given
        empty_dataset = get_empty_dataset(LANGUAGE_EN)
        classifier = LogRegIntentClassifier().fit(empty_dataset)

        # When
        actual = classifier.get_intent("this is a dummy query")

        # Then
        none_intent = intent_classification_result(None, 1.0)
        self.assertEqual(actual, none_intent)
    def test_should_parse_stop_words_slots(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: search
utterances:
  - search
  - search [search_object](this)
  - search [search_object](a cat)
  
---
type: entity
name: search_object
values:
  - [this thing, that]
  """)

        resources = self.get_resources("en")
        resources[STOP_WORDS] = {"a", "this", "that"}
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser_config = DeterministicIntentParserConfig(ignore_stop_words=True)
        parser = DeterministicIntentParser(config=parser_config,
                                           resources=resources)
        parser.fit(dataset)

        # When
        res_1 = parser.parse("search this")
        res_2 = parser.parse("search that")

        # Then
        expected_intent = intent_classification_result(intent_name="search",
                                                       probability=1.0)
        expected_slots_1 = [
            unresolved_slot(match_range=(7, 11),
                            value="this",
                            entity="search_object",
                            slot_name="search_object")
        ]
        expected_slots_2 = [
            unresolved_slot(match_range=(7, 11),
                            value="that",
                            entity="search_object",
                            slot_name="search_object")
        ]
        self.assertEqual(expected_intent, res_1[RES_INTENT])
        self.assertEqual(expected_intent, res_2[RES_INTENT])
        self.assertListEqual(expected_slots_1, res_1[RES_SLOTS])
        self.assertListEqual(expected_slots_2, res_2[RES_SLOTS])
Beispiel #20
0
    def test_should_get_intent(self):
        # Parsing the query below must yield dummy_intent_1 with probability
        # 1.0.

        # Given
        dataset = validate_and_format_dataset(self.slots_dataset)

        parser = DeterministicIntentParser().fit(dataset)
        query = ("this is a dummy_a query with another dummy_c at 10p.m. or "
                 "at 12p.m.")

        # When
        result = parser.parse(query)

        # Then
        expected_intent = intent_classification_result(
            intent_name="dummy_intent_1", probability=1.0)

        self.assertEqual(expected_intent, result[RES_INTENT])
    def test_should_parse_intent_with_stop_words(self, mock_get_stop_words):
        # With "a" and "hey" mocked as stop words and ignore_stop_words=True,
        # the query below must yield dummy_intent_1.

        # Given
        mock_get_stop_words.return_value = {"a", "hey"}
        dataset = self.slots_dataset
        parser_config = DeterministicIntentParserConfig(
            ignore_stop_words=True)
        parser = DeterministicIntentParser(parser_config).fit(dataset)
        query = ("Hey this is dummy_a query with another dummy_c at 10p.m. "
                 "or at 12p.m.")

        # When
        result = parser.parse(query)

        # Then
        expected_intent = intent_classification_result(
            intent_name="dummy_intent_1", probability=1.0)

        self.assertEqual(expected_intent, result[RES_INTENT])
Beispiel #22
0
    def test_should_use_parsers_sequentially(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- hello [greeted:name](john)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        input_text = "hello snips"
        intent = intent_classification_result(intent_name='greeting1',
                                              probability=0.7)
        slots = [
            unresolved_slot(match_range=(6, 11),
                            value='snips',
                            entity='name',
                            slot_name='greeted')
        ]

        # pylint:disable=unused-variable
        @IntentParser.register("first_intent_parser", True)
        class FirstIntentParser(MockIntentParser):
            pass

        @IntentParser.register("second_intent_parser", True)
        class SecondIntentParser(MockIntentParser):
            def parse(self, text, intents=None, top_n=None):
                if text == input_text:
                    return parsing_result(text, intent, slots)
                return empty_result(text, 1.0)

        # pylint:enable=unused-variable

        config = NLUEngineConfig(
            ["first_intent_parser", "second_intent_parser"])
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        parse = engine.parse(input_text)

        # Then
        expected_slots = [custom_slot(s) for s in slots]
        expected_parse = parsing_result(input_text, intent, expected_slots)
        self.assertDictEqual(expected_parse, parse)
    def test_should_parse_intent_after_deserialization(self):
        # A parser persisted to disk and reloaded with the same shared data
        # must still yield dummy_intent_1 for the query below.

        # Given
        dataset = self.slots_dataset
        shared = self.get_shared_data(dataset)
        fitted_parser = DeterministicIntentParser(**shared).fit(dataset)
        fitted_parser.persist(self.tmp_file_path)
        reloaded_parser = DeterministicIntentParser.from_path(
            self.tmp_file_path, **shared)
        query = ("this is a dummy_a query with another dummy_c at 10p.m. or "
                 "at 12p.m.")

        # When
        result = reloaded_parser.parse(query)

        # Then
        expected_intent = intent_classification_result(
            intent_name="dummy_intent_1", probability=1.0)
        self.assertEqual(expected_intent, result[RES_INTENT])
    def test_should_parse_intent_with_duplicated_slot_names(self):
        # Given
        slots_dataset_stream = io.StringIO("""
---
type: intent
name: math_operation
slots:
  - name: number
    entity: snips/number
utterances:
  - what is [number](one) plus [number](one)""")
        dataset = Dataset.from_yaml_files("en", [slots_dataset_stream]).json
        parser = DeterministicIntentParser().fit(dataset)
        text = "what is one plus one"

        # When
        parsing = parser.parse(text)

        # Then
        probability = 1.0
        expected_intent = intent_classification_result(
            intent_name="math_operation", probability=probability)
        expected_slots = [{
            "entity": "snips/number",
            "range": {
                "end": 11,
                "start": 8
            },
            "slotName": "number",
            "value": "one"
        }, {
            "entity": "snips/number",
            "range": {
                "end": 20,
                "start": 17
            },
            "slotName": "number",
            "value": "one"
        }]

        self.assertDictEqual(expected_intent, parsing[RES_INTENT])
        self.assertListEqual(expected_slots, parsing[RES_SLOTS])
Beispiel #25
0
    def test_should_get_intent_after_deserialization(self):
        # The parser is persisted, then reloaded with a freshly built builtin
        # entity parser and the custom entity parser of the fitted instance.

        # Given
        dataset = validate_and_format_dataset(self.slots_dataset)

        fitted_parser = DeterministicIntentParser().fit(dataset)
        custom_entity_parser = fitted_parser.custom_entity_parser
        fitted_parser.persist(self.tmp_file_path)
        builtin_entity_parser = BuiltinEntityParser.build(language="en")
        reloaded_parser = DeterministicIntentParser.from_path(
            self.tmp_file_path,
            builtin_entity_parser=builtin_entity_parser,
            custom_entity_parser=custom_entity_parser)
        query = ("this is a dummy_a query with another dummy_c at 10p.m. or "
                 "at 12p.m.")

        # When
        result = reloaded_parser.parse(query)

        # Then
        expected_intent = intent_classification_result(
            intent_name="dummy_intent_1", probability=1.0)
        self.assertEqual(expected_intent, result[RES_INTENT])
Beispiel #26
0
    def _parse_map_output(self, text, output, entities, intents):
        """Convert a map *output* into an extraction result

        *output* is an ``(intent_id, slot_ids)`` pair. None is returned when
        *intents* is provided and does not contain the resolved intent name.
        Slot values are sliced out of *text* using the range of the entity
        paired with each slot id.
        """
        intent_id, slot_ids = output
        intent_name = self._intents_names[intent_id]
        if not (intents is None or intent_name in intents):
            return None

        parsed_intent = intent_classification_result(
            intent_name=intent_name, probability=1.0)
        # assert invariant
        assert len(slot_ids) == len(entities)

        def _build_slot(slot_id, entity):
            slot_name = self._slots_names[slot_id]
            start = entity[RES_MATCH_RANGE][START]
            end = entity[RES_MATCH_RANGE][END]
            return unresolved_slot(
                [start, end], text[start:end], entity[ENTITY_KIND],
                slot_name)

        slots = [_build_slot(slot_id, entity)
                 for slot_id, entity in zip(slot_ids, entities)]
        return extraction_result(parsed_intent, slots)
Beispiel #27
0
    def test_should_use_parsers_sequentially(self):
        # Two parsers are declared inline and registered as processing units:
        # the first one always returns an empty result, the second one returns
        # a canned result for `input_text`. The engine is configured with the
        # two parsers in that order and its output must be the canned result,
        # with its slots passed through `custom_slot`.

        # Given
        input_text = "hello world"
        intent = intent_classification_result(
            intent_name='dummy_intent_1', probability=0.7)
        slots = [unresolved_slot(match_range=(6, 11),
                                 value='world',
                                 entity='mocked_entity',
                                 slot_name='mocked_slot_name')]

        # Config of the first parser: it carries nothing but its unit name
        class TestIntentParser1Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser1"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser1Config()

        # First parser: never finds anything
        class TestIntentParser1(IntentParser):
            unit_name = "test_intent_parser1"
            config_type = TestIntentParser1Config

            def fit(self, dataset, force_retrain):
                # Fitting only flips the flag read by the `fitted` property
                self._fitted = True
                return self

            @property
            def fitted(self):
                # `_fitted` does not exist until `fit` has been called
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                conf = cls.config_type()
                return TestIntentParser1(conf)

        # Config of the second parser: it carries nothing but its unit name
        class TestIntentParser2Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser2"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser2Config()

        # Second parser: returns the canned intent and slots for `input_text`
        # and an empty result for any other input
        class TestIntentParser2(IntentParser):
            unit_name = "test_intent_parser2"
            config_type = TestIntentParser2Config

            def fit(self, dataset, force_retrain):
                # Fitting only flips the flag read by the `fitted` property
                self._fitted = True
                return self

            @property
            def fitted(self):
                # `_fitted` does not exist until `fit` has been called
                return hasattr(self, '_fitted') and self._fitted

            def parse(self, text, intents):
                if text == input_text:
                    return parsing_result(text, intent, slots)
                return empty_result(text)

            def to_dict(self):
                return {
                    "unit_name": self.unit_name,
                }

            @classmethod
            def from_dict(cls, unit_dict):
                conf = cls.config_type()
                return TestIntentParser2(conf)

        register_processing_unit(TestIntentParser1)
        register_processing_unit(TestIntentParser2)

        # Metadata declaring the entity and the slot name used by the canned
        # slots; it replaces the metadata of the fitted engine below
        mocked_dataset_metadata = {
            "language_code": "en",
            "entities": {
                "mocked_entity": {
                    "automatically_extensible": True,
                    "utterances": dict()
                }
            },
            "slot_name_mappings": {
                "dummy_intent_1": {
                    "mocked_slot_name": "mocked_entity"
                }
            }
        }

        config = NLUEngineConfig([TestIntentParser1Config(),
                                  TestIntentParser2Config()])
        engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
        # pylint:disable=protected-access
        engine._dataset_metadata = mocked_dataset_metadata
        # pylint:enable=protected-access

        # When
        parse = engine.parse(input_text)

        # Then
        expected_slots = [custom_slot(s) for s in slots]
        expected_parse = parsing_result(input_text, intent, expected_slots)
        self.assertDictEqual(expected_parse, parse)
Beispiel #28
0
    def test_synonyms_should_point_to_base_value(self, mocked_deter_parse,
                                                 mocked_proba_parse):
        """Check that a slot matched on a synonym resolves to the base value.

        The second mocked parser reports a slot whose raw value is the
        synonym "dummy1_bis"; the engine result is expected to expose the
        entity's reference value "dummy1" as the resolved value.

        Args:
            mocked_deter_parse: mock whose ``return_value`` is set to an
                empty result (presumably injected by a patch decorator that
                is not visible in this chunk).
            mocked_proba_parse: mock whose ``return_value`` is set to the
                parsing result carrying the synonym slot (same assumption).
        """
        # Given
        text = "dummy1_bis"
        intent_utterances = [{
            "data": [{
                "text": "dummy_1",
                "entity": "dummy_entity_1",
                "slot_name": "dummy_slot_name"
            }]
        }]
        entity_with_synonyms = {
            "use_synonyms": True,
            "automatically_extensible": False,
            "data": [{
                "value": "dummy1",
                "synonyms": ["dummy1", "dummy1_bis"]
            }]
        }
        dataset = {
            "snips_nlu_version": "1.1.1",
            "intents": {"dummy_intent_1": {"utterances": intent_utterances}},
            "entities": {"dummy_entity_1": entity_with_synonyms},
            "language": "en"
        }

        proba_intent = intent_classification_result("dummy_intent_1", 1.0)
        proba_slots = [
            unresolved_slot(match_range=(0, 10),
                            value="dummy1_bis",
                            entity="dummy_entity_1",
                            slot_name="dummy_slot_name")
        ]

        # The first parser finds nothing, the second one returns the slot
        mocked_deter_parse.return_value = empty_result(text)
        mocked_proba_parse.return_value = parsing_result(
            text, proba_intent, proba_slots)

        engine = SnipsNLUEngine().fit(dataset)

        # When
        result = engine.parse(text)

        # Then
        resolved_slot = {
            RES_MATCH_RANGE: {"start": 0, "end": 10},
            RES_RAW_VALUE: "dummy1_bis",
            RES_VALUE: {"kind": "Custom", "value": "dummy1"},
            RES_ENTITY: "dummy_entity_1",
            RES_SLOT_NAME: "dummy_slot_name"
        }
        expected_result = parsing_result(
            text, intent=proba_intent, slots=[resolved_slot])
        self.assertEqual(expected_result, result)
Beispiel #29
0
    def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                            mocked_crf_parse):
        """Check which mocked slots survive in the engine output.

        The second mocked parser returns two slots. The expected result only
        contains the slot of "dummy_entity_2" (declared automatically
        extensible); the slot of "dummy_entity_1" (not automatically
        extensible, and whose value "dummy_3" is not among its declared
        values) is not expected.

        Args:
            mocked_regex_parse: mock whose ``return_value`` is set to an
                empty result (presumably injected by a patch decorator that
                is not visible in this chunk).
            mocked_crf_parse: mock whose ``return_value`` is set to the
                parsing result carrying both slots (same assumption).
        """
        # Given
        utterance_chunks = [
            {
                "text": "dummy_1",
                "entity": "dummy_entity_1",
                "slot_name": "dummy_slot_name"
            },
            {
                "text": " dummy_2",
                "entity": "dummy_entity_2",
                "slot_name": "other_dummy_slot_name"
            }
        ]
        closed_entity = {
            "use_synonyms": True,
            "automatically_extensible": False,
            "data": [
                {"value": "dummy1", "synonyms": ["dummy1", "dummy1_bis"]},
                {"value": "dummy2", "synonyms": ["dummy2", "dummy2_bis"]}
            ]
        }
        extensible_entity = {
            "use_synonyms": False,
            "automatically_extensible": True,
            "data": [{"value": "dummy2", "synonyms": ["dummy2"]}]
        }
        dataset = {
            "snips_nlu_version": "1.1.1",
            "intents": {
                "dummy_intent_1": {"utterances": [{"data": utterance_chunks}]}
            },
            "entities": {
                "dummy_entity_1": closed_entity,
                "dummy_entity_2": extensible_entity
            },
            "language": "en"
        }

        text = "dummy_3 dummy_4"
        crf_intent = intent_classification_result("dummy_intent_1", 1.0)
        first_slot = unresolved_slot(match_range=(0, 7),
                                     value="dummy_3",
                                     entity="dummy_entity_1",
                                     slot_name="dummy_slot_name")
        second_slot = unresolved_slot(match_range=(8, 15),
                                      value="dummy_4",
                                      entity="dummy_entity_2",
                                      slot_name="other_dummy_slot_name")

        mocked_regex_parse.return_value = empty_result(text)
        mocked_crf_parse.return_value = parsing_result(
            text, crf_intent, [first_slot, second_slot])

        engine = SnipsNLUEngine()

        # When
        engine = engine.fit(dataset)
        result = engine.parse(text)

        # Then
        # A fresh slot is built so the expectation does not share state with
        # the objects handed to the engine through the mock
        surviving_slot = unresolved_slot(match_range=(8, 15),
                                         value="dummy_4",
                                         entity="dummy_entity_2",
                                         slot_name="other_dummy_slot_name")
        expected_result = parsing_result(
            text, intent=crf_intent, slots=[custom_slot(surviving_slot)])
        self.assertEqual(expected_result, result)
Beispiel #30
0
    def test_should_use_parsers_sequentially(self):
        """Check that the result of the second configured parser is used.

        Two intent parsers are declared and registered: the first one always
        returns an empty result, the second one returns a fixed intent and
        slot for the input text. The engine output is expected to match the
        second parser's result, with its slots wrapped by ``custom_slot``.
        """
        # Given
        input_text = "hello world"
        intent = intent_classification_result(
            intent_name='dummy_intent_1', probability=0.7)
        world_slot = unresolved_slot(match_range=(6, 11),
                                     value='world',
                                     entity='mocked_entity',
                                     slot_name='mocked_slot_name')
        slots = [world_slot]

        class TestIntentParser1Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser1"

            def to_dict(self):
                return dict(unit_name=self.unit_name)

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser1Config()

        class TestIntentParser1(IntentParser):
            """Parser that never finds anything."""
            unit_name = "test_intent_parser1"
            config_type = TestIntentParser1Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                # False until fit() has set the flag
                return getattr(self, '_fitted', False)

            def parse(self, text, intents):
                return empty_result(text)

            def to_dict(self):
                return dict(unit_name=self.unit_name)

            @classmethod
            def from_dict(cls, unit_dict):
                return TestIntentParser1(cls.config_type())

        class TestIntentParser2Config(ProcessingUnitConfig):
            unit_name = "test_intent_parser2"

            def to_dict(self):
                return dict(unit_name=self.unit_name)

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentParser2Config()

        class TestIntentParser2(IntentParser):
            """Parser that only recognizes ``input_text``."""
            unit_name = "test_intent_parser2"
            config_type = TestIntentParser2Config

            def fit(self, dataset, force_retrain):
                self._fitted = True
                return self

            @property
            def fitted(self):
                # False until fit() has set the flag
                return getattr(self, '_fitted', False)

            def parse(self, text, intents):
                if text != input_text:
                    return empty_result(text)
                return parsing_result(text, intent, slots)

            def to_dict(self):
                return dict(unit_name=self.unit_name)

            @classmethod
            def from_dict(cls, unit_dict):
                return TestIntentParser2(cls.config_type())

        register_processing_unit(TestIntentParser1)
        register_processing_unit(TestIntentParser2)

        mocked_entities = {
            "mocked_entity": {
                "automatically_extensible": True,
                "utterances": {}
            }
        }
        mocked_slot_name_mappings = {
            "dummy_intent_1": {"mocked_slot_name": "mocked_entity"}
        }
        mocked_dataset_metadata = {
            "language_code": "en",
            "entities": mocked_entities,
            "slot_name_mappings": mocked_slot_name_mappings
        }

        parser_configs = [TestIntentParser1Config(),
                          TestIntentParser2Config()]
        config = NLUEngineConfig(parser_configs)
        engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)
        # Replace the metadata computed during fit with the mocked one
        # pylint:disable=protected-access
        engine._dataset_metadata = mocked_dataset_metadata
        # pylint:enable=protected-access

        # When
        parse = engine.parse(input_text)

        # Then
        expected_parse = parsing_result(
            input_text, intent, [custom_slot(slot) for slot in slots])
        self.assertDictEqual(expected_parse, parse)
 def get_intent(self, text, intents_filter):
     """Return a fixed intent depending on a keyword found in ``text``.

     "tea" is looked up before "coffee"; when neither is contained in
     ``text`` the returned intent name is ``None``. The probability is
     always 1.0 and ``intents_filter`` is not used.
     """
     keyword_to_intent = (
         ("tea", "MakeTea"),
         ("coffee", "MakeCoffee"),
     )
     for keyword, intent_name in keyword_to_intent:
         if keyword in text:
             return intent_classification_result(intent_name, 1.0)
     return intent_classification_result(None, 1.0)
Beispiel #32
0
    def test_synonyms_should_point_to_base_value(self, mocked_deter_parse,
                                                 mocked_proba_parse):
        """Check that a synonym slot value is resolved to its base value.

        The parsed text is the synonym "dummy1_bis"; the slot in the expected
        result keeps it as raw value and exposes "dummy1" as resolved value.

        Args:
            mocked_deter_parse: mock configured to return an empty result
                (presumably injected by a patch decorator that is not
                visible in this chunk).
            mocked_proba_parse: mock configured to return the intent and the
                unresolved synonym slot (same assumption).
        """
        # Given
        base_value = "dummy1"
        synonym = "dummy1_bis"
        dataset = {
            "snips_nlu_version": "1.1.1",
            "intents": {
                "dummy_intent_1": {
                    "utterances": [{
                        "data": [{
                            "text": "dummy_1",
                            "entity": "dummy_entity_1",
                            "slot_name": "dummy_slot_name"
                        }]
                    }]
                }
            },
            "entities": {
                "dummy_entity_1": {
                    "use_synonyms": True,
                    "automatically_extensible": False,
                    "data": [{
                        "value": base_value,
                        "synonyms": [base_value, synonym]
                    }]
                }
            },
            "language": "en"
        }

        text = synonym
        mocked_intent = intent_classification_result("dummy_intent_1", 1.0)
        synonym_slot = unresolved_slot(
            match_range=(0, 10),
            value="dummy1_bis",
            entity="dummy_entity_1",
            slot_name="dummy_slot_name")

        mocked_deter_parse.return_value = empty_result(text)
        mocked_proba_parse.return_value = parsing_result(
            text, mocked_intent, [synonym_slot])

        engine = SnipsNLUEngine().fit(dataset)

        # When
        result = engine.parse(text)

        # Then
        expected_slot = {
            RES_MATCH_RANGE: {"start": 0, "end": 10},
            RES_RAW_VALUE: "dummy1_bis",
            RES_VALUE: {"kind": "Custom", "value": "dummy1"},
            RES_ENTITY: "dummy_entity_1",
            RES_SLOT_NAME: "dummy_slot_name"
        }
        expected_result = parsing_result(text,
                                         intent=mocked_intent,
                                         slots=[expected_slot])
        self.assertEqual(expected_result, result)
    def test_synonyms_should_point_to_base_value(self):
        """Check that a synonym slot value is resolved to its base value.

        A mock intent parser registered as "my_intent_parser" always returns
        a slot whose value is the synonym "dummy1_bis"; the engine result is
        expected to expose "dummy1" as the resolved slot value.
        """
        # Given
        input_ = "dummy1_bis"
        utterances = [{
            "data": [{
                "text": "dummy_1",
                "entity": "dummy_entity_1",
                "slot_name": "dummy_slot_name"
            }]
        }]
        entity = {
            "use_synonyms": True,
            "automatically_extensible": False,
            "data": [{
                "value": "dummy1",
                "synonyms": ["dummy1", "dummy1_bis"]
            }],
            "matching_strictness": 1.0
        }
        dataset = {
            "intents": {"dummy_intent_1": {"utterances": utterances}},
            "entities": {"dummy_entity_1": entity},
            "language": "en"
        }
        mocked_intent = intent_classification_result("dummy_intent_1", 1.0)
        mocked_slots = [
            unresolved_slot(match_range=(0, 10),
                            value="dummy1_bis",
                            entity="dummy_entity_1",
                            slot_name="dummy_slot_name")
        ]

        # pylint:disable=unused-variable
        @IntentParser.register("my_intent_parser", True)
        class MyIntentParser(MockIntentParser):
            def parse(self, text, intents=None, top_n=None):
                # Always answer with the mocked intent and synonym slot
                return parsing_result(text, mocked_intent, mocked_slots)

        # pylint:enable=unused-variable

        engine_config = NLUEngineConfig(["my_intent_parser"])
        engine = SnipsNLUEngine(engine_config).fit(dataset)

        # When
        result = engine.parse(input_)

        # Then
        resolved_slot = {
            RES_MATCH_RANGE: {"start": 0, "end": 10},
            RES_RAW_VALUE: "dummy1_bis",
            RES_VALUE: {"kind": "Custom", "value": "dummy1"},
            RES_ENTITY: "dummy_entity_1",
            RES_SLOT_NAME: "dummy_slot_name"
        }
        expected_result = parsing_result(
            input_, mocked_intent, slots=[resolved_slot])
        self.assertEqual(expected_result, result)
    def test_should_parse_top_intents(self):
        """Check the ``top_n`` output of an engine built on two mock parsers.

        Both registered parsers return their own intent ranking and slots.
        The test parses with ``top_n=3``, once without and once with an
        intents filter, and compares both outputs with the expected lists of
        extraction results.
        """
        # Given
        text = "foo bar ban"
        # The whitespace-only lines between documents are written out
        # explicitly so that they cannot be stripped by accident
        dataset_yaml = (
            "\n"
            "---\n"
            "type: intent\n"
            "name: intent1\n"
            "utterances:\n"
            "  - foo [slot1:entity1](bak)\n"
            "  \n"
            "---\n"
            "type: intent\n"
            "name: intent2\n"
            "utterances:\n"
            "  - '[slot2:entity2](foo) baz'\n"
            "  \n"
            "---\n"
            "type: intent\n"
            "name: intent3\n"
            "utterances:\n"
            "  - foo bap"
        )
        dataset_stream = io.StringIO(dataset_yaml)

        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentParser.register("first_intent_parser", True)
        class FirstIntentParser(MockIntentParser):
            def get_intents(self, text):
                ranking = [("intent1", 0.5), ("intent2", 0.3),
                           (None, 0.15), ("intent3", 0.05)]
                return [intent_classification_result(name, proba)
                        for name, proba in ranking]

            def get_slots(self, text, intent):
                # Only "intent2" comes with a slot for this parser
                if intent == "intent2":
                    return [
                        unresolved_slot((0, 3), "foo", "entity2", "slot2")
                    ]
                return []

        @IntentParser.register("second_intent_parser", True)
        class SecondIntentParser(MockIntentParser):
            def get_intents(self, text):
                ranking = [("intent2", 0.6), ("intent1", 0.2),
                           (None, 0.15), ("intent3", 0.05)]
                return [intent_classification_result(name, proba)
                        for name, proba in ranking]

            def get_slots(self, text, intent):
                if intent == "intent1":
                    slot = unresolved_slot((0, 3), "foo", "entity1", "slot1")
                elif intent == "intent2":
                    slot = unresolved_slot((8, 11), "ban", "entity2", "slot2")
                else:
                    return []
                return [slot]

        # pylint:enable=unused-variable

        parser_names = ["first_intent_parser", "second_intent_parser"]
        nlu_engine = SnipsNLUEngine(NLUEngineConfig(parser_names)).fit(dataset)

        # When
        results = nlu_engine.parse(text, top_n=3)
        results_with_filter = nlu_engine.parse(
            text, intents=["intent1", "intent3"], top_n=3)

        # Then
        intent2_result = extraction_result(
            intent_classification_result("intent2", 0.6),
            [custom_slot(unresolved_slot((0, 3), "foo", "entity2", "slot2"))])
        intent1_result = extraction_result(
            intent_classification_result("intent1", 0.5),
            [custom_slot(unresolved_slot((0, 3), "foo", "entity1", "slot1"))])
        none_result = extraction_result(
            intent_classification_result(None, 0.15), [])
        intent3_result = extraction_result(
            intent_classification_result("intent3", 0.05), [])

        expected_results = [intent2_result, intent1_result, none_result]
        expected_results_with_filter = [
            intent1_result, none_result, intent3_result]
        self.assertListEqual(expected_results, results)
        self.assertListEqual(expected_results_with_filter, results_with_filter)
Beispiel #35
0
    def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                            mocked_crf_parse):
        """Check which of the two mocked slots is present in the output.

        The second mocked parser returns one slot per entity. Only the slot
        of "dummy_entity_2", declared automatically extensible, is part of
        the expected result; the slot of "dummy_entity_1", which is not
        automatically extensible and does not list "dummy_3" among its
        values, is not.

        Args:
            mocked_regex_parse: mock configured to return an empty result
                (presumably injected by a patch decorator that is not
                visible in this chunk).
            mocked_crf_parse: mock configured to return the intent and both
                unresolved slots (same assumption).
        """
        # Given
        intents = {
            "dummy_intent_1": {
                "utterances": [{
                    "data": [
                        {
                            "text": "dummy_1",
                            "entity": "dummy_entity_1",
                            "slot_name": "dummy_slot_name"
                        },
                        {
                            "text": " dummy_2",
                            "entity": "dummy_entity_2",
                            "slot_name": "other_dummy_slot_name"
                        }
                    ]
                }]
            }
        }
        entities = {
            "dummy_entity_1": {
                "use_synonyms": True,
                "automatically_extensible": False,
                "data": [
                    {"value": "dummy1", "synonyms": ["dummy1", "dummy1_bis"]},
                    {"value": "dummy2", "synonyms": ["dummy2", "dummy2_bis"]}
                ]
            },
            "dummy_entity_2": {
                "use_synonyms": False,
                "automatically_extensible": True,
                "data": [{"value": "dummy2", "synonyms": ["dummy2"]}]
            }
        }
        dataset = {
            "snips_nlu_version": "1.1.1",
            "intents": intents,
            "entities": entities,
            "language": "en"
        }

        text = "dummy_3 dummy_4"
        mocked_crf_intent = intent_classification_result("dummy_intent_1", 1.0)
        mocked_crf_slots = [
            unresolved_slot(match_range=(0, 7),
                            value="dummy_3",
                            entity="dummy_entity_1",
                            slot_name="dummy_slot_name"),
            unresolved_slot(match_range=(8, 15),
                            value="dummy_4",
                            entity="dummy_entity_2",
                            slot_name="other_dummy_slot_name")
        ]

        mocked_regex_parse.return_value = empty_result(text)
        mocked_crf_parse.return_value = parsing_result(
            text, mocked_crf_intent, mocked_crf_slots)

        engine = SnipsNLUEngine()

        # When
        engine = engine.fit(dataset)
        result = engine.parse(text)

        # Then
        # Built from scratch rather than taken from mocked_crf_slots
        expected_unresolved = unresolved_slot(
            match_range=(8, 15),
            value="dummy_4",
            entity="dummy_entity_2",
            slot_name="other_dummy_slot_name")
        expected_slot = custom_slot(expected_unresolved)
        expected_result = parsing_result(
            text, intent=mocked_crf_intent, slots=[expected_slot])
        self.assertEqual(expected_result, result)