Example #1
    def test_should_parse_intent(self):
        # Given
        dataset_stream = io.StringIO(
            """
---
type: intent
name: intent1
utterances:
  - foo bar baz

---
type: intent
name: intent2
utterances:
  - foo bar ban""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "foo bar ban"

        # When
        parsing = parser.parse(text)

        # Then
        probability = 1.0
        expected_intent = intent_classification_result(
            intent_name="intent2", probability=probability)

        self.assertEqual(expected_intent, parsing[RES_INTENT])
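All of these examples are methods of a single unittest test case exercising LookupIntentParser. A minimal scaffolding sketch is given below; the import paths and the test-fixture helpers are assumptions inferred from the identifiers used in the examples (snips-nlu style layout), not copied from the original test module.

# Sketch of the module-level setup these test methods appear to rely on.
# The import paths below are assumptions, not verbatim from the source file.
import io
from copy import deepcopy

from snips_nlu.constants import (
    DATA, ENTITY, SLOT_NAME, TEXT, STOP_WORDS, RES_ENTITY, RES_INTENT,
    RES_INTENT_NAME, RES_PROBA, RES_SLOTS, RES_VALUE)
from snips_nlu.dataset import Dataset
from snips_nlu.entity_parser import BuiltinEntityParser
from snips_nlu.exceptions import IntentNotFoundError, NotTrained
from snips_nlu.intent_parser import LookupIntentParser
from snips_nlu.pipeline.configs import LookupIntentParserConfig
from snips_nlu.result import (
    empty_result, extraction_result, intent_classification_result,
    parsing_result, unresolved_slot)

# Helpers such as self.tmp_file_path, self.slots_dataset, self.get_shared_data,
# self.get_resources, self.assertJsonContent, self.writeJsonContent and
# self.fail_if_exception, as well as TEST_PATH, EntityParserMock and hash_str,
# are assumed to come from the project's own test utilities.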
Example #2
    def test_should_be_serializable_into_bytearray(self):
        # Given
        dataset_stream = io.StringIO(
            """
---
type: intent
name: MakeTea
utterances:
- make me [number_of_cups:snips/number](one) cup of tea
- i want [number_of_cups] cups of tea please
- can you prepare [number_of_cups] cup of tea ?

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](two) cups of coffee
- brew [number_of_cups] cups of coffee
- can you prepare [number_of_cups] cup of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        intent_parser = LookupIntentParser(**shared).fit(dataset)

        # When
        intent_parser_bytes = intent_parser.to_byte_array()
        loaded_intent_parser = LookupIntentParser.from_byte_array(
            intent_parser_bytes, **shared)
        result = loaded_intent_parser.parse("make me two cups of coffee")

        # Then
        self.assertEqual("MakeCoffee", result[RES_INTENT][RES_INTENT_NAME])
Example #3
    def test_should_be_serializable_before_fitting(self):
        # Given
        config = LookupIntentParserConfig(ignore_stop_words=True)
        parser = LookupIntentParser(config=config)

        # When
        parser.persist(self.tmp_file_path)

        # Then
        expected_dict = {
            "config": {
                "unit_name": "lookup_intent_parser",
                "ignore_stop_words": True,
            },
            "language_code": None,
            "intents_names": [],
            "map": None,
            "slots_names": [],
            "entity_scopes": None,
            "stop_words_whitelist": None
        }

        metadata = {"unit_name": "lookup_intent_parser"}
        self.assertJsonContent(self.tmp_file_path / "metadata.json", metadata)
        self.assertJsonContent(
            self.tmp_file_path / "intent_parser.json", expected_dict)
Example #4
    def test_should_parse_slightly_ambiguous_utterances(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent_1
utterances:
  - call tomorrow

---
type: intent
name: intent_2
utterances:
  - call [time:snips/datetime](today)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "call tomorrow"

        # When
        res = parser.parse(text)

        # Then
        expected_intent = intent_classification_result(
            intent_name="intent_1", probability=2. / 3.)
        expected_result = parsing_result(text, expected_intent, [])
        self.assertEqual(expected_result, res)
Example #5
    def test_should_parse_intent_with_ambivalent_words(self):
        # Given
        slots_dataset_stream = io.StringIO("""
---
type: intent
name: give_flower
utterances:
  - give a rose to [name](emily)
  - give a daisy to [name](tom)
  - give a tulip to [name](daisy)
  """)
        dataset = Dataset.from_yaml_files("en",
                                          [slots_dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "give a daisy to emily"

        # When
        parsing = parser.parse(text)

        # Then
        expected_intent = intent_classification_result(
            intent_name="give_flower", probability=1.0)
        expected_slots = [
            {
                "entity": "name",
                "range": {"end": 21, "start": 16},
                "slotName": "name",
                "value": "emily"
            }
        ]

        self.assertDictEqual(expected_intent, parsing[RES_INTENT])
        self.assertListEqual(expected_slots, parsing[RES_SLOTS])
Example #6
    def test_should_ignore_very_ambiguous_utterances(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent_1
utterances:
  - "[event_type](meeting) tomorrow"

---
type: intent
name: intent_2
utterances:
  - call [time:snips/datetime](today)

---
type: entity
name: event_type
values:
  - call
  - diner""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "call tomorrow"

        # When
        res = parser.parse(text)

        # Then
        self.assertEqual(empty_result(text, 1.0), res)
Example #7
    def test_should_not_parse_when_not_fitted(self):
        # Given
        parser = LookupIntentParser()

        # When / Then
        self.assertFalse(parser.fitted)
        with self.assertRaises(NotTrained):
            parser.parse("foobar")
Example #8
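    # This method takes a mock_get_stop_words argument, so it is presumably
    # decorated with a mock.patch on the parser's stop-words helper; the
    # decorator and its exact target are not shown here and are assumed.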
    def test_should_be_serializable(self, mock_get_stop_words):
        # Given
        dataset_stream = io.StringIO(
            """
---
type: intent
name: searchFlight
slots:
  - name: origin
    entity: city
  - name: destination
    entity: city
utterances:
  - find me a flight from [origin](Paris) to [destination](New York)
  - I need a flight to [destination](Berlin)

---
type: entity
name: city
values:
  - london
  - [new york, big apple]
  - [paris, city of lights]""")

        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        mock_get_stop_words.return_value = {"a", "me"}
        config = LookupIntentParserConfig(ignore_stop_words=True)
        parser = LookupIntentParser(config=config).fit(dataset)

        # When
        parser.persist(self.tmp_file_path)

        # Then
        expected_dict = {
            "config": {
                "unit_name": "lookup_intent_parser",
                "ignore_stop_words": True,
            },
            "intents_names": ["searchFlight"],
            "language_code": "en",
            "map": {
                "-2020846245": [0, [0, 1]],
                "-1558674456": [0, [1]],
            },
            "slots_names": ["origin", "destination"],
            "entity_scopes": [
                {
                    "entity_scope": {"builtin": [], "custom": ["city"]},
                    "intent_group": ["searchFlight"]
                }
            ],
            "stop_words_whitelist": dict()
        }
        metadata = {"unit_name": "lookup_intent_parser"}
        self.assertJsonContent(self.tmp_file_path / "metadata.json", metadata)
        self.assertJsonContent(
            self.tmp_file_path / "intent_parser.json", expected_dict)
Example #9
    def test_should_get_intents(self):
        # Given
        dataset_stream = io.StringIO(
            """
---
type: intent
name: greeting1
utterances:
  - Hello John

---
type: intent
name: greeting2
utterances:
  - Hello [name](John)

---
type: intent
name: greeting3
utterances:
  - "[greeting](Hello) [name](John)"
        """)

        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)

        # When
        top_intents = parser.get_intents("Hello John")

        # Then
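        # The probabilities below follow the weighting visible across these
        # fixtures (spelled out in Example #25): each matching intent scores
        # 1 / (1 + number of tagged slots), normalized over all matches.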
        expected_intents = [
            {
                RES_INTENT_NAME: "greeting1",
                RES_PROBA: 1. / (1. + 1. / 2. + 1. / 3.)
            },
            {
                RES_INTENT_NAME: "greeting2",
                RES_PROBA: (1. / 2.) / (1. + 1. / 2. + 1. / 3.)
            },
            {
                RES_INTENT_NAME: "greeting3",
                RES_PROBA: (1. / 3.) / (1. + 1. / 2. + 1. / 3.)
            },
            {
                RES_INTENT_NAME: None,
                RES_PROBA: 0.0
            },
        ]

        self.assertListEqual(expected_intents, top_intents)
Example #10
    def test_should_get_no_slots_with_none_intent(self):
        # Given
        slots_dataset_stream = io.StringIO(
            """
---
type: intent
name: greeting
utterances:
  - Hello [name](John)""")
        dataset = Dataset.from_yaml_files("en", [slots_dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)

        # When
        slots = parser.get_slots("Hello John", None)

        # Then
        self.assertListEqual([], slots)
Example #11
    def test_should_parse_stop_words_slots(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: search
utterances:
  - search
  - search [search_object](this)
  - search [search_object](a cat)

---
type: entity
name: search_object
values:
  - [this thing, that]
  """)

        resources = deepcopy(self.get_resources("en"))
        resources[STOP_WORDS] = {"a", "this", "that"}
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser_config = LookupIntentParserConfig(ignore_stop_words=True)
        parser = LookupIntentParser(config=parser_config, resources=resources)
        parser.fit(dataset)

        # When
        res_1 = parser.parse("search this")
        res_2 = parser.parse("search that")

        # Then
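        # "this" and "that" are configured as stop words above, yet they are
        # still expected as slot values: entity values appear to be whitelisted
        # against stop-word removal (cf. "stop_words_whitelist" in Example #8).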
        expected_intent = intent_classification_result(
            intent_name="search", probability=1.0)
        expected_slots_1 = [
            unresolved_slot(match_range=(7, 11), value="this",
                            entity="search_object",
                            slot_name="search_object")
        ]
        expected_slots_2 = [
            unresolved_slot(match_range=(7, 11), value="that",
                            entity="search_object",
                            slot_name="search_object")
        ]
        self.assertEqual(expected_intent, res_1[RES_INTENT])
        self.assertEqual(expected_intent, res_2[RES_INTENT])
        self.assertListEqual(expected_slots_1, res_1[RES_SLOTS])
        self.assertListEqual(expected_slots_2, res_2[RES_SLOTS])
Example #12
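    # As in Example #8, mock_get_stop_words is assumed to be injected by a
    # mock.patch decorator that is not shown in this excerpt.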
    def test_should_parse_intent_with_stop_words(self, mock_get_stop_words):
        # Given
        mock_get_stop_words.return_value = {"a", "hey"}
        dataset = self.slots_dataset
        config = LookupIntentParserConfig(ignore_stop_words=True)
        parser = LookupIntentParser(config).fit(dataset)
        text = "Hey this is dummy_a query with another dummy_c at 10p.m. " \
               "or at 12p.m."

        # When
        parsing = parser.parse(text)

        # Then
        probability = 1.0
        expected_intent = intent_classification_result(
            intent_name="dummy_intent_1", probability=probability)

        self.assertEqual(expected_intent, parsing[RES_INTENT])
Example #13
    def test_should_fit_and_parse_with_non_ascii_tags(self):
        # Given
        inputs = ["string%s" % i for i in range(10)]
        utterances = [
            {
                DATA: [
                    {
                        TEXT: string,
                        ENTITY: "non_ascìi_entïty",
                        SLOT_NAME: "non_ascìi_slöt",
                    }
                ]
            }
            for string in inputs
        ]

        # When
        naughty_dataset = {
            "intents": {"naughty_intent": {"utterances": utterances}},
            "entities": {
                "non_ascìi_entïty": {
                    "use_synonyms": False,
                    "automatically_extensible": True,
                    "matching_strictness": 1.0,
                    "data": [],
                }
            },
            "language": "en",
        }

        # Then
        with self.fail_if_exception("Exception raised"):
            parser = LookupIntentParser().fit(naughty_dataset)
            parsing = parser.parse("string0")

            expected_slot = {
                "entity": "non_ascìi_entïty",
                "range": {"start": 0, "end": 7},
                "slotName": "non_ascìi_slöt",
                "value": "string0",
            }
            intent_name = parsing[RES_INTENT][RES_INTENT_NAME]
            self.assertEqual("naughty_intent", intent_name)
            self.assertListEqual([expected_slot], parsing[RES_SLOTS])
Example #14
    def test_should_parse_intent_after_deserialization(self):
        # Given
        dataset = self.slots_dataset
        shared = self.get_shared_data(dataset)
        parser = LookupIntentParser(**shared).fit(dataset)
        parser.persist(self.tmp_file_path)
        deserialized_parser = LookupIntentParser.from_path(
            self.tmp_file_path, **shared)
        text = "this is a dummy_a query with another dummy_c at 10p.m. or " \
               "at 12p.m."

        # When
        parsing = deserialized_parser.parse(text)

        # Then
        probability = 1.0
        expected_intent = intent_classification_result(
            intent_name="dummy_intent_1", probability=probability)
        self.assertEqual(expected_intent, parsing[RES_INTENT])
Example #15
    def test_get_slots_should_raise_with_unknown_intent(self):
        # Given
        slots_dataset_stream = io.StringIO(
            """
---
type: intent
name: greeting1
utterances:
  - Hello [name1](John)

---
type: intent
name: goodbye
utterances:
  - Goodbye [name](Eric)""")
        dataset = Dataset.from_yaml_files("en", [slots_dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)

        # When / Then
        with self.assertRaises(IntentNotFoundError):
            parser.get_slots("Hello John", "greeting3")
Example #16
    def test_should_parse_intent_with_duplicated_slot_names(self):
        # Given
        slots_dataset_stream = io.StringIO("""
---
type: intent
name: math_operation
slots:
  - name: number
    entity: snips/number
utterances:
  - what is [number](one) plus [number](one)""")
        dataset = Dataset.from_yaml_files("en", [slots_dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "what is one plus one"

        # When
        parsing = parser.parse(text)

        # Then
        probability = 1.0
        expected_intent = intent_classification_result(
            intent_name="math_operation", probability=probability)
        expected_slots = [
            {
                "entity": "snips/number",
                "range": {"end": 11, "start": 8},
                "slotName": "number",
                "value": "one"
            },
            {
                "entity": "snips/number",
                "range": {"end": 20, "start": 17},
                "slotName": "number",
                "value": "one"
            }
        ]

        self.assertDictEqual(expected_intent, parsing[RES_INTENT])
        self.assertListEqual(expected_slots, parsing[RES_SLOTS])
Example #17
    def test_should_parse_naughty_strings(self):
        # Given
        dataset_stream = io.StringIO(
            """
---
type: intent
name: my_intent
utterances:
- this is [slot1:entity1](my first entity)
- this is [slot2:entity2](second_entity)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        naughty_strings_path = TEST_PATH / "resources" / "naughty_strings.txt"
        with naughty_strings_path.open(encoding="utf8") as f:
            naughty_strings = [line.strip("\n") for line in f.readlines()]

        # When
        parser = LookupIntentParser().fit(dataset)

        # Then
        for s in naughty_strings:
            with self.fail_if_exception("Exception raised"):
                parser.parse(s)
Example #18
    def test_should_get_slots(self):
        # Given
        slots_dataset_stream = io.StringIO(
            """
---
type: intent
name: greeting1
utterances:
  - Hello [name1](John)

---
type: intent
name: greeting2
utterances:
  - Hello [name2](Thomas)

---
type: intent
name: goodbye
utterances:
  - Goodbye [name](Eric)""")
        dataset = Dataset.from_yaml_files("en", [slots_dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)

        # When
        slots_greeting1 = parser.get_slots("Hello John", "greeting1")
        slots_greeting2 = parser.get_slots("Hello Thomas", "greeting2")
        slots_goodbye = parser.get_slots("Goodbye Eric", "greeting1")

        # Then
        self.assertEqual(1, len(slots_greeting1))
        self.assertEqual(1, len(slots_greeting2))
        self.assertEqual(0, len(slots_goodbye))

        self.assertEqual("John", slots_greeting1[0][RES_VALUE])
        self.assertEqual("name1", slots_greeting1[0][RES_ENTITY])
        self.assertEqual("Thomas", slots_greeting2[0][RES_VALUE])
        self.assertEqual("name2", slots_greeting2[0][RES_ENTITY])
Example #19
    def test_should_be_deserializable_before_fitting(self):
        # Given
        parser_dict = {
            "config": {},
            "language_code": None,
            "map": None,
            "slots_names": [],
            "intents_names": [],
            "entity_scopes": None
        }
        self.tmp_file_path.mkdir()
        metadata = {"unit_name": "dict_deterministic_intent_parser"}
        self.writeJsonContent(
            self.tmp_file_path / "intent_parser.json", parser_dict)
        self.writeJsonContent(self.tmp_file_path / "metadata.json", metadata)

        # When
        parser = LookupIntentParser.from_path(self.tmp_file_path)

        # Then
        config = LookupIntentParserConfig()
        expected_parser = LookupIntentParser(config=config)
        self.assertEqual(parser.to_dict(), expected_parser.to_dict())
Example #20
    def test_should_parse_intent_with_filter(self):
        # Given
        dataset_stream = io.StringIO(
            """
---
type: intent
name: intent1
utterances:
  - foo bar baz

---
type: intent
name: intent2
utterances:
  - foo bar ban""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "foo bar ban"

        # When
        parsing = parser.parse(text, intents=["intent1"])

        # Then
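        # "foo bar ban" belongs to intent2, but intent2 is excluded by the
        # intents=["intent1"] filter, so the None intent is expected.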
        self.assertEqual(empty_result(text, 1.0), parsing)
Example #21
    def test_should_ignore_completely_ambiguous_utterances(self):
        # Given
        dataset_stream = io.StringIO(
            """
---
type: intent
name: dummy_intent_1
utterances:
  - Hello world

---
type: intent
name: dummy_intent_2
utterances:
  - Hello world""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "Hello world"

        # When
        res = parser.parse(text)

        # Then
        self.assertEqual(empty_result(text, 1.0), res)
Example #22
    def test_should_fit_with_naughty_strings_no_tags(self):
        # Given
        naughty_strings_path = TEST_PATH / "resources" / "naughty_strings.txt"
        with naughty_strings_path.open(encoding="utf8") as f:
            naughty_strings = [line.strip("\n") for line in f.readlines()]

        utterances = [
            {DATA: [{TEXT: naughty_string}]}
            for naughty_string in naughty_strings
        ]

        # When
        naughty_dataset = {
            "intents": {"naughty_intent": {"utterances": utterances}},
            "entities": dict(),
            "language": "en",
        }

        # Then
        with self.fail_if_exception("Exception raised"):
            LookupIntentParser().fit(naughty_dataset)
Example #23
    def test_should_be_deserializable(self):
        # Given
        parser_dict = {
            "config": {
                "unit_name": "lookup_intent_parser",
                "ignore_stop_words": True
            },
            "language_code": "en",
            "map": {
                hash_str("make coffee"): [0, []],
                hash_str("prepare % snipsnumber % coffees"): [0, [0]],
                hash_str("% snipsnumber % teas at % snipstemperature %"):
                    [1, [0, 1]],
            },
            "slots_names": ["nb_cups", "tea_temperature"],
            "intents_names": ["MakeCoffee", "MakeTea"],
            "entity_scopes": [
                {
                    "entity_scope": {
                        "builtin": ["snips/number"],
                        "custom": [],
                    },
                    "intent_group": ["MakeCoffee"]
                },
                {
                    "entity_scope": {
                        "builtin": ["snips/number", "snips/temperature"],
                        "custom": [],
                    },
                    "intent_group": ["MakeTea"]
                },
            ],
            "stop_words_whitelist": dict()
        }
        self.tmp_file_path.mkdir()
        metadata = {"unit_name": "lookup_intent_parser"}
        self.writeJsonContent(
            self.tmp_file_path / "intent_parser.json", parser_dict)
        self.writeJsonContent(self.tmp_file_path / "metadata.json", metadata)
        resources = self.get_resources("en")
        builtin_entity_parser = BuiltinEntityParser.build(language="en")
        custom_entity_parser = EntityParserMock()

        # When
        parser = LookupIntentParser.from_path(
            self.tmp_file_path, custom_entity_parser=custom_entity_parser,
            builtin_entity_parser=builtin_entity_parser,
            resources=resources)
        res_make_coffee = parser.parse("make me a coffee")
        res_make_tea = parser.parse("two teas at 90°C please")

        # Then
        expected_result_coffee = parsing_result(
            input="make me a coffee",
            intent=intent_classification_result("MakeCoffee", 1.0),
            slots=[])
        expected_result_tea = parsing_result(
            input="two teas at 90°C please",
            intent=intent_classification_result("MakeTea", 1.0),
            slots=[
                {
                    "entity": "snips/number",
                    "range": {"end": 3, "start": 0},
                    "slotName": "nb_cups",
                    "value": "two"
                },
                {
                    "entity": "snips/temperature",
                    "range": {"end": 16, "start": 12},
                    "slotName": "tea_temperature",
                    "value": "90°C"
                }
            ])
        self.assertEqual(expected_result_coffee, res_make_coffee)
        self.assertEqual(expected_result_tea, res_make_tea)
Example #24
    def test_should_parse_slots(self):
        # Given
        dataset = self.slots_dataset
        parser = LookupIntentParser().fit(dataset)
        texts = [
            (
                "this is a dummy a query with another dummy_c at 10p.m. or at"
                " 12p.m.",
                [
                    unresolved_slot(
                        match_range=(10, 17),
                        value="dummy a",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name",
                    ),
                    unresolved_slot(
                        match_range=(37, 44),
                        value="dummy_c",
                        entity="dummy_entity_2",
                        slot_name="dummy_slot_name2",
                    ),
                    unresolved_slot(
                        match_range=(45, 54),
                        value="at 10p.m.",
                        entity="snips/datetime",
                        slot_name="startTime",
                    ),
                    unresolved_slot(
                        match_range=(58, 67),
                        value="at 12p.m.",
                        entity="snips/datetime",
                        slot_name="startTime",
                    ),
                ],
            ),
            (
                "this, is,, a, dummy a query with another dummy_c at 10pm or "
                "at 12p.m.",
                [
                    unresolved_slot(
                        match_range=(14, 21),
                        value="dummy a",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name",
                    ),
                    unresolved_slot(
                        match_range=(41, 48),
                        value="dummy_c",
                        entity="dummy_entity_2",
                        slot_name="dummy_slot_name2",
                    ),
                    unresolved_slot(
                        match_range=(49, 56),
                        value="at 10pm",
                        entity="snips/datetime",
                        slot_name="startTime",
                    ),
                    unresolved_slot(
                        match_range=(60, 69),
                        value="at 12p.m.",
                        entity="snips/datetime",
                        slot_name="startTime",
                    ),
                ],
            ),
            (
                "this is a dummy b",
                [
                    unresolved_slot(
                        match_range=(10, 17),
                        value="dummy b",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name",
                    )
                ],
            ),
            (
                " this is a dummy b ",
                [
                    unresolved_slot(
                        match_range=(11, 18),
                        value="dummy b",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name",
                    )
                ],
            ),
            (
                " at 8am ’ there is a dummy  a",
                [
                    unresolved_slot(
                        match_range=(1, 7),
                        value="at 8am",
                        entity="snips/datetime",
                        slot_name="startTime",
                    ),
                    unresolved_slot(
                        match_range=(21, 29),
                        value="dummy  a",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name",
                    ),
                ],
            ),
        ]

        for text, expected_slots in texts:
            # When
            parsing = parser.parse(text)

            # Then
            self.assertListEqual(expected_slots, parsing[RES_SLOTS])
Example #25
    def test_should_parse_top_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - meeting [time:snips/datetime](today)

---
type: intent
name: intent2
utterances:
  - meeting tomorrow

---
type: intent
name: intent3
utterances:
  - "[event_type](call) [time:snips/datetime](at 9pm)"

---
type: entity
name: event_type
values:
  - meeting
  - feedback session""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "meeting tomorrow"

        # When
        results = parser.parse(text, top_n=3)

        # Then
        time_slot = {
            "entity": "snips/datetime",
            "range": {"end": 16, "start": 8},
            "slotName": "time",
            "value": "tomorrow"
        }
        event_slot = {
            "entity": "event_type",
            "range": {"end": 7, "start": 0},
            "slotName": "event_type",
            "value": "meeting"
        }
        weight_intent_1 = 1. / 2.
        weight_intent_2 = 1.
        weight_intent_3 = 1. / 3.
        total_weight = weight_intent_1 + weight_intent_2 + weight_intent_3
        proba_intent2 = weight_intent_2 / total_weight
        proba_intent1 = weight_intent_1 / total_weight
        proba_intent3 = weight_intent_3 / total_weight
        expected_results = [
            extraction_result(
                intent_classification_result(
                    intent_name="intent2", probability=proba_intent2),
                slots=[]),
            extraction_result(
                intent_classification_result(
                    intent_name="intent1", probability=proba_intent1),
                slots=[time_slot]),
            extraction_result(
                intent_classification_result(
                    intent_name="intent3", probability=proba_intent3),
                slots=[event_slot, time_slot])
        ]
        self.assertEqual(expected_results, results)
Example #26
    def test_should_parse_slots_after_deserialization(self):
        # Given
        dataset = self.slots_dataset
        shared = self.get_shared_data(dataset)
        parser = LookupIntentParser(**shared).fit(dataset)
        parser.persist(self.tmp_file_path)
        deserialized_parser = LookupIntentParser.from_path(
            self.tmp_file_path, **shared)

        texts = [
            (
                "this is a dummy a query with another dummy_c at 10p.m. or at"
                " 12p.m.",
                [
                    unresolved_slot(
                        match_range=(10, 17),
                        value="dummy a",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name",
                    ),
                    unresolved_slot(
                        match_range=(37, 44),
                        value="dummy_c",
                        entity="dummy_entity_2",
                        slot_name="dummy_slot_name2",
                    ),
                    unresolved_slot(
                        match_range=(45, 54),
                        value="at 10p.m.",
                        entity="snips/datetime",
                        slot_name="startTime",
                    ),
                    unresolved_slot(
                        match_range=(58, 67),
                        value="at 12p.m.",
                        entity="snips/datetime",
                        slot_name="startTime",
                    ),
                ],
            ),
            (
                "this, is,, a, dummy a query with another dummy_c at 10pm or "
                "at 12p.m.",
                [
                    unresolved_slot(
                        match_range=(14, 21),
                        value="dummy a",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name",
                    ),
                    unresolved_slot(
                        match_range=(41, 48),
                        value="dummy_c",
                        entity="dummy_entity_2",
                        slot_name="dummy_slot_name2",
                    ),
                    unresolved_slot(
                        match_range=(49, 56),
                        value="at 10pm",
                        entity="snips/datetime",
                        slot_name="startTime",
                    ),
                    unresolved_slot(
                        match_range=(60, 69),
                        value="at 12p.m.",
                        entity="snips/datetime",
                        slot_name="startTime",
                    ),
                ],
            ),
            (
                "this is a dummy b",
                [
                    unresolved_slot(
                        match_range=(10, 17),
                        value="dummy b",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name",
                    )
                ],
            ),
            (
                " this is a dummy b ",
                [
                    unresolved_slot(
                        match_range=(11, 18),
                        value="dummy b",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name",
                    )
                ],
            ),
        ]

        for text, expected_slots in texts:
            # When
            parsing = deserialized_parser.parse(text)

            # Then
            self.assertListEqual(expected_slots, parsing[RES_SLOTS])