Esempio n. 1
0
    def test_should_fit_and_parse_empty_intent(self):
        """Fit on a whitespace-only utterance, then parse without raising."""
        # Given
        blank_utterance = {"data": [{"text": " "}]}
        dataset = {
            "intents": {"dummy_intent": {"utterances": [blank_utterance]}},
            "language": "en",
            "entities": {},
        }
        shared = self.get_shared_data(dataset)
        slot_filler = CRFSlotFiller(**shared)

        # When / Then: there is no assertion, the test passes as long as
        # neither call raises
        slot_filler.fit(dataset, "dummy_intent")
        slot_filler.get_slots("ya")
Esempio n. 2
0
    def test_should_not_get_slots_when_not_fitted(self):
        """Calling get_slots on an unfitted slot filler raises NotTrained."""
        # Given
        unfitted_filler = CRFSlotFiller()

        # When / Then
        self.assertFalse(unfitted_filler.fitted)
        self.assertRaises(NotTrained, unfitted_filler.get_slots, "foobar")
Esempio n. 3
0
    def test_should_parse_naughty_strings(self):
        """Parse every line of the naughty strings resource without error."""
        # Given
        dataset = validate_and_format_dataset(SAMPLE_DATASET)
        resource_path = TEST_PATH / "resources" / "naughty_strings.txt"
        with resource_path.open(encoding='utf8') as resource:
            # Only the line terminator is stripped: other whitespace is part
            # of the string under test
            naughty_strings = [raw.strip("\n") for raw in resource]

        # When
        fitted_filler = CRFSlotFiller().fit(dataset, "dummy_intent_1")

        # Then
        for naughty in naughty_strings:
            with self.fail_if_exception("Naughty string crashes"):
                fitted_filler.get_slots(naughty)
Esempio n. 4
0
    def test_should_get_slots(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me [number_of_cups:snips/number](five) cups of tea
- please I want [number_of_cups](two) cups of tea""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        shared[RANDOM_STATE] = 42
        slot_filler = CRFSlotFiller(**shared)
        intent = "MakeTea"
        slot_filler.fit(dataset, intent)

        # When
        slots = slot_filler.get_slots("make me two cups of tea")

        # Then
        expected_slots = [
            unresolved_slot(match_range={
                START: 8,
                END: 11
            },
                            value='two',
                            entity='snips/number',
                            slot_name='number_of_cups')
        ]
        self.assertListEqual(slots, expected_slots)
Esempio n. 5
0
    def test_should_not_use_crf_when_dataset_with_no_slots(self):
        """With a slot-free dataset, compute_features is never called."""
        # Given
        slotless_utterance = {
            "data": [{"text": "This is an utterance without slots"}]
        }
        dataset = {
            "language": "en",
            "intents": {"intent1": {"utterances": [slotless_utterance]}},
            "entities": {},
        }
        shared = self.get_shared_data(dataset)
        slot_filler = CRFSlotFiller(**shared)
        # Swap the feature computation for a mock so any call is recorded
        features_mock = MagicMock()
        slot_filler.compute_features = features_mock

        # When
        slot_filler.fit(dataset, "intent1")
        slots = slot_filler.get_slots("This is an utterance without slots")

        # Then
        features_mock.assert_not_called()
        self.assertListEqual([], slots)
Esempio n. 6
0
    def test_should_get_builtin_slots(self):
        """Extract the datetime and location slots from a weather query."""
        # Given
        dataset = validate_and_format_dataset(WEATHER_DATASET)
        slot_filler = CRFSlotFiller(CRFSlotFillerConfig(random_seed=42))
        slot_filler.fit(dataset, "SearchWeatherForecast")

        # When
        query = "Give me the weather at 9p.m. in Paris"
        extracted = slot_filler.get_slots(query)

        # Then
        datetime_slot = unresolved_slot(
            match_range={START: 20, END: 28},
            value='at 9p.m.',
            entity='snips/datetime',
            slot_name='datetime')
        location_slot = unresolved_slot(
            match_range={START: 32, END: 37},
            value='Paris',
            entity='weather_location',
            slot_name='location')
        self.assertListEqual([datetime_slot, location_slot], extracted)
Esempio n. 7
0
    def test_should_get_sub_builtin_slots(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: PlanBreak
utterances:
- 'I want to leave from [start:snips/datetime](tomorrow) until 
  [end:snips/datetime](next thursday)'
- find me something from [start](9am) to [end](12pm)
- I need a break from [start](2pm) until [end](4pm)
- Can you suggest something from [start](april 4th) until [end](april 6th) ?
- Book me a trip from [start](this friday) to [end](next tuesday)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        config = CRFSlotFillerConfig(random_seed=42)
        intent = "PlanBreak"
        slot_filler = CRFSlotFiller(config,
                                    **self.get_shared_data(dataset))
        slot_filler.fit(dataset, intent)

        # When
        slots = slot_filler.get_slots("Find me a plan from 5pm to 6pm")

        # Then
        expected_slots = [
            unresolved_slot(match_range={START: 20, END: 23},
                            value="5pm",
                            entity="snips/datetime",
                            slot_name="start"),
            unresolved_slot(match_range={START: 27, END: 30},
                            value="6pm",
                            entity="snips/datetime",
                            slot_name="end")
        ]
        self.assertListEqual(expected_slots, slots)
Esempio n. 8
0
    def test_should_get_builtin_slots(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: GetWeather
utterances:
- what is the weather [datetime:snips/datetime](at 9pm)
- what's the weather in [location:weather_location](berlin)
- What's the weather in [location](tokyo) [datetime](this weekend)?
- Can you tell me the weather [datetime] please ?
- what is the weather forecast [datetime] in [location](paris)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        config = CRFSlotFillerConfig(random_seed=42)
        intent = "GetWeather"
        slot_filler = CRFSlotFiller(config, **self.get_shared_data(dataset))
        slot_filler.fit(dataset, intent)

        # When
        slots = slot_filler.get_slots("Give me the weather at 9pm in Paris")

        # Then
        expected_slots = [
            unresolved_slot(match_range={START: 20, END: 26},
                            value='at 9pm',
                            entity='snips/datetime',
                            slot_name='datetime'),
            unresolved_slot(match_range={START: 30, END: 35},
                            value='Paris',
                            entity='weather_location',
                            slot_name='location')
        ]
        self.assertListEqual(expected_slots, slots)
Esempio n. 9
0
    def test_should_parse_naughty_strings(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: my_intent
utterances:
- this is [entity1](my first entity)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        naughty_strings_path = TEST_PATH / "resources" / "naughty_strings.txt"
        with naughty_strings_path.open(encoding='utf8') as f:
            naughty_strings = [line.strip("\n") for line in f.readlines()]

        # When
        shared = self.get_shared_data(dataset)
        slot_filler = CRFSlotFiller(**shared).fit(dataset, "my_intent")

        # Then
        for s in naughty_strings:
            with self.fail_if_exception("Naughty string crashes"):
                slot_filler.get_slots(s)
Esempio n. 10
0
    def test_should_fit_and_parse_with_non_ascii_tags(self):
        """Fit and parse with non-ASCII entity and slot names.

        The slot filler is fitted on ten single-chunk utterances
        ("string0" to "string9"), all tagged with an entity and a slot name
        containing non-ASCII characters, and is then asked to parse
        "string0".
        """
        # Given
        utterances = [{
            DATA: [{
                TEXT: "string%s" % i,
                ENTITY: "non_ascìi_entïty",
                SLOT_NAME: "non_ascìi_slöt"
            }]
        } for i in range(10)]

        naughty_dataset = {
            "intents": {
                "naughty_intent": {
                    "utterances": utterances
                }
            },
            "entities": {
                "non_ascìi_entïty": {
                    "use_synonyms": False,
                    "automatically_extensible": True,
                    "data": []
                }
            },
            "language": "en",
            "snips_nlu_version": "0.0.1"
        }

        naughty_dataset = validate_and_format_dataset(naughty_dataset)

        # When
        # Only fitting and parsing run under the crash guard
        with self.fail_if_exception("Naughty string make NLU crash"):
            slot_filler = CRFSlotFiller()
            slot_filler.fit(naughty_dataset, "naughty_intent")
            slots = slot_filler.get_slots("string0")

        # Then
        # The comparison is deliberately kept outside the guard above so that
        # a wrong result surfaces as a regular assertion failure instead of
        # being reported as a crash.
        expected_slot = {
            "entity": "non_ascìi_entïty",
            "range": {
                "start": 0,
                "end": 7
            },
            "slotName": u"non_ascìi_slöt",
            "value": u"string0"
        }
        self.assertListEqual([expected_slot], slots)
Esempio n. 11
0
    def test_should_get_slots(self):
        """Extract the number_of_cups slot after fitting on MakeTea."""
        # Given
        dataset = validate_and_format_dataset(BEVERAGE_DATASET)
        slot_filler = CRFSlotFiller(CRFSlotFillerConfig(random_seed=42))
        slot_filler.fit(dataset, "MakeTea")

        # When
        extracted = slot_filler.get_slots("make me two cups of tea")

        # Then
        number_slot = unresolved_slot(
            match_range={START: 8, END: 11},
            value='two',
            entity='snips/number',
            slot_name='number_of_cups')
        self.assertListEqual(extracted, [number_slot])