def test_training_should_be_reproducible(self):
        """Check that fitting twice with one seed persists identical output.

        Two parsers are built with the same ``random_state``, fitted on the
        same dataset and persisted into separate directories; the sha256
        directory hashes of both outputs must be equal.
        """
        # Given
        seed = 42
        yaml_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [yaml_stream]).json

        # When
        first_parser = DeterministicIntentParser(random_state=seed)
        first_parser.fit(dataset)

        second_parser = DeterministicIntentParser(random_state=seed)
        second_parser.fit(dataset)

        # Then
        with temp_dir() as tmp_dir:
            targets = [tmp_dir / "parser1", tmp_dir / "parser2"]
            # Persist both parsers first, then hash both directories.
            for parser, target in zip((first_parser, second_parser), targets):
                parser.persist(target)
            first_hash, second_hash = [
                dirhash(str(target), 'sha256') for target in targets
            ]
            self.assertEqual(first_hash, second_hash)
    def test_should_parse_stop_words_slots(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: search
utterances:
  - search
  - search [search_object](this)
  - search [search_object](a cat)
  
---
type: entity
name: search_object
values:
  - [this thing, that]
  """)

        resources = self.get_resources("en")
        resources[STOP_WORDS] = {"a", "this", "that"}
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser_config = DeterministicIntentParserConfig(ignore_stop_words=True)
        parser = DeterministicIntentParser(config=parser_config,
                                           resources=resources)
        parser.fit(dataset)

        # When
        res_1 = parser.parse("search this")
        res_2 = parser.parse("search that")

        # Then
        expected_intent = intent_classification_result(intent_name="search",
                                                       probability=1.0)
        expected_slots_1 = [
            unresolved_slot(match_range=(7, 11),
                            value="this",
                            entity="search_object",
                            slot_name="search_object")
        ]
        expected_slots_2 = [
            unresolved_slot(match_range=(7, 11),
                            value="that",
                            entity="search_object",
                            slot_name="search_object")
        ]
        self.assertEqual(expected_intent, res_1[RES_INTENT])
        self.assertEqual(expected_intent, res_2[RES_INTENT])
        self.assertListEqual(expected_slots_1, res_1[RES_SLOTS])
        self.assertListEqual(expected_slots_2, res_2[RES_SLOTS])
    # Example no. 3
    # 0
    def test_should_fit_and_parse_with_non_ascii_tags(self):
        """Check fitting and parsing with non-ASCII entity and slot names.

        Builds a dataset of ten single-chunk utterances ("string0" to
        "string9") tagged with a non-ASCII entity and slot name, then
        verifies that fitting and parsing "string0" raises nothing and
        returns the expected intent name and slot.
        """
        # Given
        entity_name = "non_ascìi_entïty"
        slot_name = "non_ascìi_slöt"
        utterances = [
            {
                DATA: [{
                    TEXT: "string%s" % index,
                    ENTITY: entity_name,
                    SLOT_NAME: slot_name
                }]
            }
            for index in range(10)
        ]
        naughty_dataset = validate_and_format_dataset({
            "intents": {
                "naughty_intent": {
                    "utterances": utterances
                }
            },
            "entities": {
                entity_name: {
                    "use_synonyms": False,
                    "automatically_extensible": True,
                    "matching_strictness": 1.0,
                    "data": []
                }
            },
            "language": "en",
        })
        expected_slots = [{
            'entity': entity_name,
            'range': {
                "start": 0,
                "end": 7
            },
            'slotName': slot_name,
            'value': 'string0'
        }]

        # When / Then
        # Fitting, parsing and the assertions all stay inside the guard.
        with self.fail_if_exception("Exception raised"):
            parser = DeterministicIntentParser()
            parser.fit(naughty_dataset)
            parsing = parser.parse("string0")

            parsed_intent_name = parsing[RES_INTENT][RES_INTENT_NAME]
            self.assertEqual("naughty_intent", parsed_intent_name)
            self.assertListEqual(expected_slots, parsing[RES_SLOTS])