def test_should_be_deserializable_when_fitted_without_slots(self):
        """Round-trip a slot filler trained on an utterance without slots.

        A ``CRFSlotFiller`` is fitted on a dataset whose single utterance
        carries no slot annotation, persisted to ``self.tmp_file_path`` and
        loaded back with ``CRFSlotFiller.from_path``. The reloaded filler is
        expected to return an empty list for that same utterance.
        """
        # Given
        utterance_text = "This is an utterance without slots"
        slotless_dataset = {
            "language": "en",
            "intents": {
                "intent1": {
                    "utterances": [{"data": [{"text": utterance_text}]}]
                }
            },
            "entities": {}
        }
        shared_resources = self.get_shared_data(slotless_dataset)

        fitted_filler = CRFSlotFiller(**shared_resources)
        fitted_filler.fit(slotless_dataset, intent="intent1")
        fitted_filler.persist(self.tmp_file_path)
        reloaded_filler = CRFSlotFiller.from_path(
            self.tmp_file_path, **shared_resources)

        # When
        extracted = reloaded_filler.get_slots(utterance_text)

        # Then
        self.assertListEqual([], extracted)
Example #2
0
    def test_should_be_deserializable_before_fit(self):
        """Load an unfitted slot filler from hand-written JSON files.

        ``metadata.json`` and ``slot_filler.json`` are written into
        ``self.tmp_file_path`` describing a filler whose model file,
        language, intent and slot name mapping are all ``None``. The test
        then checks that ``CRFSlotFiller.from_path`` exposes those same
        values and an equivalent configuration.
        """

        def build_factory_configs():
            # Returns a fresh list on each call so that the serialized input
            # and the expected value never share any object.
            return [
                {
                    "factory_name": ShapeNgramFactory.name,
                    "args": {"n": 1},
                    "offsets": [0]
                },
                {
                    "factory_name": IsDigitFactory.name,
                    "args": {},
                    "offsets": [-1, 0]
                }
            ]

        # Given
        persisted_config = CRFSlotFillerConfig(
            feature_factory_configs=build_factory_configs())
        serialized_filler = {
            "unit_name": "crf_slot_filler",
            "crf_model_file": None,
            "language_code": None,
            "intent": None,
            "slot_name_mapping": None,
            "config": persisted_config.to_dict()
        }
        serialized_metadata = {"unit_name": "crf_slot_filler"}
        model_dir = self.tmp_file_path
        model_dir.mkdir()
        self.writeJsonContent(model_dir / "metadata.json",
                              serialized_metadata)
        self.writeJsonContent(model_dir / "slot_filler.json",
                              serialized_filler)

        # When
        loaded_filler = CRFSlotFiller.from_path(model_dir)

        # Then
        # Nothing has been fitted, so every learned attribute stays unset.
        unset_model = unset_language = unset_intent = unset_mapping = None
        reference_config = CRFSlotFillerConfig(
            feature_factory_configs=build_factory_configs())

        self.assertEqual(loaded_filler.crf_model, unset_model)
        self.assertEqual(loaded_filler.language, unset_language)
        self.assertEqual(loaded_filler.intent, unset_intent)
        self.assertEqual(loaded_filler.slot_name_mapping, unset_mapping)
        self.assertDictEqual(reference_config.to_dict(),
                             loaded_filler.config.to_dict())
    def test_should_get_slots_after_deserialization(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me [number_of_cups:snips/number](one) cup of tea
- i want [number_of_cups] cups of tea please
- can you prepare [number_of_cups] cups of tea ?""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        intent = "MakeTea"
        shared = self.get_shared_data(dataset)
        shared[RANDOM_STATE] = 42
        slot_filler = CRFSlotFiller(**shared)
        slot_filler.fit(dataset, intent)
        slot_filler.persist(self.tmp_file_path)

        deserialized_slot_filler = CRFSlotFiller.from_path(
            self.tmp_file_path, **shared)

        # When
        slots = deserialized_slot_filler.get_slots("make me two cups of tea")

        # Then
        expected_slots = [
            unresolved_slot(match_range={
                START: 8,
                END: 11
            },
                            value='two',
                            entity='snips/number',
                            slot_name='number_of_cups')
        ]
        self.assertListEqual(expected_slots, slots)
Example #4
0
    def test_should_get_slots_after_deserialization(self):
        """Check slot extraction still works after a persist/load cycle.

        A ``CRFSlotFiller`` configured with ``random_seed=42`` is fitted on
        ``BEVERAGE_DATASET`` for the ``MakeTea`` intent and persisted. It is
        then reloaded with the entity parsers taken from the fitted
        instance, and must tag ``two`` (characters 8 to 11) as the
        ``number_of_cups`` slot.
        """
        # Given
        target_intent = "MakeTea"
        trained_filler = CRFSlotFiller(CRFSlotFillerConfig(random_seed=42))
        trained_filler.fit(BEVERAGE_DATASET, target_intent)
        trained_filler.persist(self.tmp_file_path)

        # The parsers are read from the fitted instance and handed to the
        # loader explicitly.
        parsers = {
            "custom_entity_parser": trained_filler.custom_entity_parser,
            "builtin_entity_parser": trained_filler.builtin_entity_parser,
        }
        reloaded_filler = CRFSlotFiller.from_path(
            self.tmp_file_path, **parsers)

        # When
        extracted = reloaded_filler.get_slots("make me two cups of tea")

        # Then
        cups_slot = unresolved_slot(
            match_range={START: 8, END: 11},
            value="two",
            entity="snips/number",
            slot_name="number_of_cups")
        self.assertListEqual([cups_slot], extracted)
    def test_should_be_deserializable(self):
        """Load a slot filler from hand-written files and check its state.

        ``metadata.json``, ``slot_filler.json`` and a dummy
        ``foobar.crfsuite`` file are written into ``self.tmp_file_path``.
        After ``CRFSlotFiller.from_path``, the language, intent, slot name
        mapping and configuration must match what was written, and the file
        referenced by ``crf_model.modelfile.name`` must contain the dummy
        model content.
        """

        def build_factory_configs():
            # Returns a fresh list on each call so that the serialized input
            # and the expected value never share any object.
            return [{
                "factory_name": ShapeNgramFactory.name,
                "args": {
                    "n": 1,
                    "language_code": LANGUAGE_EN
                },
                "offsets": [0]
            }, {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0]
            }]

        def build_slot_name_mapping():
            return {
                "dummy_intent_1": {
                    "dummy_slot_name": "dummy_entity_1",
                }
            }

        # Given
        persisted_config = CRFSlotFillerConfig(
            feature_factory_configs=build_factory_configs())
        serialized_filler = {
            "unit_name": "crf_slot_filler",
            "crf_model_file": "foobar.crfsuite",
            "language_code": "en",
            "intent": "dummy_intent_1",
            "slot_name_mapping": build_slot_name_mapping(),
            "config": persisted_config.to_dict()
        }
        serialized_metadata = {"unit_name": "crf_slot_filler"}
        model_dir = self.tmp_file_path
        model_dir.mkdir()
        self.writeJsonContent(model_dir / "metadata.json",
                              serialized_metadata)
        self.writeJsonContent(model_dir / "slot_filler.json",
                              serialized_filler)
        self.writeFileContent(model_dir / "foobar.crfsuite", "foo bar")

        # When
        loaded_filler = CRFSlotFiller.from_path(model_dir)

        # Then
        reference_config = CRFSlotFillerConfig(
            feature_factory_configs=build_factory_configs())

        self.assertEqual(loaded_filler.language, LANGUAGE_EN)
        self.assertEqual(loaded_filler.intent, "dummy_intent_1")
        self.assertEqual(loaded_filler.slot_name_mapping,
                         build_slot_name_mapping())
        self.assertDictEqual(reference_config.to_dict(),
                             loaded_filler.config.to_dict())
        # The dummy model content must be readable at the path the loaded
        # model points to.
        model_file = Path(loaded_filler.crf_model.modelfile.name)
        self.assertFileContent(model_file, "foo bar")