Example #1
0
    def test_should_be_deserializable_before_fit(self):
        # Given
        features_factories = [
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1},
                "offsets": [0]
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0]
            }
        ]
        slot_filler_config = CRFSlotFillerConfig(
            feature_factory_configs=features_factories)
        slot_filler_dict = {
            "unit_name": "crf_slot_filler",
            "crf_model_file": None,
            "language_code": None,
            "intent": None,
            "slot_name_mapping": None,
            "config": slot_filler_config.to_dict()
        }
        metadata = {"unit_name": "crf_slot_filler"}
        self.tmp_file_path.mkdir()
        self.writeJsonContent(self.tmp_file_path / "metadata.json", metadata)
        self.writeJsonContent(self.tmp_file_path / "slot_filler.json",
                              slot_filler_dict)

        # When
        slot_filler = CRFSlotFiller.from_path(self.tmp_file_path)

        # Then
        expected_features_factories = [
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1},
                "offsets": [0]
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0]
            }
        ]
        expected_language = None
        expected_config = CRFSlotFillerConfig(
            feature_factory_configs=expected_features_factories)
        expected_intent = None
        expected_slot_name_mapping = None
        expected_crf_model = None

        self.assertEqual(slot_filler.crf_model, expected_crf_model)
        self.assertEqual(slot_filler.language, expected_language)
        self.assertEqual(slot_filler.intent, expected_intent)
        self.assertEqual(slot_filler.slot_name_mapping,
                         expected_slot_name_mapping)
        self.assertDictEqual(expected_config.to_dict(),
                             slot_filler.config.to_dict())
Example #2
0
    def test_should_be_deserializable_before_fit(self,
                                                 mock_deserialize_crf_model):
        # Given
        mock_deserialize_crf_model.return_value = None
        features_factories = [
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1},
                "offsets": [0]
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0]
            }
        ]
        slot_filler_config = CRFSlotFillerConfig(
            feature_factory_configs=features_factories)
        slot_filler_dict = {
            "unit_name": "crf_slot_filler",
            "crf_model_data": None,
            "language_code": None,
            "intent": None,
            "slot_name_mapping": None,
            "config": slot_filler_config.to_dict()
        }

        # When
        slot_filler = CRFSlotFiller.from_dict(slot_filler_dict)

        # Then
        expected_features_factories = [
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1},
                "offsets": [0]
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0]
            }
        ]
        expected_language = None
        expected_config = CRFSlotFillerConfig(
            feature_factory_configs=expected_features_factories)
        expected_intent = None
        expected_slot_name_mapping = None
        expected_crf_model = None

        self.assertEqual(slot_filler.crf_model, expected_crf_model)
        self.assertEqual(slot_filler.language, expected_language)
        self.assertEqual(slot_filler.intent, expected_intent)
        self.assertEqual(slot_filler.slot_name_mapping,
                         expected_slot_name_mapping)
        self.assertDictEqual(expected_config.to_dict(),
                             slot_filler.config.to_dict())
Example #3
0
    def test_should_be_serializable_before_fit(self):
        # Given
        features_factories = [{
            "factory_name": ShapeNgramFactory.name,
            "args": {
                "n": 1
            },
            "offsets": [0]
        }, {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0]
        }]
        config = CRFSlotFillerConfig(
            tagging_scheme=TaggingScheme.BILOU,
            feature_factory_configs=features_factories)

        slot_filler = CRFSlotFiller(config)

        # When
        slot_filler.persist(self.tmp_file_path)

        # Then
        metadata_path = self.tmp_file_path / "metadata.json"
        self.assertJsonContent(metadata_path, {"unit_name": "crf_slot_filler"})

        expected_slot_filler_dict = {
            "crf_model_file": None,
            "language_code": None,
            "config": config.to_dict(),
            "intent": None,
            "slot_name_mapping": None,
        }
        slot_filler_path = self.tmp_file_path / "slot_filler.json"
        self.assertJsonContent(slot_filler_path, expected_slot_filler_dict)
Example #4
0
    def test_should_be_serializable_before_fit(self):
        # Given
        features_factories = [
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1},
                "offsets": [0]
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0]
            }
        ]
        config = CRFSlotFillerConfig(
            tagging_scheme=TaggingScheme.BILOU,
            feature_factory_configs=features_factories)

        slot_filler = CRFSlotFiller(config)

        # When
        actual_slot_filler_dict = slot_filler.to_dict()

        # Then
        expected_slot_filler_dict = {
            "unit_name": "crf_slot_filler",
            "crf_model_data": None,
            "language_code": None,
            "config": config.to_dict(),
            "intent": None,
            "slot_name_mapping": None,
        }
        self.assertDictEqual(actual_slot_filler_dict,
                             expected_slot_filler_dict)
Example #5
0
    def test_should_be_serializable(self):
        # Given
        features_factories = [{
            "factory_name": ShapeNgramFactory.name,
            "args": {
                "n": 1
            },
            "offsets": [0]
        }, {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0]
        }]
        config = CRFSlotFillerConfig(
            tagging_scheme=TaggingScheme.BILOU,
            feature_factory_configs=features_factories)
        dataset = SAMPLE_DATASET

        slot_filler = CRFSlotFiller(config)
        intent = "dummy_intent_1"
        slot_filler.fit(dataset, intent=intent)

        # When
        slot_filler.persist(self.tmp_file_path)

        # Then
        metadata_path = self.tmp_file_path / "metadata.json"
        self.assertJsonContent(metadata_path, {"unit_name": "crf_slot_filler"})

        expected_crf_file = Path(slot_filler.crf_model.modelfile.name).name
        self.assertTrue((self.tmp_file_path / expected_crf_file).exists())

        expected_feature_factories = [{
            "factory_name": ShapeNgramFactory.name,
            "args": {
                "n": 1,
                "language_code": "en"
            },
            "offsets": [0]
        }, {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0]
        }]
        expected_config = CRFSlotFillerConfig(
            tagging_scheme=TaggingScheme.BILOU,
            feature_factory_configs=expected_feature_factories)
        expected_slot_filler_dict = {
            "crf_model_file": expected_crf_file,
            "language_code": "en",
            "config": expected_config.to_dict(),
            "intent": intent,
            "slot_name_mapping": {
                "dummy_slot_name": "dummy_entity_1",
                "dummy_slot_name2": "dummy_entity_2",
                "dummy_slot_name3": "dummy_entity_2",
            }
        }
        slot_filler_path = self.tmp_file_path / "slot_filler.json"
        self.assertJsonContent(slot_filler_path, expected_slot_filler_dict)
Example #6
0
    def test_should_be_serializable(self, mock_serialize_crf_model):
        # Given
        mock_serialize_crf_model.return_value = "mocked_crf_model_data"
        features_factories = [
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1},
                "offsets": [0]
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0]
            }
        ]
        config = CRFSlotFillerConfig(
            tagging_scheme=TaggingScheme.BILOU,
            feature_factory_configs=features_factories)
        dataset = validate_and_format_dataset(SAMPLE_DATASET)

        slot_filler = CRFSlotFiller(config)
        intent = "dummy_intent_1"
        slot_filler.fit(dataset, intent=intent)

        # When
        actual_slot_filler_dict = slot_filler.to_dict()

        # Then
        expected_feature_factories = [
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1, "language_code": "en"},
                "offsets": [0]
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0]
            }
        ]
        expected_config = CRFSlotFillerConfig(
            tagging_scheme=TaggingScheme.BILOU,
            feature_factory_configs=expected_feature_factories)
        expected_slot_filler_dict = {
            "unit_name": "crf_slot_filler",
            "crf_model_data": "mocked_crf_model_data",
            "language_code": "en",
            "config": expected_config.to_dict(),
            "intent": intent,
            "slot_name_mapping": {
                "dummy_slot_name": "dummy_entity_1",
                "dummy_slot_name2": "dummy_entity_2",
                "dummy_slot_name3": "dummy_entity_2",
            }
        }
        self.assertDictEqual(actual_slot_filler_dict,
                             expected_slot_filler_dict)
Example #7
0
    def test_should_be_deserializable(self):
        # Given
        language = LANGUAGE_EN
        feature_factories = [{
            "factory_name": ShapeNgramFactory.name,
            "args": {
                "n": 1,
                "language_code": language
            },
            "offsets": [0]
        }, {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0]
        }]
        slot_filler_config = CRFSlotFillerConfig(
            feature_factory_configs=feature_factories)
        slot_filler_dict = {
            "unit_name": "crf_slot_filler",
            "crf_model_file": "foobar.crfsuite",
            "language_code": "en",
            "intent": "dummy_intent_1",
            "slot_name_mapping": {
                "dummy_intent_1": {
                    "dummy_slot_name": "dummy_entity_1",
                }
            },
            "config": slot_filler_config.to_dict()
        }
        metadata = {"unit_name": "crf_slot_filler"}
        self.tmp_file_path.mkdir()
        self.writeJsonContent(self.tmp_file_path / "metadata.json", metadata)
        self.writeJsonContent(self.tmp_file_path / "slot_filler.json",
                              slot_filler_dict)
        self.writeFileContent(self.tmp_file_path / "foobar.crfsuite",
                              "foo bar")

        # When
        slot_filler = CRFSlotFiller.from_path(self.tmp_file_path)

        # Then
        expected_language = LANGUAGE_EN
        expected_feature_factories = [{
            "factory_name": ShapeNgramFactory.name,
            "args": {
                "n": 1,
                "language_code": language
            },
            "offsets": [0]
        }, {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0]
        }]
        expected_config = CRFSlotFillerConfig(
            feature_factory_configs=expected_feature_factories)
        expected_intent = "dummy_intent_1"
        expected_slot_name_mapping = {
            "dummy_intent_1": {
                "dummy_slot_name": "dummy_entity_1",
            }
        }

        self.assertEqual(slot_filler.language, expected_language)
        self.assertEqual(slot_filler.intent, expected_intent)
        self.assertEqual(slot_filler.slot_name_mapping,
                         expected_slot_name_mapping)
        self.assertDictEqual(expected_config.to_dict(),
                             slot_filler.config.to_dict())
        crf_path = Path(slot_filler.crf_model.modelfile.name)
        self.assertFileContent(crf_path, "foo bar")
Example #8
0
    def test_should_be_serializable(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: my_intent
utterances:
- this is [slot1:entity1](my first entity)
- this is [slot2:entity2](second_entity)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        features_factories = [{
            "factory_name": ShapeNgramFactory.name,
            "args": {
                "n": 1
            },
            "offsets": [0]
        }, {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0]
        }]
        config = CRFSlotFillerConfig(
            tagging_scheme=TaggingScheme.BILOU,
            feature_factory_configs=features_factories)
        shared = self.get_shared_data(dataset)
        slot_filler = CRFSlotFiller(config, **shared)
        intent = "my_intent"
        slot_filler.fit(dataset, intent=intent)

        # When
        slot_filler.persist(self.tmp_file_path)

        # Then
        metadata_path = self.tmp_file_path / "metadata.json"
        self.assertJsonContent(metadata_path, {"unit_name": "crf_slot_filler"})

        self.assertTrue((self.tmp_file_path / CRF_MODEL_FILENAME).exists())

        expected_feature_factories = [{
            "factory_name": ShapeNgramFactory.name,
            "args": {
                "n": 1,
                "language_code": "en"
            },
            "offsets": [0]
        }, {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0]
        }]
        expected_config = CRFSlotFillerConfig(
            tagging_scheme=TaggingScheme.BILOU,
            feature_factory_configs=expected_feature_factories)
        expected_slot_filler_dict = {
            "crf_model_file": CRF_MODEL_FILENAME,
            "language_code": "en",
            "config": expected_config.to_dict(),
            "intent": intent,
            "slot_name_mapping": {
                "slot1": "entity1",
                "slot2": "entity2",
            }
        }
        slot_filler_path = self.tmp_file_path / "slot_filler.json"
        self.assertJsonContent(slot_filler_path, expected_slot_filler_dict)
Example #9
0
    def test_should_be_deserializable(self, mock_deserialize_crf_model):
        # Given
        language = LANGUAGE_EN
        mock_deserialize_crf_model.return_value = None
        feature_factories = [
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1, "language_code": language},
                "offsets": [0]
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0]
            }
        ]
        slot_filler_config = CRFSlotFillerConfig(
            feature_factory_configs=feature_factories)
        slot_filler_dict = {
            "unit_name": "crf_slot_filler",
            "crf_model_data": "mocked_crf_model_data",
            "language_code": "en",
            "intent": "dummy_intent_1",
            "slot_name_mapping": {
                "dummy_intent_1": {
                    "dummy_slot_name": "dummy_entity_1",
                }
            },
            "config": slot_filler_config.to_dict()
        }
        # When
        slot_filler = CRFSlotFiller.from_dict(slot_filler_dict)

        # Then
        mock_deserialize_crf_model.assert_called_once_with(
            "mocked_crf_model_data")
        expected_language = LANGUAGE_EN
        expected_feature_factories = [
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1, "language_code": language},
                "offsets": [0]
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0]
            }
        ]
        expected_config = CRFSlotFillerConfig(
            feature_factory_configs=expected_feature_factories)
        expected_intent = "dummy_intent_1"
        expected_slot_name_mapping = {
            "dummy_intent_1": {
                "dummy_slot_name": "dummy_entity_1",
            }
        }

        self.assertEqual(slot_filler.language, expected_language)
        self.assertEqual(slot_filler.intent, expected_intent)
        self.assertEqual(slot_filler.slot_name_mapping,
                         expected_slot_name_mapping)
        self.assertDictEqual(expected_config.to_dict(),
                             slot_filler.config.to_dict())