def test_should_be_deserializable_before_fit(self):
    """Load an unfitted CRF slot filler from a directory.

    Writes ``metadata.json`` and ``slot_filler.json`` (model file,
    language, intent and slot name mapping all set to ``None``) under
    ``self.tmp_file_path``, loads them with ``CRFSlotFiller.from_path``
    and checks that those attributes are ``None`` on the loaded object
    and that its config dict equals the expected one.
    """
    # Given
    shape_ngram_factory = {
        "factory_name": ShapeNgramFactory.name,
        "args": {"n": 1},
        "offsets": [0],
    }
    is_digit_factory = {
        "factory_name": IsDigitFactory.name,
        "args": {},
        "offsets": [-1, 0],
    }
    config = CRFSlotFillerConfig(
        feature_factory_configs=[shape_ngram_factory, is_digit_factory])
    serialized_slot_filler = {
        "unit_name": "crf_slot_filler",
        "crf_model_file": None,
        "language_code": None,
        "intent": None,
        "slot_name_mapping": None,
        "config": config.to_dict(),
    }
    self.tmp_file_path.mkdir()
    self.writeJsonContent(
        self.tmp_file_path / "metadata.json",
        {"unit_name": "crf_slot_filler"})
    self.writeJsonContent(
        self.tmp_file_path / "slot_filler.json", serialized_slot_filler)

    # When
    loaded = CRFSlotFiller.from_path(self.tmp_file_path)

    # Then
    # The expectation is built from fresh literals, independent of the
    # objects handed to the config used for serialization.
    expected_config = CRFSlotFillerConfig(
        feature_factory_configs=[
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1},
                "offsets": [0],
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0],
            },
        ])
    unfitted_attributes = (
        "crf_model", "language", "intent", "slot_name_mapping")
    for attribute in unfitted_attributes:
        self.assertEqual(getattr(loaded, attribute), None)
    self.assertDictEqual(expected_config.to_dict(), loaded.config.to_dict())
def test_should_be_deserializable_before_fit(self, mock_deserialize_crf_model):
    """Rebuild an unfitted CRF slot filler from its dict representation.

    The CRF model deserializer is mocked to return ``None``. The test
    feeds ``CRFSlotFiller.from_dict`` a dict whose model data, language,
    intent and slot name mapping are all ``None`` and checks that the
    resulting object exposes ``None`` for each of them, with a config
    dict equal to the expected one.
    """

    def build_factory_configs():
        # Returns fresh objects on every call so that the input and the
        # expectation never share list or dict instances.
        return [
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1},
                "offsets": [0],
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0],
            },
        ]

    # Given
    mock_deserialize_crf_model.return_value = None
    config = CRFSlotFillerConfig(
        feature_factory_configs=build_factory_configs())
    serialized = dict(
        unit_name="crf_slot_filler",
        crf_model_data=None,
        language_code=None,
        intent=None,
        slot_name_mapping=None,
        config=config.to_dict(),
    )

    # When
    restored = CRFSlotFiller.from_dict(serialized)

    # Then
    expected_config = CRFSlotFillerConfig(
        feature_factory_configs=build_factory_configs())
    self.assertEqual(restored.crf_model, None)
    self.assertEqual(restored.language, None)
    self.assertEqual(restored.intent, None)
    self.assertEqual(restored.slot_name_mapping, None)
    self.assertDictEqual(
        expected_config.to_dict(), restored.config.to_dict())
def test_should_be_serializable_before_fit(self):
    """Persist an unfitted CRF slot filler and inspect the written files.

    After ``persist`` is called on a slot filler that was never fitted,
    ``metadata.json`` must hold the unit name and ``slot_filler.json``
    must hold the config together with ``None`` for the model file,
    language, intent and slot name mapping.
    """
    # Given
    factory_configs = [
        {
            "factory_name": ShapeNgramFactory.name,
            "args": {"n": 1},
            "offsets": [0],
        },
        {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0],
        },
    ]
    config = CRFSlotFillerConfig(
        tagging_scheme=TaggingScheme.BILOU,
        feature_factory_configs=factory_configs)
    unfitted_slot_filler = CRFSlotFiller(config)

    # When
    unfitted_slot_filler.persist(self.tmp_file_path)

    # Then
    self.assertJsonContent(
        self.tmp_file_path / "metadata.json",
        {"unit_name": "crf_slot_filler"})
    self.assertJsonContent(
        self.tmp_file_path / "slot_filler.json",
        {
            "crf_model_file": None,
            "language_code": None,
            "config": config.to_dict(),
            "intent": None,
            "slot_name_mapping": None,
        })
def test_should_be_serializable_before_fit(self):
    """Convert an unfitted CRF slot filler to a dict and check its content.

    The dict returned by ``to_dict`` must contain the unit name, the
    config, and ``None`` for the model data, language, intent and slot
    name mapping.
    """
    # Given
    config_kwargs = {
        "tagging_scheme": TaggingScheme.BILOU,
        "feature_factory_configs": [
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1},
                "offsets": [0],
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0],
            },
        ],
    }
    config = CRFSlotFillerConfig(**config_kwargs)
    unfitted_slot_filler = CRFSlotFiller(config)

    # When
    actual = unfitted_slot_filler.to_dict()

    # Then
    expected = dict(
        unit_name="crf_slot_filler",
        crf_model_data=None,
        language_code=None,
        config=config.to_dict(),
        intent=None,
        slot_name_mapping=None,
    )
    self.assertDictEqual(actual, expected)
def test_should_be_serializable(self):
    """Persist a fitted CRF slot filler and inspect the written files.

    The slot filler is fitted on ``SAMPLE_DATASET`` for the intent
    ``dummy_intent_1`` and persisted to ``self.tmp_file_path``. The test
    then checks ``metadata.json``, the presence of the CRF model file
    (named after the in-memory model file) and the content of
    ``slot_filler.json``, whose config is expected to carry
    ``language_code: "en"`` in the ShapeNgramFactory arguments.
    """
    # Given
    shape_ngram_factory = {
        "factory_name": ShapeNgramFactory.name,
        "args": {"n": 1},
        "offsets": [0],
    }
    is_digit_factory = {
        "factory_name": IsDigitFactory.name,
        "args": {},
        "offsets": [-1, 0],
    }
    config = CRFSlotFillerConfig(
        tagging_scheme=TaggingScheme.BILOU,
        feature_factory_configs=[shape_ngram_factory, is_digit_factory])
    training_data = SAMPLE_DATASET
    fitted_slot_filler = CRFSlotFiller(config)
    intent = "dummy_intent_1"
    fitted_slot_filler.fit(training_data, intent=intent)

    # When
    fitted_slot_filler.persist(self.tmp_file_path)

    # Then
    self.assertJsonContent(
        self.tmp_file_path / "metadata.json",
        {"unit_name": "crf_slot_filler"})

    # The persisted model keeps the base name of the in-memory model file.
    model_file = Path(fitted_slot_filler.crf_model.modelfile.name)
    expected_crf_file = model_file.name
    self.assertTrue((self.tmp_file_path / expected_crf_file).exists())

    expected_config = CRFSlotFillerConfig(
        tagging_scheme=TaggingScheme.BILOU,
        feature_factory_configs=[
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1, "language_code": "en"},
                "offsets": [0],
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0],
            },
        ])
    expected_mapping = {
        "dummy_slot_name": "dummy_entity_1",
        "dummy_slot_name2": "dummy_entity_2",
        "dummy_slot_name3": "dummy_entity_2",
    }
    expected_content = {
        "crf_model_file": expected_crf_file,
        "language_code": "en",
        "config": expected_config.to_dict(),
        "intent": intent,
        "slot_name_mapping": expected_mapping,
    }
    self.assertJsonContent(
        self.tmp_file_path / "slot_filler.json", expected_content)
def test_should_be_serializable(self, mock_serialize_crf_model):
    """Convert a fitted CRF slot filler to a dict and check its content.

    The CRF model serializer is mocked to return a placeholder string.
    After fitting on the validated ``SAMPLE_DATASET`` for the intent
    ``dummy_intent_1``, ``to_dict`` must return the unit name, the
    placeholder model data, the language, the intent, the slot name
    mapping and a config whose ShapeNgramFactory arguments carry
    ``language_code: "en"``.
    """
    # Given
    mocked_model_data = "mocked_crf_model_data"
    mock_serialize_crf_model.return_value = mocked_model_data
    config = CRFSlotFillerConfig(
        tagging_scheme=TaggingScheme.BILOU,
        feature_factory_configs=[
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1},
                "offsets": [0],
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0],
            },
        ])
    validated_dataset = validate_and_format_dataset(SAMPLE_DATASET)
    fitted_slot_filler = CRFSlotFiller(config)
    intent = "dummy_intent_1"
    fitted_slot_filler.fit(validated_dataset, intent=intent)

    # When
    actual = fitted_slot_filler.to_dict()

    # Then
    expected_shape_ngram = {
        "factory_name": ShapeNgramFactory.name,
        "args": {"n": 1, "language_code": "en"},
        "offsets": [0],
    }
    expected_is_digit = {
        "factory_name": IsDigitFactory.name,
        "args": {},
        "offsets": [-1, 0],
    }
    expected_config = CRFSlotFillerConfig(
        tagging_scheme=TaggingScheme.BILOU,
        feature_factory_configs=[expected_shape_ngram, expected_is_digit])
    expected = {
        "unit_name": "crf_slot_filler",
        "crf_model_data": "mocked_crf_model_data",
        "language_code": "en",
        "config": expected_config.to_dict(),
        "intent": intent,
        "slot_name_mapping": {
            "dummy_slot_name": "dummy_entity_1",
            "dummy_slot_name2": "dummy_entity_2",
            "dummy_slot_name3": "dummy_entity_2",
        },
    }
    self.assertDictEqual(actual, expected)
def test_should_be_deserializable(self):
    """Load a fitted CRF slot filler from a directory.

    Writes ``metadata.json``, ``slot_filler.json`` and a dummy
    ``foobar.crfsuite`` model file under ``self.tmp_file_path``, loads
    them with ``CRFSlotFiller.from_path`` and checks the language, the
    intent, the slot name mapping, the config dict, and that the loaded
    CRF model file holds the dummy content.
    """
    language = LANGUAGE_EN

    def build_factory_configs():
        # Fresh objects on every call keep the input and the expectation
        # independent of each other.
        return [
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1, "language_code": language},
                "offsets": [0],
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0],
            },
        ]

    # Given
    config = CRFSlotFillerConfig(
        feature_factory_configs=build_factory_configs())
    serialized_slot_filler = {
        "unit_name": "crf_slot_filler",
        "crf_model_file": "foobar.crfsuite",
        "language_code": "en",
        "intent": "dummy_intent_1",
        "slot_name_mapping": {
            "dummy_intent_1": {
                "dummy_slot_name": "dummy_entity_1",
            }
        },
        "config": config.to_dict(),
    }
    self.tmp_file_path.mkdir()
    self.writeJsonContent(
        self.tmp_file_path / "metadata.json",
        {"unit_name": "crf_slot_filler"})
    self.writeJsonContent(
        self.tmp_file_path / "slot_filler.json", serialized_slot_filler)
    self.writeFileContent(
        self.tmp_file_path / "foobar.crfsuite", "foo bar")

    # When
    loaded = CRFSlotFiller.from_path(self.tmp_file_path)

    # Then
    expected_config = CRFSlotFillerConfig(
        feature_factory_configs=build_factory_configs())
    expected_slot_name_mapping = {
        "dummy_intent_1": {
            "dummy_slot_name": "dummy_entity_1",
        }
    }
    self.assertEqual(loaded.language, LANGUAGE_EN)
    self.assertEqual(loaded.intent, "dummy_intent_1")
    self.assertEqual(loaded.slot_name_mapping, expected_slot_name_mapping)
    self.assertDictEqual(expected_config.to_dict(), loaded.config.to_dict())
    loaded_model_path = Path(loaded.crf_model.modelfile.name)
    self.assertFileContent(loaded_model_path, "foo bar")
def test_should_be_serializable(self):
    """Persist a fitted CRF slot filler and inspect the written files.

    A one-intent dataset is built from an inline YAML document, the slot
    filler is fitted on it for ``my_intent`` and persisted to
    ``self.tmp_file_path``. The test then checks ``metadata.json``, the
    presence of the ``CRF_MODEL_FILENAME`` file and the content of
    ``slot_filler.json``, whose config is expected to carry
    ``language_code: "en"`` in the ShapeNgramFactory arguments.
    """
    # Given
    # The YAML document must keep one key per line: collapsed onto a
    # single line it is no longer a valid intent definition.
    dataset_stream = io.StringIO("""
---
type: intent
name: my_intent
utterances:
- this is [slot1:entity1](my first entity)
- this is [slot2:entity2](second_entity)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    features_factories = [
        {
            "factory_name": ShapeNgramFactory.name,
            "args": {"n": 1},
            "offsets": [0],
        },
        {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0],
        },
    ]
    config = CRFSlotFillerConfig(
        tagging_scheme=TaggingScheme.BILOU,
        feature_factory_configs=features_factories)
    shared = self.get_shared_data(dataset)
    slot_filler = CRFSlotFiller(config, **shared)
    intent = "my_intent"
    slot_filler.fit(dataset, intent=intent)

    # When
    slot_filler.persist(self.tmp_file_path)

    # Then
    metadata_path = self.tmp_file_path / "metadata.json"
    self.assertJsonContent(metadata_path, {"unit_name": "crf_slot_filler"})
    self.assertTrue((self.tmp_file_path / CRF_MODEL_FILENAME).exists())

    expected_feature_factories = [
        {
            "factory_name": ShapeNgramFactory.name,
            "args": {"n": 1, "language_code": "en"},
            "offsets": [0],
        },
        {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0],
        },
    ]
    expected_config = CRFSlotFillerConfig(
        tagging_scheme=TaggingScheme.BILOU,
        feature_factory_configs=expected_feature_factories)
    expected_slot_filler_dict = {
        "crf_model_file": CRF_MODEL_FILENAME,
        "language_code": "en",
        "config": expected_config.to_dict(),
        "intent": intent,
        "slot_name_mapping": {
            "slot1": "entity1",
            "slot2": "entity2",
        },
    }
    slot_filler_path = self.tmp_file_path / "slot_filler.json"
    self.assertJsonContent(slot_filler_path, expected_slot_filler_dict)
def test_should_be_deserializable(self, mock_deserialize_crf_model):
    """Rebuild a fitted CRF slot filler from its dict representation.

    The CRF model deserializer is mocked to return ``None``. The test
    checks that ``CRFSlotFiller.from_dict`` calls it exactly once with
    the serialized model data, and that the language, intent, slot name
    mapping and config dict of the resulting object match the input.
    """
    # Given
    language = LANGUAGE_EN
    mock_deserialize_crf_model.return_value = None
    shape_ngram_factory = {
        "factory_name": ShapeNgramFactory.name,
        "args": {"n": 1, "language_code": language},
        "offsets": [0],
    }
    is_digit_factory = {
        "factory_name": IsDigitFactory.name,
        "args": {},
        "offsets": [-1, 0],
    }
    config = CRFSlotFillerConfig(
        feature_factory_configs=[shape_ngram_factory, is_digit_factory])
    serialized = {
        "unit_name": "crf_slot_filler",
        "crf_model_data": "mocked_crf_model_data",
        "language_code": "en",
        "intent": "dummy_intent_1",
        "slot_name_mapping": {
            "dummy_intent_1": {
                "dummy_slot_name": "dummy_entity_1",
            }
        },
        "config": config.to_dict(),
    }

    # When
    restored = CRFSlotFiller.from_dict(serialized)

    # Then
    mock_deserialize_crf_model.assert_called_once_with(
        "mocked_crf_model_data")
    # Expectations use fresh literals, independent of the input objects.
    expected_config = CRFSlotFillerConfig(
        feature_factory_configs=[
            {
                "factory_name": ShapeNgramFactory.name,
                "args": {"n": 1, "language_code": language},
                "offsets": [0],
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0],
            },
        ])
    expected_slot_name_mapping = {
        "dummy_intent_1": {
            "dummy_slot_name": "dummy_entity_1",
        }
    }
    self.assertEqual(restored.language, LANGUAGE_EN)
    self.assertEqual(restored.intent, "dummy_intent_1")
    self.assertEqual(
        restored.slot_name_mapping, expected_slot_name_mapping)
    self.assertDictEqual(
        expected_config.to_dict(), restored.config.to_dict())