async def test_model_json_data_partition_location_consistency(self):
    '''
    Check that the first data partition's location is the same corpus path when:
        1. The manifest is read directly.
        2. The manifest is converted to model.json data and converted back.
        3. The manifest is loaded from the model.json input file.
    '''
    expected_location = 'EpisodeOfCare/partition-data.csv'

    def first_partition_location(manifest):
        # Location of the first data partition of the manifest's first entity.
        return manifest.entities[0].data_partitions[0].location

    test_name = 'test_model_json_data_partition_location_consistency'
    pascal_test_name = StringUtils.snake_case_to_pascal_case(test_name)

    # 1. Read the manifest directly.
    direct_corpus = TestHelper.get_local_corpus(self.tests_subpath, test_name)
    direct_manifest = await direct_corpus.fetch_object_async(
        'default.manifest.cdm.json',
        direct_corpus.storage.fetch_root_folder('local'))
    self.assertEqual(expected_location,
                     first_partition_location(direct_manifest))

    # 2. Convert to model.json data and back again.
    model_json_data = await ManifestPersistence.to_data(
        direct_manifest, None, None)
    location = model_json_data.entities[0]['partitions'][0]['location']  # type: str
    expected_adapter_suffix = os.path.join(
        'TestData', 'Persistence', 'ModelJson', 'DataPartition',
        pascal_test_name, 'Input', 'EpisodeOfCare', 'partition-data.csv')
    # Model Json uses absolute adapter path.
    self.assertIn(expected_adapter_suffix, location)

    round_trip_corpus = TestHelper.get_local_corpus(self.tests_subpath,
                                                    test_name)
    round_trip_manifest = await ManifestPersistence.from_object(
        round_trip_corpus.ctx, model_json_data,
        round_trip_corpus.storage.fetch_root_folder('local'))
    self.assertEqual(expected_location,
                     first_partition_location(round_trip_manifest))

    # 3. Load the model.json input file, after swapping the path stored in it
    #    for the adapter path computed on this machine.
    # TODO: Need to change path in C#
    file_corpus = TestHelper.get_local_corpus(self.tests_subpath, test_name)
    raw_model_json = TestHelper.get_input_file_content(
        self.tests_subpath, test_name, 'model.json')
    local_root = file_corpus.storage.fetch_root_folder('local')
    stored_path = 'C:\\\\cdm\\\\testData\\\\Persistence\\\\ModelJson\\\\DataPartition\\\\TestModelJsonDataPartitionLocationConsistency\\\\Input\\\\EpisodeOfCare\\\\partition-data.csv'
    without_bom = raw_model_json.replace('\uFEFF', '')
    escaped_location = location.replace('\\', '\\\\')
    patched_model_json = without_bom.replace(stored_path, escaped_location)
    manifest_from_file = await ManifestPersistence.from_object(
        file_corpus.ctx, Model().decode(patched_model_json), local_root)
    self.assertEqual(expected_location,
                     first_partition_location(manifest_from_file))
async def test_model_json_data_partition_location_consistency(self):
    '''
    Check that the first data partition's location is the same corpus path when
    the manifest is read directly, when it is converted to model.json data and
    back, and when it is loaded from the model.json input file.
    '''
    expected_location = 'EpisodeOfCare/partition-data.csv'

    def first_partition_location(manifest):
        # Location of the first data partition of the manifest's first entity.
        return manifest.entities[0].data_partitions[0].location

    test_name = 'TestModelJsonDataPartitionLocationConsistency'

    # Read the manifest directly.
    direct_corpus = TestHelper.get_local_corpus(self.tests_subpath, test_name)
    direct_manifest = await direct_corpus.fetch_object_async(
        'default.manifest.cdm.json',
        direct_corpus.storage.fetch_root_folder('local'))
    self.assertEqual(expected_location,
                     first_partition_location(direct_manifest))

    # Convert to model.json data and back again.
    model_json_data = await ManifestPersistence.to_data(
        direct_manifest, None, None)
    location = model_json_data.entities[0]['partitions'][0]['location']  # type: str
    # Model Json uses absolute adapter path.
    self.assertIn(
        'testdata\\persistence\\modeljson\\data_partition\\TestModelJsonDataPartitionLocationConsistency\\input\\EpisodeOfCare\\partition-data.csv',
        location)

    round_trip_corpus = TestHelper.get_local_corpus(self.tests_subpath,
                                                    test_name)
    round_trip_manifest = await ManifestPersistence.from_data(
        round_trip_corpus.ctx, model_json_data,
        round_trip_corpus.storage.fetch_root_folder('local'))
    self.assertEqual(expected_location,
                     first_partition_location(round_trip_manifest))

    # Load the model.json input file, after swapping the path stored in it
    # for the adapter path computed on this machine.
    file_corpus = TestHelper.get_local_corpus(self.tests_subpath, test_name)
    raw_model_json = TestHelper.get_input_file_content(
        self.tests_subpath, test_name, 'model.json')
    local_root = file_corpus.storage.fetch_root_folder('local')
    escaped_location = location.replace('\\', '\\\\')
    patched_model_json = raw_model_json.replace(
        'C:\\\\cdm\\\\CDM.ObjectModel.CSharp\\\\Microsoft.CommonDataModel\\\\Microsoft.CommonDataModel.ObjectModel.Tests\\\\TestData\\\\Persistence\\\\ModelJson\\\\DataPartition\\\\TestModelJsonDataPartitionLocationConsistency\\\\Input\\\\EpisodeOfCare\\\\partition-data.csv',
        escaped_location)
    manifest_from_file = await ManifestPersistence.from_data(
        file_corpus.ctx, Model().decode(patched_model_json), local_root)
    self.assertEqual(expected_location,
                     first_partition_location(manifest_from_file))
async def _load_document_from_path_async(self, folder: 'CdmFolderDefinition', doc_name: str,
                                         doc_container: 'CdmDocumentDefinition',
                                         res_opt: Optional[ResolveOptions] = None) \
        -> 'CdmDocumentDefinition':
    """Read a document from storage, convert it and add it to the folder.

    The persistence class is chosen from the lower-cased document name:
    manifest/folio files, model.json files and plain CDM documents are each
    handled by their own persistence module.

    Args:
        folder: Folder whose namespace selects the storage adapter and whose
            folder_path is prefixed to doc_name to build the read path.
        doc_name: File name of the document inside the folder.
        doc_container: When truthy, the freshly loaded content is copied
            into this existing document and that copy is what gets
            appended and returned.
        res_opt: Optional resolve options. When shallow_validation is set,
            a read failure is logged as a warning instead of an error.

    Returns:
        The loaded document, or None when the read fails, the name is empty
        or not acceptable, no persistence class matches, or the conversion
        raises.
    """
    method_name = self._load_document_from_path_async.__name__

    # go get the doc
    doc_content = None  # type: Optional[CdmDocumentDefinition]
    json_data = None
    fs_modified_time = None
    doc_path = folder.folder_path + doc_name
    adapter = self._ctx.corpus.storage.fetch_adapter(folder.namespace)  # type: StorageAdapter

    try:
        if adapter.can_read():
            # log message used by navigator, do not change or remove
            logger.debug(self._ctx, self._TAG, method_name, doc_path,
                         'request file: {}'.format(doc_path))
            json_data = await adapter.read_async(doc_path)
            # log message used by navigator, do not change or remove
            logger.debug(self._ctx, self._TAG, method_name, doc_path,
                         'received file: {}'.format(doc_path))
        else:
            raise Exception('Storage Adapter is not enabled to read.')
    except Exception as e:
        # log message used by navigator, do not change or remove
        logger.debug(self._ctx, self._TAG, method_name, doc_path,
                     'fail file: {}'.format(doc_path))

        # when shallow validation is enabled, log messages about being unable to find
        # referenced documents as warnings instead of errors.
        if res_opt and res_opt.shallow_validation:
            # The attribute is `namespace`; a C#-style `Namespace` would raise
            # AttributeError here and mask the original read failure.
            logger.warning(self._ctx, self._TAG, method_name, doc_path,
                           CdmLogCode.WARN_PERSIST_FILE_READ_FAILURE,
                           doc_path, folder.namespace, e)
        else:
            logger.error(self._ctx, self._TAG, method_name, doc_path,
                         CdmLogCode.ERR_PERSIST_FILE_READ_FAILURE,
                         doc_path, str(folder.namespace), e)
        return None

    try:
        fs_modified_time = await adapter.compute_last_modified_time_async(doc_path)
    except Exception as e:
        # Best effort: the load continues without a modified time. Python
        # exceptions have no `.Message` attribute, so pass the exception itself.
        logger.warning(self._ctx, self._TAG, method_name, doc_path,
                       CdmLogCode.WARN_PERSIST_FILE_MOD_COMPUTE_FAILED, e)

    if not doc_name:
        logger.error(self._ctx, self._TAG, method_name, doc_path,
                     CdmLogCode.ERR_PERSIST_NULL_DOC_NAME)
        return None

    doc_name_lower = doc_name.lower()

    # If loading an model.json file, check that it is named correctly.
    if doc_name_lower.endswith(self.MODEL_JSON_EXTENSION) \
            and doc_name_lower != self.MODEL_JSON_EXTENSION:
        logger.error(self._ctx, self._TAG, method_name, doc_path,
                     CdmLogCode.ERR_PERSIST_DOC_NAME_LOAD_FAILURE,
                     doc_name, self.MODEL_JSON_EXTENSION)
        return None

    try:
        if doc_name_lower.endswith(PersistenceLayer.MANIFEST_EXTENSION) \
                or doc_name_lower.endswith(PersistenceLayer.FOLIO_EXTENSION):
            from cdm.persistence.cdmfolder import ManifestPersistence
            from cdm.persistence.cdmfolder.types import ManifestContent
            manifest = ManifestContent()
            manifest.decode(json_data)
            doc_content = ManifestPersistence.from_object(
                self._ctx, doc_name, folder.namespace, folder.folder_path, manifest)
        elif doc_name_lower.endswith(PersistenceLayer.MODEL_JSON_EXTENSION):
            # The file name was already validated above, so no second check is needed.
            from cdm.persistence.modeljson import ManifestPersistence
            from cdm.persistence.modeljson.types import Model
            model = Model()
            model.decode(json_data)
            doc_content = await ManifestPersistence.from_object(self._ctx, model, folder)
        elif doc_name_lower.endswith(PersistenceLayer.CDM_EXTENSION):
            from cdm.persistence.cdmfolder import DocumentPersistence
            from cdm.persistence.cdmfolder.types import DocumentContent
            document = DocumentContent()
            document.decode(json_data)
            doc_content = DocumentPersistence.from_object(
                self._ctx, doc_name, folder.namespace, folder.folder_path, document)
        else:
            # Could not find a registered persistence class to handle this document type.
            logger.error(self._ctx, self._TAG, method_name, doc_path,
                         CdmLogCode.ERR_PERSIST_CLASS_MISSING, doc_name)
            return None
    except Exception as e:
        logger.error(self._ctx, self._TAG, method_name, doc_path,
                     CdmLogCode.ERR_PERSIST_DOC_CONVERSION_FAILURE, doc_path, e)
        return None

    # add document to the folder, this sets all the folder/path things, caches name to content
    # association and may trigger indexing on content
    if doc_content is not None:
        if doc_container:
            # there are situations where a previously loaded document must be re-loaded.
            # the end of that chain of work is here where the old version of the document has been removed from
            # the corpus and we have created a new document and loaded it from storage and after this call we will
            # probably add it to the corpus and index it, etc.
            # it would be really rude to just kill that old object and replace it with this replicant, especially
            # because the caller has no idea this happened. so... sigh ... instead of returning the new object
            # return the one that was just killed off but make it contain everything the new document loaded.
            doc_content = doc_content.copy(
                ResolveOptions(wrt_doc=doc_container,
                               directives=self._ctx.corpus.default_resolution_directives),
                doc_container)

        folder.documents.append(doc_content, doc_name)
        doc_content._file_system_modified_time = fs_modified_time
        doc_content._is_dirty = False

    return doc_content
# Attribute that carries an extension_fields value.
primary_attribute = Attribute()
primary_attribute.dataType = 'Attribute'
primary_attribute.name = 'accountId'
primary_attribute.traits = [{'is.primaryId': 'true'}]
primary_attribute.extension_fields = 'extfieldsvalue'

# Entity that owns the attribute and also carries an extension_fields value.
account_entity = LocalEntity()
account_entity.type = 'LocalEntity'
account_entity.name = 'Account'
account_entity.traits = [{'is.some.trait': 'foo'}]
account_entity.annotations = [{'some:annotation': 'bar'}]
account_entity.attributes = [primary_attribute]
account_entity.extension_fields = 'extfieldsvalue'

source_model = Model()
source_model.name = 'SomeModel'
source_model.description = 'SomeDescription'
source_model.version = '1.0'
source_model.entities = [account_entity]

# Encode, decode into a fresh Model, and encode again: both encodings must match.
encoded_once = source_model.encode()
decoded_model = Model()
decoded_model.decode(encoded_once)
encoded_twice = decoded_model.encode()
assert encoded_once == encoded_twice

# there should be no extension_fields in deserialized object
decoded_entity = decoded_model.entities[0]
assert not decoded_entity.get('extension_fields')
assert not decoded_entity.attributes[0].get('extension_fields')
async def _fetch_document_from_folder_path_async(self, document_path: str, adapter: 'StorageAdapterBase',
                                                 force_reload: bool) -> 'CdmDocumentDefinition':
    """Gets the document from folder path.

    arguments:
    document_path: The path; only the part before the first '/' is used as the document name.
    adapter: The storage adapter where the document can be found.
    force_reload: When true, the document is read again even if it is already in the lookup."""
    from .cdm_corpus_def import FOLIO_EXTENSION, MANIFEST_EXTENSION, MODEL_JSON_EXTENSION

    # Everything up to the first '/' (or the whole path when there is none).
    doc_name = document_path.partition('/')[0]

    # got that doc?
    if not force_reload and doc_name in self._document_lookup:
        return self._document_lookup[doc_name]

    # go get the doc
    doc_path = '{}{}'.format(self.folder_path, doc_name)
    json_data = None
    fs_modified_time = None

    try:
        if adapter.can_read():
            json_data = await adapter.read_async(doc_path)
            fs_modified_time = await adapter.compute_last_modified_time_async(
                adapter.create_adapter_path(doc_path))
    except Exception:
        self.corpus.ctx.logger.exception('Could not read %s from the \'%s\' namespace.', doc_path, self.namespace)
        return None

    if doc_name.endswith((FOLIO_EXTENSION, MANIFEST_EXTENSION)):
        from cdm.persistence.cdmfolder import ManifestPersistence
        from cdm.persistence.cdmfolder.types import ManifestContent
        manifest_content = ManifestContent()
        manifest_content.decode(json_data)
        loaded = await ManifestPersistence.from_data(
            self.ctx, doc_name, self.namespace, self.folder_path, manifest_content)
        loaded.folder = self
        self.documents.append(loaded)
        self.corpus._add_document_objects(self, loaded)
        self._document_lookup[doc_name] = loaded
    elif doc_name.endswith(MODEL_JSON_EXTENSION):
        from cdm.persistence.modeljson import ManifestPersistence
        from cdm.persistence.modeljson.types import Model
        model_content = Model()
        model_content.decode(json_data)
        loaded = await ManifestPersistence.from_data(self.ctx, model_content, self)
        loaded.folder = self
        loaded.folder_path = self.folder_path
    else:
        from cdm.persistence.cdmfolder import DocumentPersistence
        from cdm.persistence.cdmfolder.types import DocumentContent
        document_content = DocumentContent()
        document_content.decode(json_data)
        converted = await DocumentPersistence.from_data(
            self.ctx, doc_name, self.namespace, self.folder_path, document_content)
        # The value returned by append is what this method hands back.
        loaded = self.documents.append(converted)

    loaded._file_system_modified_time = fs_modified_time
    loaded._is_dirty = False

    return loaded
async def load_document_from_path_async(folder: 'CdmFolderDefinition', doc_name: str,
                                        doc_container: 'CdmDocumentDefinition') -> 'CdmDocumentDefinition':
    """Read a document from the folder's storage adapter, convert it and append it to the folder.

    The lower-cased doc_name decides the persistence class: folio/manifest files,
    model.json files, and everything else as a plain CDM document.

    Returns None when reading or converting raises, or when a model.json
    document is not named exactly model.json. When doc_container is truthy,
    the loaded content is copied into it and that copy is appended and returned.
    """
    lowered_name = doc_name.lower()
    is_cdm_folder = lowered_name.endswith(
        (CdmCorpusDefinition._FOLIO_EXTENSION, CdmCorpusDefinition._MANIFEST_EXTENSION))
    is_model_json = lowered_name.endswith(CdmCorpusDefinition._MODEL_JSON_EXTENSION)

    # go get the doc
    loaded = None  # type: Optional[CdmDocumentDefinition]
    json_data = None
    fs_modified_time = None
    ctx = folder.ctx
    doc_path = folder.folder_path + doc_name
    adapter = ctx.corpus.storage.fetch_adapter(folder.namespace)  # type: StorageAdapter

    try:
        if adapter.can_read():
            json_data = await adapter.read_async(doc_path)
            fs_modified_time = await adapter.compute_last_modified_time_async(
                adapter.create_adapter_path(doc_path))
            ctx.logger.info('read file: {}'.format(doc_path))
    except Exception as read_error:
        ctx.logger.error('could not read %s from the \'%s\' namespace.\n Reason: \n%s',
                         doc_path, folder.namespace, read_error)
        return None

    try:
        if is_cdm_folder:
            from cdm.persistence.cdmfolder import ManifestPersistence
            from cdm.persistence.cdmfolder.types import ManifestContent
            manifest_content = ManifestContent()
            manifest_content.decode(json_data)
            loaded = await ManifestPersistence.from_data(
                ctx, doc_name, folder.namespace, folder.folder_path, manifest_content)
        elif is_model_json:
            if lowered_name != CdmCorpusDefinition._MODEL_JSON_EXTENSION:
                ctx.logger.error(
                    'Failed to load \'{}\', as it\'s not an acceptable filename. It must be model.json'
                    .format(doc_name))
                return None

            from cdm.persistence.modeljson import ManifestPersistence
            from cdm.persistence.modeljson.types import Model
            model_content = Model()
            model_content.decode(json_data)
            loaded = await ManifestPersistence.from_data(ctx, model_content, folder)
        else:
            from cdm.persistence.cdmfolder import DocumentPersistence
            from cdm.persistence.cdmfolder.types import DocumentContent
            document_content = DocumentContent()
            document_content.decode(json_data)
            loaded = await DocumentPersistence.from_data(
                ctx, doc_name, folder.namespace, folder.folder_path, document_content)
    except Exception as convert_error:
        ctx.logger.error('Could not convert \'{}\'. Reason \'{}\''.format(doc_path, convert_error))
        return None

    # add document to the folder, this sets all the folder/path things, caches name to content
    # association and may trigger indexing on content
    if loaded is not None:
        if doc_container:
            # A previously loaded document is being re-loaded. Rather than hand the caller a
            # brand new object it does not know about, copy the newly loaded content into the
            # old document object and keep using that one.
            loaded = loaded.copy(ResolveOptions(wrt_doc=doc_container), doc_container)

        folder.documents.append(loaded, doc_name)
        loaded._file_system_modified_time = fs_modified_time
        loaded._is_dirty = False

    return loaded