def corpus_path_to_adapter_path(self, corpus_path: str) -> Optional[str]:
    """Takes a corpus path, figures out the right adapter to use and then returns an adapter domain path.

    Returns None when the path is empty, cannot be split, or no adapter is
    registered for its namespace.
    """
    if not corpus_path:
        logger.error(self._TAG, self._ctx, 'The corpus path is null or empty.',
                     StorageManager.corpus_path_to_adapter_path.__name__)
        return None

    result = None

    # Break the corpus path into namespace and ... path
    path_tuple = StorageUtils.split_namespace_path(corpus_path)
    if not path_tuple:
        logger.error(self._TAG, self._ctx, 'The corpus path cannot be null or empty.',
                     self.corpus_path_to_adapter_path.__name__)
        return None
    namespace = path_tuple[0] or self.default_namespace

    # Get the adapter registered for this namespace
    namespace_adapter = self.fetch_adapter(namespace)
    if not namespace_adapter:
        # fix: the original message had no '{}' placeholder, so .format(namespace)
        # was a no-op, and the text ("namespace cannot be null or empty")
        # misdescribed the actual failure (no adapter registered).
        logger.error(self._TAG, self._ctx,
                     'The adapter for namespace \'{}\' has not been registered.'.format(namespace),
                     StorageManager.corpus_path_to_adapter_path.__name__)
    else:
        # Ask the storage adapter to 'adapt' this path
        result = namespace_adapter.create_adapter_path(path_tuple[1])

    return result
def create_table_propertybags(instance: CdmLocalEntityDeclarationDefinition, res_opt: ResolveOptions, options: CopyOptions, properties):
    """Build (or extend) the syms property bag for a local entity declaration.

    :param instance: the local entity declaration being persisted.
    :param res_opt: resolve options forwarded when copying trait data.
    :param options: copy options forwarded when copying trait data.
    :param properties: existing property dict to extend, or None to start fresh.
    :return: the populated property dict, or None when the entity path cannot be split.
    """
    if properties is None:  # fix: identity comparison instead of '== None'
        properties = {}

    if instance.entity_path is not None:
        path_tuple = StorageUtils.split_namespace_path(instance.entity_path)
        if path_tuple is None:  # fix: identity comparison instead of '== None'
            logger.error(instance.ctx, _TAG, LocalEntityDeclarationPersistence.create_table_propertybags.__name__,
                         instance.at_corpus_path, CdmLogCode.ERR_PERSIST_SYMS_ENTITY_PATH_NULL, instance.entity_name)
            return None
        properties['cdm:entityPath'] = path_tuple[1]

    t2pm = TraitToPropertyMap(instance)
    is_hidden_trait = t2pm._fetch_trait_reference('is.hidden')

    if 'cdm:description' not in properties:  # fix: idiomatic 'not in'
        properties['cdm:description'] = instance.explanation

    if instance.last_child_file_modified_time is not None:
        properties['cdm:lastChildFileModifiedTime'] = instance.last_child_file_modified_time
    if instance.last_file_modified_time is not None:
        properties['cdm:lastFileModifiedTime'] = instance.last_file_modified_time
    if instance.last_file_status_check_time is not None:
        properties['cdm:lastFileStatusCheckTime'] = instance.last_file_status_check_time

    if is_hidden_trait is not None:
        properties['cdm:isHidden'] = True
    if instance.exhibits_traits is not None and len(instance.exhibits_traits) > 0:
        properties['cdm:entityDecTraits'] = copy_data_utils._array_copy_data(res_opt, instance.exhibits_traits, options)

    return properties
async def _save_odi_documents(self, doc: Any, adapter: 'StorageAdapter', new_name: str) -> None:
    """Persist an ODI document through the given adapter, then recurse into its linked documents.

    :param doc: the ODI document object to write; must not be None.
    :param adapter: storage adapter that performs the actual write.
    :param new_name: replacement for the trailing ODI extension of the document path.
    :raises Exception: when doc is None.
    """
    if doc is None:
        raise Exception('Failed to persist document because doc is null.')

    # ask the adapter to make it happen.
    try:
        old_document_path = doc.documentPath
        # Swap the trailing ODI extension for the requested new name.
        new_document_path = old_document_path[
            0:len(old_document_path) - len(self.ODI_EXTENSION)] + new_name
        # Remove namespace from path
        path_tuple = StorageUtils.split_namespace_path(new_document_path)
        if not path_tuple:
            logger.error(self._TAG, self._ctx, 'The object path cannot be null or empty.',
                         self._save_odi_documents.__name__)
            return
        content = doc.encode()
        await adapter.write_async(path_tuple[1], content)
    except Exception as e:
        # Best-effort: a failed write is logged, and we still try the linked documents below.
        logger.error(
            self._TAG, self._ctx,
            'Failed to write to the file \'{}\' for reason {}.'.format(
                doc.documentPath, e), self._save_odi_documents.__name__)

    # Save linked documents.
    # NOTE(review): doc is accessed both via attributes (doc.documentPath, doc.linkedDocuments)
    # and via doc.get(...) here — presumably a dict-like wrapper; confirm the mixed access
    # style is intentional before refactoring.
    if doc.get('linkedDocuments') is not None:
        for linked_doc in doc.linkedDocuments:
            await self._save_odi_documents(linked_doc, adapter, new_name)
def corpus_path_to_adapter_path(self, corpus_path: str) -> Optional[str]:
    """Takes a corpus path, figures out the right adapter to use and then return an adapter domain path"""
    with logger._enter_scope(self._TAG, self._ctx, self.corpus_path_to_adapter_path.__name__):
        # An empty path cannot be resolved to any adapter.
        if not corpus_path:
            logger.error(self._ctx, self._TAG,
                         StorageManager.corpus_path_to_adapter_path.__name__, None,
                         CdmLogCode.ERR_STORAGE_NULL_CORPUS_PATH)
            return None

        # Split into (namespace, path); a falsy result means the input was unusable.
        split = StorageUtils.split_namespace_path(corpus_path)
        if not split:
            logger.error(self._ctx, self._TAG,
                         self.corpus_path_to_adapter_path.__name__, None,
                         CdmLogCode.ERR_STORAGE_NULL_CORPUS_PATH)
            return None

        ns = split[0] or self.default_namespace

        # Look up the adapter registered under that namespace.
        adapter_for_ns = self.fetch_adapter(ns)
        if not adapter_for_ns:
            logger.error(self._ctx, self._TAG,
                         StorageManager.corpus_path_to_adapter_path.__name__, None,
                         CdmLogCode.ERR_STORAGE_NAMESPACE_NOT_REGISTERED, ns)
            return None

        # Let the adapter translate the corpus-relative part into its own domain path.
        return adapter_for_ns.create_adapter_path(split[1])
def format_corpus_path(corpus_path: str) -> str:
    """Strip the namespace from a corpus path and ensure a leading '/'.

    :param corpus_path: corpus path, optionally namespace-prefixed.
    :return: the namespace-free path with a leading slash, or None when the
        input cannot be split into (namespace, path).
    """
    path_tuple = StorageUtils.split_namespace_path(corpus_path)
    if path_tuple is None:  # fix: identity comparison instead of '== None'
        return None
    corpus_path = path_tuple[1]
    if len(corpus_path) > 0 and corpus_path[0] != '/':
        corpus_path = '/{}'.format(corpus_path)
    return corpus_path
async def _load_folder_or_document_internal(
        self,
        object_path: str,
        force_reload: Optional[bool] = False,
        res_opt: Optional['ResolveOptions'] = None
) -> Optional['CdmContainerDefinition']:
    """Loads a folder or document given its corpus path.

    Returns the folder itself when the path names a folder, otherwise the
    document fetched from the deepest reachable folder; None on any failure.
    """
    if not object_path:
        return None

    # first check for namespace
    path_tuple = StorageUtils.split_namespace_path(object_path)
    if not path_tuple:
        logger.error(self._corpus.ctx, self._TAG, self._load_folder_or_document.__name__,
                     object_path, CdmLogCode.ERR_PATH_NULL_OBJECT_PATH)
        return None

    namespace = path_tuple[0] or self._corpus.storage.default_namespace
    object_path = path_tuple[1]

    # only absolute (corpus-rooted) paths can be loaded from here
    if not object_path.startswith('/'):
        return None

    namespace_folder = self._corpus.storage.fetch_root_folder(namespace)
    namespace_adapter = self._corpus.storage.fetch_adapter(namespace)
    if not namespace_folder or not namespace_adapter:
        logger.error(self._corpus.ctx, self._TAG, self._load_folder_or_document.__name__,
                     object_path, CdmLogCode.ERR_STORAGE_NAMESPACE_NOT_REGISTERED, namespace)
        return None

    # don't create new folders, just go as far as possible
    last_folder = namespace_folder._fetch_child_folder_from_path(object_path, False)
    if not last_folder:
        return None

    # maybe the search is for a folder?
    last_path = last_folder._folder_path
    if last_path == object_path:
        return last_folder

    # remove path to folder and then look in the folder
    object_path = object_path[len(last_path):]

    self._concurrent_read_lock.acquire()
    # fix: release the lock even when the awaited fetch raises; previously an
    # exception here left the read lock held forever, deadlocking later loads.
    try:
        # During this step the document will be added to the pathLookup when it is added to a folder.
        doc = await last_folder._fetch_document_from_folder_path_async(
            object_path, force_reload, res_opt)
    finally:
        self._concurrent_read_lock.release()

    return doc
def create_adapter_path(self, corpus_path: str) -> str:
    """Map a corpus path onto this adapter's local filesystem root."""
    split = StorageUtils.split_namespace_path(corpus_path)
    if not split:
        return None
    # Drop the namespace, then any leading separators so join() treats the
    # remainder as relative to the adapter root.
    relative = split[1].lstrip('\\/')
    return os.path.normpath(os.path.join(self._full_root, relative))
def test_split_namespace_path(self):
    """Test split_namespace_path function on different paths"""
    # None input yields None rather than a tuple.
    self.assertIsNone(StorageUtils.split_namespace_path(None))

    # (input, expected namespace, expected path) — note only the FIRST colon
    # separates the namespace; later colons stay in the path.
    cases = [
        ('local:/some/path', 'local', '/some/path'),
        ('/some/path', '', '/some/path'),
        ('adls:/some/path:with:colons', 'adls', '/some/path:with:colons'),
    ]
    for raw, expected_ns, expected_path in cases:
        result = StorageUtils.split_namespace_path(raw)
        self.assertIsNotNone(result)
        self.assertEqual(expected_ns, result[0])
        self.assertEqual(expected_path, result[1])
def corpus_path_to_syms_path(corpus_path: str, strg_mgr: 'StorageManager') -> str:
    """Convert a corpus path into a syms path via its ADLS adapter path.

    :param corpus_path: namespace-prefixed corpus path.
    :param strg_mgr: storage manager used to resolve the adapter path.
    :return: the syms path, or None when any conversion step fails.
    """
    path_tuple = StorageUtils.split_namespace_path(corpus_path)
    # fix: guard the split result before indexing; split_namespace_path returns
    # None for null/empty input, which previously raised TypeError here.
    if path_tuple is None:
        return None
    if path_tuple[0] != '':
        adls_path = strg_mgr.corpus_path_to_adapter_path(corpus_path)
        if adls_path is not None:
            syms_path = adls_adapter_path_to_syms_path(adls_path)
            if syms_path is not None:
                return syms_path
    return None
def _format_corpus_path(self, corpus_path: str) -> Optional[str]:
    """Strip the namespace from a corpus path and guarantee a leading slash."""
    parts = StorageUtils.split_namespace_path(corpus_path)
    if not parts:
        return None
    path_only = parts[1]
    # Prepend '/' only for non-empty paths that lack it.
    if path_only and not path_only.startswith('/'):
        path_only = '/' + path_only
    return path_only
async def to_data_async(instance: CdmLocalEntityDeclarationDefinition, manifest: CdmManifestDefinition, syms_root_path: str, res_opt: 'ResolveOptions', options: 'CopyOptions') -> TableEntity:
    """Convert a local entity declaration into a syms TableEntity.

    :param instance: entity declaration to convert.
    :param manifest: owning manifest, forwarded to document persistence.
    :param syms_root_path: syms root used when building absolute locations.
    :param res_opt: resolve options.
    :param options: copy options.
    :return: the populated TableEntity, or None on failure.
    """
    table_entity = await DocumentPersistence.to_data_async(
        instance.entity_path, manifest, instance.ctx, res_opt, options)
    if table_entity is not None:
        te_properties = table_entity.properties
        properties = LocalEntityDeclarationPersistence.create_table_propertybags(
            instance, res_opt, options, te_properties.properties)

        if instance.data_partitions is not None and len(instance.data_partitions) > 0:
            paths = []
            for element in instance.data_partitions:
                if element.location is not None:
                    adls_path = instance.ctx.corpus.storage.corpus_path_to_adapter_path(
                        element.location)
                    location = element.location
                    if adls_path is None:  # fix: identity comparison instead of '== None'
                        logger.error(
                            instance.ctx, _TAG, 'to_data_async',
                            instance.at_corpus_path,
                            CdmLogCode.ERR_PERSIST_SYMS_ADLS_ADAPTER_MISSING,
                            element.location)
                        return None
                    syms_path = utils.adls_adapter_path_to_syms_path(adls_path)
                    if syms_path is not None:
                        location = syms_path
                    else:
                        path_tuple = StorageUtils.split_namespace_path(element.location)
                        location = utils.create_syms_absolute_path(
                            syms_root_path, path_tuple[1])
                    paths.append(location)

                    te_properties.storage_descriptor = DataPartitionPersistence.to_data(
                        element, te_properties.storage_descriptor, res_opt, options)

            # Logic to find common root folder: the longest common character
            # prefix of every partition location.
            source = DataSource(''.join(c[0] for c in takewhile(
                lambda x: all(x[0] == y for y in x), zip(*paths))))
            te_properties.storage_descriptor.source = source
        else:
            # location and format is mandatory for syms.
            source = DataSource(
                utils.create_syms_absolute_path(syms_root_path, instance.entity_name))
            te_properties.storage_descriptor.source = source

        te_properties.properties = properties

    return table_entity
def create_absolute_corpus_path(self, object_path: str, obj: 'CdmObject' = None) -> Optional[str]:
    """Takes a corpus path (relative or absolute) and creates a valid absolute path with namespace"""
    if not object_path:
        logger.error(self._TAG, self._ctx, 'The namespace cannot be null or empty.',
                     StorageManager.create_absolute_corpus_path.__name__)
        return None

    if self._contains_unsupported_path_format(object_path):
        # Already called status_rpt when checking for unsupported path format.
        return None

    path_tuple = StorageUtils.split_namespace_path(object_path)
    if not path_tuple:
        logger.error(self._TAG, self._ctx, 'The object path cannot be null or empty.',
                     self.create_absolute_corpus_path.__name__)
        return None

    final_namespace = ''
    prefix = None
    namespace_from_obj = None

    if obj and hasattr(obj, 'namespace') and hasattr(obj, 'folder_path'):
        prefix = obj.folder_path
        namespace_from_obj = obj.namespace
    elif obj and obj.in_document:
        # fix: guard against obj.in_document being None, which previously raised
        # AttributeError; this matches the sibling implementation of this method.
        prefix = obj.in_document.folder_path
        namespace_from_obj = obj.in_document.namespace

    if prefix and self._contains_unsupported_path_format(prefix):
        # Already called status_rpt when checking for unsupported path format.
        return None
    if prefix and prefix[-1] != '/':
        logger.warning(self._TAG, self._ctx,
                       'Expected path prefix to end in /, but it didn\'t. Appended the /', prefix)
        prefix += '/'

    # check if this is a relative path
    if path_tuple[1][0] != '/':
        if not obj:
            # relative path and no other info given, assume default and root
            prefix = '/'
        # a namespace on a relative path must agree with the object's namespace
        if path_tuple[0] and path_tuple[0] != namespace_from_obj:
            logger.warning(self._TAG, self._ctx,
                           'The namespace "{}" found on the path does not match the namespace found on the object'.format(path_tuple[0]))
            return None
        path_tuple = (path_tuple[0], prefix + path_tuple[1])
        final_namespace = namespace_from_obj or path_tuple[0] or self.default_namespace
    else:
        final_namespace = path_tuple[0] or namespace_from_obj or self.default_namespace

    return '{}:{}'.format(final_namespace, path_tuple[1]) if final_namespace else path_tuple[1]
async def file_status_check_async(self) -> None:
    """Check the modified time for this object and any children."""
    with logger._enter_scope(self._TAG, self.ctx, self.file_status_check_async.__name__):
        namespace = None
        adapter = None

        # make sure the root is a good full corpus path.
        root_cleaned = (self.root_location[:-1]
                        if self.root_location and self.root_location.endswith('/')
                        else self.root_location) or ''
        root_corpus = self.ctx.corpus.storage.create_absolute_corpus_path(
            root_cleaned, self.in_document)

        file_info_list = None
        try:
            # Remove namespace from path
            path_tuple = StorageUtils.split_namespace_path(root_corpus)
            if not path_tuple:
                logger.error(self.ctx, self._TAG,
                             CdmDataPartitionPatternDefinition.file_status_check_async.__name__,
                             self.at_corpus_path, CdmLogCode.ERR_STORAGE_NULL_CORPUS_PATH)
                return

            namespace = path_tuple[0]
            adapter = self.ctx.corpus.storage.fetch_adapter(namespace)

            if adapter is None:
                # NOTE(review): execution continues after this error; the fetch below
                # then raises and is converted to the fetch-failed warning.
                logger.error(self.ctx, self._TAG,
                             CdmDataPartitionPatternDefinition.file_status_check_async.__name__,
                             self.at_corpus_path, CdmLogCode.ERR_DOC_ADAPTER_NOT_FOUND,
                             self.in_document.name)

            # get a list of all corpus_paths under the root.
            file_info_list = await adapter.fetch_all_files_async(path_tuple[1])
        except Exception as e:
            file_info_list = None
            logger.warning(self.ctx, self._TAG,
                           CdmDataPartitionPatternDefinition.file_status_check_async.__name__,
                           self.at_corpus_path, CdmLogCode.WARN_PARTITION_FILE_FETCH_FAILED,
                           root_corpus, e)

        if file_info_list is not None and namespace is not None:
            # remove root of the search from the beginning of all paths so anything
            # in the root is not found by regex.
            file_info_list = [(namespace + ':' + fi)[len(root_corpus):] for fi in file_info_list]

            if isinstance(self.owner, CdmLocalEntityDeclarationDefinition):
                local_ent_dec_def_owner = cast('CdmLocalEntityDeclarationDefinition', self.owner)

                # if both are present log warning and use glob pattern, otherwise use regularExpression
                if self.glob_pattern and not self.glob_pattern.isspace() \
                        and self.regular_expression and not self.regular_expression.isspace():
                    logger.warning(self.ctx, self._TAG,
                                   CdmDataPartitionPatternDefinition.file_status_check_async.__name__,
                                   self.at_corpus_path,
                                   CdmLogCode.WARN_PARTITION_GLOB_AND_REGEX_PRESENT,
                                   self.glob_pattern, self.regular_expression)
                regular_expression = self.glob_pattern_to_regex(self.glob_pattern) \
                    if self.glob_pattern and not self.glob_pattern.isspace() \
                    else self.regular_expression

                # fix: 'reg' was unbound (NameError at 'if reg:') when compile raised.
                reg = None
                try:
                    reg = regex.compile(regular_expression)
                except Exception as e:
                    logger.error(self.ctx, self._TAG,
                                 CdmDataPartitionPatternDefinition.file_status_check_async.__name__,
                                 self.at_corpus_path, CdmLogCode.ERR_VALDN_INVALID_EXPRESSION,
                                 'glob pattern' if self.glob_pattern and not self.glob_pattern.isspace()
                                 else 'regular expression',
                                 self.glob_pattern if self.glob_pattern and not self.glob_pattern.isspace()
                                 else self.regular_expression, e)

                if reg:
                    # a set to check if the data partition exists
                    data_partition_path_set = set()
                    if local_ent_dec_def_owner.data_partitions is not None:
                        for data_partition in local_ent_dec_def_owner.data_partitions:
                            data_partition_location_full_path = \
                                self.ctx.corpus.storage.create_absolute_corpus_path(
                                    data_partition.location, self.in_document)
                            data_partition_path_set.add(data_partition_location_full_path)

                    incremental_partition_path_hash_set = set()
                    # fix: the original tested data_partitions here but iterated
                    # incremental_partitions — a None incremental_partitions crashed
                    # and a None data_partitions skipped existing incrementals.
                    if local_ent_dec_def_owner.incremental_partitions is not None:
                        for incremental_partition in local_ent_dec_def_owner.incremental_partitions:
                            incremental_partition_location_full_path = \
                                self.ctx.corpus.storage.create_absolute_corpus_path(
                                    incremental_partition.location, self.in_document)
                            incremental_partition_path_hash_set.add(
                                incremental_partition_location_full_path)

                    for fi in file_info_list:
                        m = reg.fullmatch(fi)
                        if m:
                            # create a map of arguments out of capture groups.
                            args = defaultdict(list)  # type: Dict[str, List[str]]
                            i_param = 0
                            for i in range(1, reg.groups + 1):
                                captures = m.captures(i)
                                if captures and self.parameters and i_param < len(self.parameters):
                                    # to be consistent with other languages, if a capture group captures
                                    # multiple things, only use the last thing that was captured
                                    single_capture = captures[-1]
                                    current_param = self.parameters[i_param]
                                    args[current_param].append(single_capture)
                                    i_param += 1
                                else:
                                    break

                            # put the original but cleaned up root back onto the matched doc
                            # as the location stored in the partition.
                            location_corpus_path = root_cleaned + fi
                            full_path = root_corpus + fi
                            # Remove namespace from path
                            path_tuple = StorageUtils.split_namespace_path(full_path)
                            if not path_tuple:
                                logger.error(self.ctx, self._TAG,
                                             CdmDataPartitionPatternDefinition.file_status_check_async.__name__,
                                             self.at_corpus_path,
                                             CdmLogCode.ERR_STORAGE_NULL_CORPUS_PATH)
                                return
                            last_modified_time = await adapter.compute_last_modified_time_async(
                                path_tuple[1])

                            if self.is_incremental and full_path not in incremental_partition_path_hash_set:
                                local_ent_dec_def_owner._create_partition_from_pattern(
                                    location_corpus_path, self.exhibits_traits, args,
                                    self.specialized_schema, last_modified_time, True, self.name)
                                incremental_partition_path_hash_set.add(full_path)
                            if not self.is_incremental and full_path not in data_partition_path_set:
                                local_ent_dec_def_owner._create_partition_from_pattern(
                                    location_corpus_path, self.exhibits_traits, args,
                                    self.specialized_schema, last_modified_time)
                                data_partition_path_set.add(full_path)

        # update modified times.
        self.last_file_status_check_time = datetime.now(timezone.utc)
async def _save_document_as_async(self, doc: 'CdmDocumentDefinition', options: 'CopyOptions', new_name: str, save_referenced: bool) -> bool:
    """a manifest or document can be saved with a new or exisitng name.
    This function on the corpus does all the actual work
    because the corpus knows about persistence types and about the storage adapters
    if saved with the same name, then consider this document 'clean' from changes.
    if saved with a back compat model or to a different name, then the source object is still 'dirty'
    an option will cause us to also save any linked documents.

    :return: True on success, False on any failure.
    """
    # fix: this guard originally sat after split_namespace_path(new_name)[0], which
    # crashed (TypeError on None index) before the check could run, and it returned
    # None from a bool function. Validate first and return False.
    if not new_name:
        logger.error(self._ctx, self._TAG, self._save_document_as_async.__name__,
                     doc.at_corpus_path, CdmLogCode.ERR_PERSIST_NULL_DOC_NAME)
        return False

    # find out if the storage adapter is able to write.
    namespace = StorageUtils.split_namespace_path(new_name)[0]
    if not namespace:
        namespace = doc._namespace
    if not namespace:
        namespace = self._corpus.storage.default_namespace

    adapter = self._corpus.storage.fetch_adapter(namespace)
    if adapter is None:
        logger.error(self._ctx, self._TAG, self._save_document_as_async.__name__,
                     doc.at_corpus_path, CdmLogCode.ERR_PERSIST_ADAPTER_NOT_FOUND_FOR_NAMESPACE,
                     namespace)
        return False

    if not adapter.can_write():
        logger.error(self._ctx, self._TAG, self._save_document_as_async.__name__,
                     doc.at_corpus_path, CdmLogCode.ERR_PERSIST_ADAPTER_WRITE_FAILURE, namespace)
        return False

    # what kind of document is requested?
    persistence_type = ''
    from cdm.persistence.syms import utils
    if utils.check_if_syms_adapter(adapter):
        if new_name == self.SYMS_DATABASES:
            logger.error(self._ctx, self._TAG, self._save_document_as_async.__name__,
                         doc.at_corpus_path, CdmLogCode.ERR_PERSIST_SYMS_UNSUPPORTED_MANIFEST,
                         new_name)
            return False
        elif not new_name.lower().endswith(self.MANIFEST_EXTENSION) and \
                new_name.lower().endswith(self.CDM_EXTENSION):
            logger.error(self._ctx, self._TAG, self._save_document_as_async.__name__,
                         doc.at_corpus_path, CdmLogCode.ERR_PERSIST_SYMS_UNSUPPORTED_CDM_CONVERSION,
                         new_name)
            return False
        persistence_type = self.SYMS
        options.persistence_type_name = self.SYMS
    else:
        if new_name.lower().endswith(self.MODEL_JSON_EXTENSION):
            persistence_type = self.MODEL_JSON
        else:
            persistence_type = self.CDM_FOLDER

    # model.json documents must be named exactly 'model.json'
    if persistence_type == self.MODEL_JSON and new_name.lower() != self.MODEL_JSON_EXTENSION:
        logger.error(self._ctx, self._TAG, self._save_document_as_async.__name__,
                     doc.at_corpus_path, CdmLogCode.ERR_PERSIST_FAILURE, new_name,
                     self.MODEL_JSON_EXTENSION)
        return False

    # save the object into a json blob
    res_opt = {'wrt_doc': doc, 'directives': AttributeResolutionDirectiveSet()}
    persisted_doc = None

    try:
        if new_name.lower().endswith(PersistenceLayer.MODEL_JSON_EXTENSION) or \
                new_name.lower().endswith(PersistenceLayer.MANIFEST_EXTENSION) or \
                new_name.lower().endswith(PersistenceLayer.FOLIO_EXTENSION):
            if persistence_type == self.CDM_FOLDER:
                from cdm.persistence.cdmfolder import ManifestPersistence
                persisted_doc = ManifestPersistence.to_data(doc, res_opt, options)
            elif persistence_type == self.SYMS:
                from cdm.persistence.syms.manifest_persistence import ManifestPersistence
                persisted_doc = await ManifestPersistence.convert_manifest_to_syms(
                    doc, adapter, new_name, res_opt, options)
            else:
                if new_name != self.MODEL_JSON_EXTENSION:
                    logger.error(self._ctx, self._TAG, self._save_document_as_async.__name__,
                                 doc.at_corpus_path, CdmLogCode.ERR_PERSIST_FAILURE, new_name)
                    return False
                from cdm.persistence.modeljson import ManifestPersistence
                persisted_doc = await ManifestPersistence.to_data(doc, res_opt, options)
        elif new_name.lower().endswith(PersistenceLayer.CDM_EXTENSION):
            if persistence_type == self.CDM_FOLDER:
                from cdm.persistence.cdmfolder import DocumentPersistence
                persisted_doc = DocumentPersistence.to_data(doc, res_opt, options)
            elif persistence_type == self.SYMS:
                from cdm.persistence.syms.document_persistence import DocumentPersistence
                persisted_doc = await DocumentPersistence.convert_doc_to_syms_table(
                    self._ctx, doc, adapter, new_name, res_opt, options)
        else:
            # Could not find a registered persistence class to handle this document type.
            logger.error(self._ctx, self._TAG, self._save_document_as_async.__name__,
                         doc.at_corpus_path, CdmLogCode.ERR_PERSIST_CLASS_MISSING, new_name)
            return False
    except Exception as e:
        logger.error(self._ctx, self._TAG, self._save_document_as_async.__name__,
                     doc.at_corpus_path, CdmLogCode.ERR_PERSIST_FILE_PERSIST_ERROR, new_name, e)
        return False

    if not persisted_doc:
        logger.error(self._ctx, self._TAG, self._save_document_as_async.__name__,
                     doc.at_corpus_path, CdmLogCode.ERR_PERSIST_FILE_PERSIST_FAILED, new_name)
        return False

    # turn the name into a path
    new_path = '{}{}'.format(doc._folder_path, new_name)
    new_path = self._ctx.corpus.storage.create_absolute_corpus_path(new_path, doc)
    if new_path.startswith(namespace + ':'):
        new_path = new_path[len(namespace) + 1:]

    # ask the adapter to make it happen
    try:
        if persistence_type == self.SYMS:
            from cdm.persistence.syms import utils
            if new_name.lower().endswith(self.MANIFEST_EXTENSION):
                await utils.create_or_update_syms_entities(persisted_doc, adapter)
            elif new_name.lower().endswith(self.CDM_EXTENSION):
                await utils.create_or_update_table_entity(persisted_doc, adapter)
        else:
            content = persisted_doc.encode()
            await adapter.write_async(new_path, content)

        doc._file_system_modified_time = await adapter.compute_last_modified_time_async(new_path)

        # Write the adapter's config.
        if options._is_top_level_document and persistence_type != self.SYMS:
            await self._corpus.storage.save_adapters_config_async('/config.json', adapter)
            # The next document won't be top level, so reset the flag.
            options._is_top_level_document = False
    except Exception as e:
        logger.error(self._ctx, self._TAG, self._save_document_as_async.__name__,
                     doc.at_corpus_path, CdmLogCode.ERR_PERSIST_FILE_WRITE_FAILURE, new_name, e)
        return False

    # if we also want to save referenced docs, then it depends on what kind of thing just got saved
    # if a model.json there are none. If a manifest or definition doc then ask the docs to do the right things
    # definition will save imports, manifests will save imports, schemas, sub manifests
    if save_referenced and persistence_type == self.CDM_FOLDER:
        saved_linked_docs = await doc._save_linked_documents_async(options)
        if not saved_linked_docs:
            logger.error(self._ctx, self._TAG, self._save_document_as_async.__name__,
                         doc.at_corpus_path, CdmLogCode.ERR_PERSIST_SAVE_LINK_DOCS, new_name)
            return False

    return True
def create_absolute_corpus_path(self, object_path: str, obj: 'CdmObject' = None) -> Optional[str]:
    """Takes a corpus path (relative or absolute) and creates a valid absolute path with namespace"""
    with logger._enter_scope(self._TAG, self._ctx, self.create_absolute_corpus_path.__name__):
        if not object_path:
            logger.error(self._ctx, self._TAG,
                         StorageManager.create_absolute_corpus_path.__name__, None,
                         CdmLogCode.ERR_PATH_NULL_OBJECT_PATH)
            return None

        if self._contains_unsupported_path_format(object_path):
            # Already called status_rpt when checking for unsupported path format.
            return None

        split = StorageUtils.split_namespace_path(object_path)
        if not split:
            logger.error(self._ctx, self._TAG,
                         StorageManager.create_absolute_corpus_path.__name__, None,
                         CdmLogCode.ERR_PATH_NULL_OBJECT_PATH)
            return None
        ns_part, path_part = split

        # Pull a folder prefix and namespace off the context object, if one was given.
        folder_prefix = None
        ns_from_obj = None
        if obj and hasattr(obj, 'namespace') and hasattr(obj, 'folder_path'):
            folder_prefix = obj.folder_path
            ns_from_obj = obj.namespace
        elif obj and obj.in_document:
            folder_prefix = obj.in_document.folder_path
            ns_from_obj = obj.in_document.namespace

        if folder_prefix and self._contains_unsupported_path_format(folder_prefix):
            # Already called status_rpt when checking for unsupported path format.
            return None
        if folder_prefix and not folder_prefix.endswith('/'):
            logger.warning(self._ctx, self._TAG,
                           StorageManager.create_absolute_corpus_path.__name__, None,
                           CdmLogCode.WARN_STORAGE_EXPECTED_PATH_PREFIX, folder_prefix)
            folder_prefix += '/'

        if path_part[0] != '/':
            # Relative path: resolve against the object's folder (or root when no object given).
            if not obj:
                folder_prefix = '/'
            # A namespace on a relative path must agree with the object's own namespace.
            if ns_part and ns_part != ns_from_obj:
                logger.error(self._ctx, self._TAG,
                             StorageManager.create_absolute_corpus_path.__name__, None,
                             CdmLogCode.ERR_STORAGE_NAMESPACE_MISMATCH, ns_part)
                return None
            path_part = folder_prefix + path_part
            final_namespace = ns_from_obj or ns_part or self.default_namespace
        else:
            final_namespace = ns_part or ns_from_obj or self.default_namespace

        return '{}:{}'.format(final_namespace, path_part) if final_namespace else path_part
async def file_status_check_async(self) -> None:
    """Check the modified time for this object and any children."""
    namespace = self.in_document.namespace
    adapter = self.ctx.corpus.storage.fetch_adapter(namespace)

    if adapter is None:
        # NOTE(review): execution continues after this error; the fetch below then
        # raises and is converted into the folder-missing warning.
        logger.error(
            self._TAG, self.ctx,
            'Adapter not found for the document {}'.format(self.in_document.name),
            self.file_status_check_async.__name__)

    # make sure the root is a good full corpus path.
    root_cleaned = (self.root_location or '')
    root_corpus = self.ctx.corpus.storage.create_absolute_corpus_path(
        root_cleaned, self.in_document)

    # fix: 'file_info_list' was unbound (NameError) when the try block raised,
    # because the except clause never assigned it.
    file_info_list = None
    try:
        # Remove namespace from path
        path_tuple = StorageUtils.split_namespace_path(root_corpus)
        if not path_tuple:
            logger.error(self._TAG, self.ctx,
                         'The root corpus path should not be null or empty.',
                         self.file_status_check_async.__name__)
            return

        # get a list of all corpus_paths under the root.
        file_info_list = await adapter.fetch_all_files_async(path_tuple[1])
    except Exception as e:
        logger.warning(
            self._TAG, self.ctx,
            'The folder location \'{}\' described by a partition pattern does not exist'
            .format(root_corpus), self.file_status_check_async.__name__)

    if file_info_list is not None:
        # remove root of the search from the beginning of all paths so anything
        # in the root is not found by regex.
        file_info_list = [(namespace + ':' + fi)[len(root_corpus):] for fi in file_info_list]

        if isinstance(self.owner, CdmLocalEntityDeclarationDefinition):
            # if both are present log warning and use glob pattern, otherwise use regularExpression
            if self.glob_pattern and not self.glob_pattern.isspace() \
                    and self.regular_expression and not self.regular_expression.isspace():
                logger.warning(
                    self._TAG, self.ctx,
                    'The Data Partition Pattern contains both a glob pattern ({}) and a regular expression ({}) set, the glob pattern will be used.'
                    .format(self.glob_pattern, self.regular_expression),
                    self.file_status_check_async.__name__)
            regular_expression = self.glob_pattern_to_regex(self.glob_pattern) \
                if self.glob_pattern and not self.glob_pattern.isspace() \
                else self.regular_expression

            # fix: 'reg' was unbound (NameError at 'if reg:') when compile raised.
            reg = None
            try:
                reg = regex.compile(regular_expression)
            except Exception as e:
                logger.error(
                    self._TAG, self.ctx,
                    'The {} \'{}\' could not form a valid regular expression. Reason: {}'
                    .format(
                        'glob pattern' if self.glob_pattern and not self.glob_pattern.isspace()
                        else 'regular expression',
                        self.glob_pattern if self.glob_pattern and not self.glob_pattern.isspace()
                        else self.regular_expression, e),
                    self.file_status_check_async.__name__)

            if reg:
                for fi in file_info_list:
                    m = reg.fullmatch(fi)
                    if m:
                        # create a map of arguments out of capture groups.
                        args = defaultdict(list)  # type: Dict[str, List[str]]
                        i_param = 0
                        for i in range(1, reg.groups + 1):
                            captures = m.captures(i)
                            if captures and self.parameters and i_param < len(self.parameters):
                                # to be consistent with other languages, if a capture group captures
                                # multiple things, only use the last thing that was captured
                                single_capture = captures[-1]
                                current_param = self.parameters[i_param]
                                args[current_param].append(single_capture)
                                i_param += 1
                            else:
                                break

                        # put the original but cleaned up root back onto the matched doc
                        # as the location stored in the partition.
                        location_corpus_path = root_cleaned + fi
                        # Remove namespace from path
                        path_tuple = StorageUtils.split_namespace_path(location_corpus_path)
                        if not path_tuple:
                            logger.error(self._TAG, self.ctx,
                                         'The corpus path should not be null or empty.',
                                         self.file_status_check_async.__name__)
                            return
                        last_modified_time = await adapter.compute_last_modified_time_async(
                            adapter.create_adapter_path(path_tuple[1]))
                        cast('CdmLocalEntityDeclarationDefinition',
                             self.owner)._create_partition_from_pattern(
                            location_corpus_path, self.exhibits_traits, args,
                            self.specialized_schema, last_modified_time)

    # update modified times.
    self.last_file_status_check_time = datetime.now(timezone.utc)
def from_object(ctx: CdmCorpusContext, docname: str, namespace: str, path: str, data_obj: 'SymsManifestContent') -> 'CdmManifestDefinition':
    """Build a CdmManifestDefinition from a syms database payload.

    :param ctx: corpus context used to create CDM objects and resolve storage.
    :param docname: name to give the resulting manifest document.
    :param namespace: namespace of the manifest document.
    :param path: folder path of the manifest document.
    :param data_obj: deserialized syms content (database, entities, relationships).
    :return: the populated manifest, or None when validation fails.
    """
    database = data_obj.database
    if database is None or database.type != SASEntityType.database:
        logger.error(ctx, _TAG, 'from_object', None, CdmLogCode.ERR_PERSIST_SYMS_INVALID_DB_OBJECT)
        return None

    database_properties = DatabaseProperties.deserialize(database.properties)
    # fix: validate database_properties BEFORE dereferencing .source; the original
    # read database_properties.source first, which raised AttributeError on None
    # instead of reaching the error log below.
    if database_properties is None:
        logger.error(ctx, _TAG, 'from_object', None, CdmLogCode.ERR_PERSIST_SYMS_INVALID_DB_PROP_OBJECT)
        return None
    source = DataSource(None).deserialize(database_properties.source)
    if source is None:
        logger.error(ctx, _TAG, 'from_object', None, CdmLogCode.ERR_PERSIST_SYMS_INVALID_DB_PROP_OBJECT)
        return None

    properties = database_properties.properties
    manifest = ctx.corpus.make_object(CdmObjectType.MANIFEST_DEF)
    manifest._folder_path = path
    manifest._namespace = namespace
    manifest.manifest_name = data_obj.database.name
    manifest.name = docname
    manifest.explanation = database_properties.description

    if properties is not None:
        if "cdm:schema" in properties:
            manifest.schema = properties["cdm:schema"]
        if "cdm:jsonSchemaSemanticVersion" in properties:
            manifest.json_schema_semantic_version = properties["cdm:jsonSchemaSemanticVersion"]
        if "cdm:documentVersion" in properties:
            # fix: the original assigned to 'manifest.DocumentVersion', which only
            # created a stray attribute; snake_case matches every other manifest
            # field set in this function. TODO(review): confirm property name.
            manifest.document_version = properties["cdm:documentVersion"]
        if "cdm:traits" in properties:
            utils.add_list_to_cdm_collection(
                manifest.exhibits_traits,
                utils.create_trait_reference_array(ctx, properties["cdm:traits"]))
        if "cdm:imports" in properties:
            for import_obj in properties["cdm:imports"]:
                manifest.imports.append(ImportPersistence.from_data(ctx, json.loads(import_obj)))
        if "cdm:lastFileStatusCheckTime" in properties:
            manifest.last_file_status_check_time = dateutil.parser.parse(
                properties["cdm:lastFileStatusCheckTime"])
        if "cdm:lastFileModifiedTime" in properties:
            manifest.last_file_modified_time = dateutil.parser.parse(
                properties["cdm:lastFileModifiedTime"])
        if "cdm:lastChildFileModifiedTime" in properties:
            manifest.last_child_file_modified_time = dateutil.parser.parse(
                properties["cdm:lastChildFileModifiedTime"])

    t2pm = TraitToPropertyMap(manifest)
    source_trait = t2pm._fetch_trait_reference(ManifestPersistence.db_location_trait)
    if source_trait is None:  # fix: identity comparison instead of '== None'
        source_trait = utils.create_source_trait(
            ctx, ManifestPersistence.db_location_trait,
            ManifestPersistence.db_location_trait_arg_name)
        manifest.exhibits_traits.append(source_trait)

    adls_path = utils.syms_path_to_adls_adapter_path(source.location)
    adls_corpus_path = ctx.corpus.storage.adapter_path_to_corpus_path(adls_path)
    if not adls_corpus_path:
        path_tuple = StorageUtils.split_namespace_path(source_trait.arguments[0].value)
        obj = utils.create_and_mount_adls_adapter_from_adls_path(
            ctx.corpus.storage, adls_path, path_tuple[0])
        if obj is None:  # fix: identity comparison instead of '== None'
            logger.error(ctx, _TAG, 'from_object', None,
                         CdmLogCode.ERR_PERSIST_SYMS_ADLS_ADAPTER_NOT_MOUNTED, adls_path)
            return None

    if data_obj.entities is not None:
        for item in data_obj.entities.items:
            entity_obj = TableEntity(None, None).deserialize(item)
            if entity_obj.type == SASEntityType.table:
                entity = LocalEntityDeclarationPersistence.from_data(
                    ctx, entity_obj, manifest, database_properties.source.location)
                if entity is not None:
                    manifest.entities.append(entity)
                else:
                    logger.warning(ctx, _TAG, 'from_object', None,
                                   CdmLogCode.WARN_PERSIST_SYMS_ENTITY_SKIPPED, entity_obj.name)

    # fix: the original tested a generator expression for truthiness, which is
    # ALWAYS true, so foundations was only appended when imports was empty.
    # any() performs the intended membership check (and handles empty imports).
    if not any(x.corpus_path == Constants._FOUNDATIONS_CORPUS_PATH for x in manifest.imports):
        manifest.imports.append(Constants._FOUNDATIONS_CORPUS_PATH)

    if data_obj.relationships is not None:
        for item in data_obj.relationships.items:
            relationship_entity = RelationshipEntity(None, None).deserialize(item)
            manifest.relationships.extend(
                E2ERelationshipPersistence.from_data(ctx, relationship_entity))

    # TODO: Submanifest
    return manifest