Example #1
0
    def corpus_path_to_adapter_path(self, corpus_path: str) -> Optional[str]:
        """Translate a corpus path into the adapter-domain path of its namespace.

        The corpus path is split into a namespace and a path. When the corpus
        path carries no namespace, ``self.default_namespace`` is used. The
        adapter registered for that namespace is then asked to create the
        adapter path for the path portion.

        :param corpus_path: corpus path, optionally prefixed with a namespace.
        :return: the adapter path, or None (after logging an error) when the
            corpus path is empty, cannot be split, or no adapter is registered
            for the resolved namespace.
        """
        if not corpus_path:
            logger.error(self._TAG, self._ctx,
                         'The corpus path is null or empty.',
                         StorageManager.corpus_path_to_adapter_path.__name__)
            return None

        # Break the corpus path into namespace and ... path
        path_tuple = StorageUtils.split_namespace_path(corpus_path)
        if not path_tuple:
            logger.error(self._TAG, self._ctx,
                         'The corpus path cannot be null or empty.',
                         self.corpus_path_to_adapter_path.__name__)
            return None
        namespace = path_tuple[0] or self.default_namespace

        # Get the adapter registered for this namespace
        namespace_adapter = self.fetch_adapter(namespace)
        if not namespace_adapter:
            # The failure here is a missing adapter, not an empty namespace, so
            # report the namespace that could not be resolved.
            logger.error(
                self._TAG, self._ctx,
                'The namespace "{}" has not been registered.'.format(namespace),
                StorageManager.corpus_path_to_adapter_path.__name__)
            return None

        # Ask the storage adapter to 'adapt' this path
        return namespace_adapter.create_adapter_path(path_tuple[1])
Example #2
0
    def create_table_propertybags(instance: CdmLocalEntityDeclarationDefinition, res_opt: ResolveOptions, options: CopyOptions, properties):
        """Fill the property bag persisted alongside a local entity declaration.

        :param instance: the entity declaration whose values are copied.
        :param res_opt: resolve options forwarded when copying exhibited traits.
        :param options: copy options forwarded when copying exhibited traits.
        :param properties: an existing property dictionary to extend, or None to
            start from an empty one. The dictionary is modified in place.
        :return: the populated dictionary, or None (after logging an error) when
            ``instance.entity_path`` is set but cannot be split into a namespace
            and a path.
        """
        if properties is None:
            properties = {}

        if instance.entity_path is not None:
            path_tuple = StorageUtils.split_namespace_path(instance.entity_path)
            if path_tuple is None:
                logger.error(instance.ctx, _TAG, LocalEntityDeclarationPersistence.create_table_propertybags.__name__, instance.at_corpus_path, CdmLogCode.ERR_PERSIST_SYMS_ENTITY_PATH_NULL, instance.entity_name)
                return None
            # Only the path portion is stored; the namespace is dropped.
            properties['cdm:entityPath'] = path_tuple[1]

        t2pm = TraitToPropertyMap(instance)
        is_hidden_trait = t2pm._fetch_trait_reference('is.hidden')

        # Keep a description that the caller already supplied.
        if 'cdm:description' not in properties:
            properties['cdm:description'] = instance.explanation

        if instance.last_child_file_modified_time is not None:
            properties['cdm:lastChildFileModifiedTime'] = instance.last_child_file_modified_time

        if instance.last_file_modified_time is not None:
            properties['cdm:lastFileModifiedTime'] = instance.last_file_modified_time

        if instance.last_file_status_check_time is not None:
            properties['cdm:lastFileStatusCheckTime'] = instance.last_file_status_check_time

        if is_hidden_trait is not None:
            properties['cdm:isHidden'] = True

        if instance.exhibits_traits is not None and len(instance.exhibits_traits) > 0:
            properties['cdm:entityDecTraits'] = copy_data_utils._array_copy_data(res_opt, instance.exhibits_traits, options)

        return properties
Example #3
0
    async def _save_odi_documents(self, doc: Any, adapter: 'StorageAdapter',
                                  new_name: str) -> None:
        """Write an ODI document, then recursively write its linked documents.

        The target path is the document's own ``documentPath`` with the
        trailing ``len(self.ODI_EXTENSION)`` characters replaced by
        ``new_name``; the namespace is stripped before the adapter is asked to
        write. Write failures are logged rather than raised.

        :param doc: the document to persist; must not be None.
        :param adapter: the storage adapter that performs the write.
        :param new_name: the suffix that replaces the ODI extension.
        :raises Exception: when ``doc`` is None.
        """
        if doc is None:
            raise Exception('Failed to persist document because doc is null.')

        # Hand the encoded document to the adapter.
        try:
            source_path = doc.documentPath
            stem_length = len(source_path) - len(self.ODI_EXTENSION)
            target_path = source_path[0:stem_length] + new_name
            # The adapter expects a path without the namespace prefix.
            split_path = StorageUtils.split_namespace_path(target_path)
            if not split_path:
                logger.error(self._TAG, self._ctx,
                             'The object path cannot be null or empty.',
                             self._save_odi_documents.__name__)
                return
            payload = doc.encode()
            await adapter.write_async(split_path[1], payload)
        except Exception as err:
            logger.error(
                self._TAG, self._ctx,
                'Failed to write to the file \'{}\' for reason {}.'.format(
                    doc.documentPath, err), self._save_odi_documents.__name__)

        # Nothing more to do when the document links to no others.
        if doc.get('linkedDocuments') is None:
            return

        for child_doc in doc.linkedDocuments:
            await self._save_odi_documents(child_doc, adapter, new_name)
Example #4
0
    def corpus_path_to_adapter_path(self, corpus_path: str) -> Optional[str]:
        """Resolve a corpus path to the adapter-domain path of its namespace.

        Returns None, after logging the matching error code, when the corpus
        path is empty, cannot be split, or its namespace (or the default
        namespace when the path has none) has no registered adapter.
        """
        with logger._enter_scope(self._TAG, self._ctx,
                                 self.corpus_path_to_adapter_path.__name__):
            if not corpus_path:
                logger.error(
                    self._ctx, self._TAG,
                    StorageManager.corpus_path_to_adapter_path.__name__, None,
                    CdmLogCode.ERR_STORAGE_NULL_CORPUS_PATH)
                return None

            # Separate the namespace from the rest of the path.
            split_path = StorageUtils.split_namespace_path(corpus_path)
            if not split_path:
                logger.error(self._ctx, self._TAG,
                             self.corpus_path_to_adapter_path.__name__, None,
                             CdmLogCode.ERR_STORAGE_NULL_CORPUS_PATH)
                return None

            namespace = split_path[0]
            if not namespace:
                namespace = self.default_namespace

            # Look up the adapter that serves this namespace.
            adapter = self.fetch_adapter(namespace)
            if not adapter:
                logger.error(
                    self._ctx, self._TAG,
                    StorageManager.corpus_path_to_adapter_path.__name__, None,
                    CdmLogCode.ERR_STORAGE_NAMESPACE_NOT_REGISTERED, namespace)
                return None

            # Let the adapter translate the namespace-free path.
            return adapter.create_adapter_path(split_path[1])
Example #5
0
def format_corpus_path(corpus_path: str) -> str:
    """Strip the namespace from a corpus path and ensure a leading slash.

    :param corpus_path: corpus path, optionally prefixed with a namespace.
    :return: the path portion, prefixed with '/' when it is non-empty and does
        not already start with one; None when the corpus path cannot be split.
    """
    path_tuple = StorageUtils.split_namespace_path(corpus_path)
    if path_tuple is None:
        return None
    corpus_path = path_tuple[1]
    # An empty path is returned untouched rather than turned into '/'.
    if corpus_path and corpus_path[0] != '/':
        corpus_path = '/{}'.format(corpus_path)
    return corpus_path
Example #6
0
    async def _load_folder_or_document_internal(
        self,
        object_path: str,
        force_reload: Optional[bool] = False,
        res_opt: Optional['ResolveOptions'] = None
    ) -> Optional['CdmContainerDefinition']:
        """Loads a folder or document given its corpus path.

        :param object_path: corpus path of the folder or document, optionally
            prefixed with a namespace; the path portion must start with '/'.
        :param force_reload: forwarded to the folder when fetching the document.
        :param res_opt: resolve options forwarded to the folder when fetching
            the document.
        :return: the folder when the path names an existing folder, otherwise
            whatever the deepest existing folder returns for the remaining
            path; None when the path is empty, cannot be split, is not
            absolute, or its namespace has no root folder or adapter.
        """

        if not object_path:
            return None

        # first check for namespace
        path_tuple = StorageUtils.split_namespace_path(object_path)
        if not path_tuple:
            logger.error(self._corpus.ctx, self._TAG,
                         self._load_folder_or_document.__name__, object_path,
                         CdmLogCode.ERR_PATH_NULL_OBJECT_PATH)
            return None
        namespace = path_tuple[0] or self._corpus.storage.default_namespace
        object_path = path_tuple[1]

        if not object_path.startswith('/'):
            return None

        namespace_folder = self._corpus.storage.fetch_root_folder(namespace)
        namespace_adapter = self._corpus.storage.fetch_adapter(namespace)
        if not namespace_folder or not namespace_adapter:
            logger.error(self._corpus.ctx, self._TAG,
                         self._load_folder_or_document.__name__, object_path,
                         CdmLogCode.ERR_STORAGE_NAMESPACE_NOT_REGISTERED,
                         namespace)
            return None

        last_folder = namespace_folder._fetch_child_folder_from_path(
            object_path, False)

        # don't create new folders, just go as far as possible
        if not last_folder:
            return None

        # maybe the search is for a folder?
        last_path = last_folder._folder_path
        if last_path == object_path:
            return last_folder

        # remove path to folder and then look in the folder
        object_path = object_path[len(last_path):]

        self._concurrent_read_lock.acquire()
        try:
            # During this step the document will be added to the pathLookup when it is added to a folder.
            doc = await last_folder._fetch_document_from_folder_path_async(
                object_path, force_reload, res_opt)
        finally:
            # Release even when the fetch raises, otherwise the lock stays
            # held and every later load would block on it.
            self._concurrent_read_lock.release()

        return doc
Example #7
0
    def create_adapter_path(self, corpus_path: str) -> str:
        """Map a corpus path onto a normalized path under ``self._full_root``.

        The namespace is dropped and leading slashes/backslashes are removed
        from the path portion before it is joined to the root. Returns None
        when the corpus path cannot be split.
        """
        split_path = StorageUtils.split_namespace_path(corpus_path)
        if not split_path:
            return None

        # Strip leading separators so the join stays below the root.
        relative_path = split_path[1].lstrip('\\/')
        joined_path = os.path.join(self._full_root, relative_path)
        return os.path.normpath(joined_path)
    def test_split_namespace_path(self):
        """Check split_namespace_path against a null path and several corpus paths."""
        self.assertIsNone(StorageUtils.split_namespace_path(None))

        # (corpus path, expected namespace, expected path)
        expectations = [
            ('local:/some/path', 'local', '/some/path'),
            ('/some/path', '', '/some/path'),
            # Only the first colon separates the namespace from the path.
            ('adls:/some/path:with:colons', 'adls', '/some/path:with:colons'),
        ]

        for corpus_path, expected_namespace, expected_path in expectations:
            split_path = StorageUtils.split_namespace_path(corpus_path)
            self.assertIsNotNone(split_path)
            self.assertEqual(expected_namespace, split_path[0])
            self.assertEqual(expected_path, split_path[1])
Example #9
0
def corpus_path_to_syms_path(corpus_path: str, strg_mgr: 'StorageManager')-> str:
    """Convert a namespaced corpus path into a syms path.

    :param corpus_path: corpus path; it must carry a namespace to be converted.
    :param strg_mgr: storage manager used to resolve the adapter path.
    :return: the syms path, or None when the corpus path cannot be split, has
        an empty namespace, has no adapter path, or the adapter path has no
        syms equivalent.
    """
    path_tuple = StorageUtils.split_namespace_path(corpus_path)
    # split_namespace_path yields None for a null path; guard before indexing.
    if not path_tuple or path_tuple[0] == '':
        return None

    adls_path = strg_mgr.corpus_path_to_adapter_path(corpus_path)
    if adls_path is None:
        return None

    return adls_adapter_path_to_syms_path(adls_path)
Example #10
0
    def _format_corpus_path(self, corpus_path: str) -> Optional[str]:
        """Return the namespace-free path, with a leading '/' added if missing.

        An empty path portion is returned unchanged; None is returned when the
        corpus path cannot be split.
        """
        split_path = StorageUtils.split_namespace_path(corpus_path)
        if not split_path:
            return None

        path_only = split_path[1]
        missing_slash = path_only and path_only[0] != '/'
        return '/' + path_only if missing_slash else path_only
    async def to_data_async(instance: CdmLocalEntityDeclarationDefinition,
                            manifest: CdmManifestDefinition,
                            syms_root_path: str, res_opt: 'ResolveOptions',
                            options: 'CopyOptions') -> TableEntity:
        """Build the table entity for a local entity declaration.

        The table entity is produced by ``DocumentPersistence.to_data_async``
        and then completed with the declaration's property bag and a storage
        descriptor. With data partitions, each partition is added to the
        storage descriptor and the data source is the longest common
        character prefix of the partition locations. Without partitions, the
        data source is built from ``syms_root_path`` and the entity name.

        :return: the table entity as returned by ``DocumentPersistence``
            (possibly None), or None after logging an error when a partition
            location has no adapter path.
        """
        table_entity = await DocumentPersistence.to_data_async(
            instance.entity_path, manifest, instance.ctx, res_opt, options)
        if table_entity is not None:
            te_properties = table_entity.properties
            properties = LocalEntityDeclarationPersistence.create_table_propertybags(
                instance, res_opt, options, te_properties.properties)
            if instance.data_partitions is not None and len(
                    instance.data_partitions) > 0:
                paths = []
                for element in instance.data_partitions:
                    if element.location is not None:
                        adls_path = instance.ctx.corpus.storage.corpus_path_to_adapter_path(
                            element.location)
                        if adls_path is None:
                            logger.error(
                                instance.ctx, _TAG, 'to_data_async',
                                instance.at_corpus_path, CdmLogCode.
                                ERR_PERSIST_SYMS_ADLS_ADAPTER_MISSING,
                                element.location)
                            return None
                        syms_path = utils.adls_adapter_path_to_syms_path(
                            adls_path)

                        if syms_path is not None:
                            location = syms_path
                        else:
                            # No syms equivalent: fall back to a path under the
                            # syms root built from the namespace-free location.
                            path_tuple = StorageUtils.split_namespace_path(
                                element.location)
                            location = utils.create_syms_absolute_path(
                                syms_root_path, path_tuple[1])
                        paths.append(location)

                    te_properties.storage_descriptor = DataPartitionPersistence.to_data(
                        element, te_properties.storage_descriptor, res_opt,
                        options)
                # Logic to find common root folder.
                # zip(*paths) walks the locations column by column; takewhile
                # stops at the first column where the characters differ.
                source = DataSource(''.join(c[0] for c in takewhile(
                    lambda x: all(x[0] == y for y in x), zip(*paths))))
                te_properties.storage_descriptor.source = source
            else:
                # location and format is mandatory for syms.
                source = DataSource(
                    utils.create_syms_absolute_path(syms_root_path,
                                                    instance.entity_name))
                te_properties.storage_descriptor.source = source
            te_properties.properties = properties

        return table_entity
    def create_absolute_corpus_path(self, object_path: str, obj: 'CdmObject' = None) -> Optional[str]:
        """Takes a corpus path (relative or absolute) and creates a valid absolute
        path with namespace.

        :param object_path: the corpus path to make absolute; may carry a
            namespace prefix.
        :param obj: optional object supplying the folder path and namespace for
            relative paths. Its own ``folder_path``/``namespace`` are used when
            it has both attributes, otherwise those of ``obj.in_document``.
        :return: ``namespace:path`` (or just the path when no namespace can be
            determined), or None when the path is empty, cannot be split, uses
            an unsupported format, or is relative with a namespace that
            differs from the object's namespace.
        """
        if not object_path:
            # It is the object path that is missing here, not the namespace.
            logger.error(self._TAG, self._ctx, 'The object path cannot be null or empty.', StorageManager.create_absolute_corpus_path.__name__)
            return None

        if self._contains_unsupported_path_format(object_path):
            # Already called status_rpt when checking for unsupported path format.
            return None

        path_tuple = StorageUtils.split_namespace_path(object_path)
        if not path_tuple:
            logger.error(self._TAG, self._ctx, 'The object path cannot be null or empty.', self.create_absolute_corpus_path.__name__)
            return None
        final_namespace = ''

        prefix = None
        namespace_from_obj = None

        if obj and hasattr(obj, 'namespace') and hasattr(obj, 'folder_path'):
            prefix = obj.folder_path
            namespace_from_obj = obj.namespace
        elif obj:
            prefix = obj.in_document.folder_path
            namespace_from_obj = obj.in_document.namespace

        if prefix and self._contains_unsupported_path_format(prefix):
            # Already called status_rpt when checking for unsupported path format.
            return None

        if prefix and prefix[-1] != '/':
            logger.warning(self._TAG, self._ctx, 'Expected path prefix to end in /, but it didn\'t. Appended the /', prefix)
            prefix += '/'

        # check if this is a relative path
        # startswith (rather than indexing [0]) keeps an empty path portion,
        # e.g. a bare "namespace:", from raising IndexError.
        if not path_tuple[1].startswith('/'):
            if not obj:
                # relative path and no other info given, assume default and root
                prefix = '/'

            if path_tuple[0] and path_tuple[0] != namespace_from_obj:
                logger.warning(self._TAG, self._ctx,
                               'The namespace "{}" found on the path does not match the namespace found on the object'.format(path_tuple[0]))
                return None

            path_tuple = (path_tuple[0], prefix + path_tuple[1])
            final_namespace = namespace_from_obj or path_tuple[0] or self.default_namespace
        else:
            final_namespace = path_tuple[0] or namespace_from_obj or self.default_namespace

        return '{}:{}'.format(final_namespace, path_tuple[1]) if final_namespace else path_tuple[1]
    async def file_status_check_async(self) -> None:
        """Check the modified time for this object and any children.

        Lists every file under the pattern's root location through the
        adapter of the root's namespace, matches each file against the glob
        pattern (preferred) or the regular expression, and asks the owning
        local entity declaration to create a partition for each match that is
        not already known. ``last_file_status_check_time`` is then set to the
        current UTC time. The method returns early, without updating that
        time, when a corpus path cannot be split into namespace and path.
        """
        with logger._enter_scope(self._TAG, self.ctx,
                                 self.file_status_check_async.__name__):
            namespace = None
            adapter = None
            file_info_list = None

            # make sure the root is a good full corpus path.
            root_cleaned = (self.root_location[:-1] if self.root_location
                            and self.root_location.endswith('/') else
                            self.root_location) or ''
            root_corpus = self.ctx.corpus.storage.create_absolute_corpus_path(
                root_cleaned, self.in_document)

            try:
                # Remove namespace from path
                path_tuple = StorageUtils.split_namespace_path(root_corpus)
                if not path_tuple:
                    logger.error(
                        self.ctx, self._TAG, CdmDataPartitionPatternDefinition.
                        file_status_check_async.__name__, self.at_corpus_path,
                        CdmLogCode.ERR_STORAGE_NULL_CORPUS_PATH)
                    return

                namespace = path_tuple[0]
                adapter = self.ctx.corpus.storage.fetch_adapter(namespace)

                if adapter is None:
                    logger.error(
                        self.ctx, self._TAG, CdmDataPartitionPatternDefinition.
                        file_status_check_async.__name__, self.at_corpus_path,
                        CdmLogCode.ERR_DOC_ADAPTER_NOT_FOUND,
                        self.in_document.name)
                else:
                    # get a list of all corpus_paths under the root.
                    # Only attempted with a real adapter; calling through None
                    # would just raise and log a misleading fetch warning.
                    file_info_list = await adapter.fetch_all_files_async(
                        path_tuple[1])
            except Exception as e:
                file_info_list = None
                logger.warning(
                    self.ctx, self._TAG, CdmDataPartitionPatternDefinition.
                    file_status_check_async.__name__, self.at_corpus_path,
                    CdmLogCode.WARN_PARTITION_FILE_FETCH_FAILED, root_corpus,
                    e)

            if file_info_list is not None and namespace is not None:
                # remove root of the search from the beginning of all paths so anything in the root is not found by regex.
                file_info_list = [(namespace + ':' + fi)[len(root_corpus):]
                                  for fi in file_info_list]

                if isinstance(self.owner, CdmLocalEntityDeclarationDefinition):
                    local_ent_dec_def_owner = cast(
                        'CdmLocalEntityDeclarationDefinition', self.owner)
                    # if both are present log warning and use glob pattern, otherwise use regularExpression
                    if self.glob_pattern and not self.glob_pattern.isspace(
                    ) and self.regular_expression and not self.regular_expression.isspace(
                    ):
                        logger.warning(
                            self.ctx, self._TAG,
                            CdmDataPartitionPatternDefinition.
                            file_status_check_async.__name__,
                            self.at_corpus_path,
                            CdmLogCode.WARN_PARTITION_GLOB_AND_REGEX_PRESENT,
                            self.glob_pattern, self.regular_expression)
                    regular_expression = self.glob_pattern_to_regex(
                        self.glob_pattern
                    ) if self.glob_pattern and not self.glob_pattern.isspace(
                    ) else self.regular_expression

                    # Pre-bind so a failed compile leaves a falsy value instead
                    # of an unbound name at the `if reg:` check below.
                    reg = None
                    try:
                        reg = regex.compile(regular_expression)
                    except Exception as e:
                        logger.error(
                            self.ctx, self._TAG,
                            CdmDataPartitionPatternDefinition.
                            file_status_check_async.__name__,
                            self.at_corpus_path,
                            CdmLogCode.ERR_VALDN_INVALID_EXPRESSION,
                            'glob pattern' if self.glob_pattern
                            and not self.glob_pattern.isspace() else
                            'regular expression',
                            self.glob_pattern if self.glob_pattern
                            and not self.glob_pattern.isspace() else
                            self.regular_expression, e)

                    if reg:
                        # a set to check if the data partition exists
                        data_partition_path_set = set()
                        if local_ent_dec_def_owner.data_partitions is not None:
                            for data_partition in local_ent_dec_def_owner.data_partitions:
                                data_partition_location_full_path = self.ctx.corpus.storage.create_absolute_corpus_path(
                                    data_partition.location, self.in_document)
                                data_partition_path_set.add(
                                    data_partition_location_full_path)

                        # a set to check if the incremental partition exists;
                        # guarded on the collection that is actually iterated.
                        incremental_partition_path_hash_set = set()
                        if local_ent_dec_def_owner.incremental_partitions is not None:
                            for incremental_partition in local_ent_dec_def_owner.incremental_partitions:
                                incremental_partition_location_full_path = self.ctx.corpus.storage.create_absolute_corpus_path(
                                    incremental_partition.location,
                                    self.in_document)
                                incremental_partition_path_hash_set.add(
                                    incremental_partition_location_full_path)

                        for fi in file_info_list:
                            m = reg.fullmatch(fi)
                            if m:
                                # create a map of arguments out of capture groups.
                                args = defaultdict(
                                    list)  # type: Dict[str, List[str]]
                                i_param = 0
                                for i in range(1, reg.groups + 1):
                                    captures = m.captures(i)
                                    if captures and self.parameters and i_param < len(
                                            self.parameters):
                                        # to be consistent with other languages, if a capture group captures
                                        # multiple things, only use the last thing that was captured
                                        single_capture = captures[-1]

                                        current_param = self.parameters[
                                            i_param]
                                        args[current_param].append(
                                            single_capture)
                                        i_param += 1
                                    else:
                                        break

                                # put the original but cleaned up root back onto the matched doc as the location stored in the partition.
                                location_corpus_path = root_cleaned + fi
                                full_path = root_corpus + fi
                                # Remove namespace from path
                                path_tuple = StorageUtils.split_namespace_path(
                                    full_path)
                                if not path_tuple:
                                    logger.error(
                                        self.ctx, self._TAG,
                                        CdmDataPartitionPatternDefinition.
                                        file_status_check_async.__name__,
                                        self.at_corpus_path, CdmLogCode.
                                        ERR_STORAGE_NULL_CORPUS_PATH)
                                    return
                                last_modified_time = await adapter.compute_last_modified_time_async(
                                    path_tuple[1])

                                if self.is_incremental and full_path not in incremental_partition_path_hash_set:
                                    local_ent_dec_def_owner._create_partition_from_pattern(
                                        location_corpus_path,
                                        self.exhibits_traits, args,
                                        self.specialized_schema,
                                        last_modified_time, True, self.name)
                                    incremental_partition_path_hash_set.add(
                                        full_path)

                                if not self.is_incremental and full_path not in data_partition_path_set:
                                    local_ent_dec_def_owner._create_partition_from_pattern(
                                        location_corpus_path,
                                        self.exhibits_traits, args,
                                        self.specialized_schema,
                                        last_modified_time)
                                    data_partition_path_set.add(full_path)

            # update modified times.
            self.last_file_status_check_time = datetime.now(timezone.utc)
Example #14
0
    async def _save_document_as_async(self, doc: 'CdmDocumentDefinition',
                                      options: 'CopyOptions', new_name: str,
                                      save_referenced: bool) -> bool:
        """a manifest or document can be saved with a new or exisitng name. This function on the corpus does all the actual work
        because the corpus knows about persistence types and about the storage adapters
        if saved with the same name, then consider this document 'clean' from changes. if saved with a back compat model or
        to a different name, then the source object is still 'dirty'
        an option will cause us to also save any linked documents."""

        # find out if the storage adapter is able to write.
        namespace = StorageUtils.split_namespace_path(new_name)[0]
        if not namespace:
            namespace = doc._namespace
            if not namespace:
                namespace = self._corpus.storage.default_namespace

        adapter = self._corpus.storage.fetch_adapter(namespace)
        if adapter is None:
            logger.error(
                self._ctx, self._TAG, self._save_document_as_async.__name__,
                doc.at_corpus_path,
                CdmLogCode.ERR_PERSIST_ADAPTER_NOT_FOUND_FOR_NAMESPACE,
                namespace)
            return False
        if not adapter.can_write():
            logger.error(self._ctx, self._TAG,
                         self._save_document_as_async.__name__,
                         doc.at_corpus_path,
                         CdmLogCode.ERR_PERSIST_ADAPTER_WRITE_FAILURE,
                         namespace)
            return False

        if not new_name:
            logger.error(self._ctx, self._TAG,
                         self._save_document_as_async.__name__,
                         doc.at_corpus_path,
                         CdmLogCode.ERR_PERSIST_NULL_DOC_NAME)
            return None

        # what kind of document is requested?
        persistence_type = ''
        from cdm.persistence.syms import utils
        if utils.check_if_syms_adapter(adapter):
            if new_name == self.SYMS_DATABASES:
                logger.error(self._ctx, self._TAG,
                             self._save_document_as_async.__name__,
                             doc.at_corpus_path,
                             CdmLogCode.ERR_PERSIST_SYMS_UNSUPPORTED_MANIFEST,
                             new_name)
                return False
            elif not new_name.lower().endswith(
                    self.MANIFEST_EXTENSION) and new_name.lower().endswith(
                        self.CDM_EXTENSION):
                logger.error(
                    self._ctx, self._TAG,
                    self._save_document_as_async.__name__, doc.at_corpus_path,
                    CdmLogCode.ERR_PERSIST_SYMS_UNSUPPORTED_CDM_CONVERSION,
                    new_name)
                return False
            persistence_type = self.SYMS
            options.persistence_type_name = self.SYMS
        else:
            if new_name.lower().endswith(self.MODEL_JSON_EXTENSION):
                persistence_type = self.MODEL_JSON
            else:
                persistence_type = self.CDM_FOLDER

        if persistence_type == self.MODEL_JSON and new_name.lower(
        ) != self.MODEL_JSON_EXTENSION:
            logger.error(self._ctx, self._TAG,
                         self._save_document_as_async.__name__,
                         doc.at_corpus_path, CdmLogCode.ERR_PERSIST_FAILURE,
                         new_name, self.MODEL_JSON_EXTENSION)
            return False

        # save the object into a json blob
        res_opt = {
            'wrt_doc': doc,
            'directives': AttributeResolutionDirectiveSet()
        }
        persisted_doc = None

        try:
            if new_name.lower().endswith(
                    PersistenceLayer.MODEL_JSON_EXTENSION) or new_name.lower(
                    ).endswith(
                        PersistenceLayer.MANIFEST_EXTENSION) or new_name.lower(
                        ).endswith(PersistenceLayer.FOLIO_EXTENSION):
                if persistence_type == self.CDM_FOLDER:
                    from cdm.persistence.cdmfolder import ManifestPersistence
                    persisted_doc = ManifestPersistence.to_data(
                        doc, res_opt, options)
                elif persistence_type == self.SYMS:
                    from cdm.persistence.syms.manifest_persistence import ManifestPersistence
                    persisted_doc = await ManifestPersistence.convert_manifest_to_syms(
                        doc, adapter, new_name, res_opt, options)
                else:
                    if new_name != self.MODEL_JSON_EXTENSION:
                        logger.error(self._ctx, self._TAG,
                                     self._save_document_as_async.__name__,
                                     doc.at_corpus_path,
                                     CdmLogCode.ERR_PERSIST_FAILURE, new_name)
                        return False
                    from cdm.persistence.modeljson import ManifestPersistence
                    persisted_doc = await ManifestPersistence.to_data(
                        doc, res_opt, options)
            elif new_name.lower().endswith(PersistenceLayer.CDM_EXTENSION):
                if persistence_type == self.CDM_FOLDER:
                    from cdm.persistence.cdmfolder import DocumentPersistence
                    persisted_doc = DocumentPersistence.to_data(
                        doc, res_opt, options)
                elif persistence_type == self.SYMS:
                    from cdm.persistence.syms.document_persistence import DocumentPersistence
                    persisted_doc = await DocumentPersistence.convert_doc_to_syms_table(
                        self._ctx, doc, adapter, new_name, res_opt, options)
            else:
                # Could not find a registered persistence class to handle this document type.
                logger.error(self._ctx, self._TAG,
                             self._save_document_as_async.__name__,
                             doc.at_corpus_path,
                             CdmLogCode.ERR_PERSIST_CLASS_MISSING, new_name)
                return False
        except Exception as e:
            logger.error(self._ctx, self._TAG,
                         self._save_document_as_async.__name__,
                         doc.at_corpus_path,
                         CdmLogCode.ERR_PERSIST_FILE_PERSIST_ERROR, new_name,
                         e)
            return False

        if not persisted_doc:
            logger.error(self._ctx, self._TAG,
                         self._save_document_as_async.__name__,
                         doc.at_corpus_path,
                         CdmLogCode.ERR_PERSIST_FILE_PERSIST_FAILED, new_name)
            return False

        # turn the name into a path
        new_path = '{}{}'.format(doc._folder_path, new_name)
        new_path = self._ctx.corpus.storage.create_absolute_corpus_path(
            new_path, doc)
        if new_path.startswith(namespace + ':'):
            new_path = new_path[len(namespace) + 1:]

        # ask the adapter to make it happen
        try:
            if persistence_type == self.SYMS:
                from cdm.persistence.syms import utils
                if new_name.lower().endswith(self.MANIFEST_EXTENSION):
                    await utils.create_or_update_syms_entities(
                        persisted_doc, adapter)
                elif new_name.lower().endswith(self.CDM_EXTENSION):
                    await utils.create_or_update_table_entity(
                        persisted_doc, adapter)
            else:
                content = persisted_doc.encode()
                await adapter.write_async(new_path, content)

            doc._file_system_modified_time = await adapter.compute_last_modified_time_async(
                new_path)

            # Write the adapter's config.
            if options._is_top_level_document and persistence_type != self.SYMS:
                await self._corpus.storage.save_adapters_config_async(
                    '/config.json', adapter)

                # The next document won't be top level, so reset the flag.
                options._is_top_level_document = False
        except Exception as e:
            logger.error(self._ctx, self._TAG,
                         self._save_document_as_async.__name__,
                         doc.at_corpus_path,
                         CdmLogCode.ERR_PERSIST_FILE_WRITE_FAILURE, new_name,
                         e)
            return False

        # if we also want to save referenced docs, then it depends on what kind of thing just got saved
        # if a model.json there are none. If a manifest or definition doc then ask the docs to do the right things
        # definition will save imports, manifests will save imports, schemas, sub manifests
        if save_referenced and persistence_type == self.CDM_FOLDER:
            saved_linked_docs = await doc._save_linked_documents_async(options)
            if not saved_linked_docs:
                logger.error(self._ctx, self._TAG,
                             self._save_document_as_async.__name__,
                             doc.at_corpus_path,
                             CdmLogCode.ERR_PERSIST_SAVE_LINK_DOCS, new_name)
                return False
        return True
Example #15
0
    def create_absolute_corpus_path(self,
                                    object_path: str,
                                    obj: 'CdmObject' = None) -> Optional[str]:
        """Takes a corpus path (relative or absolute) and creates a valid absolute
        path with namespace.

        Args:
            object_path: The corpus path to resolve. It may carry a namespace
                prefix and may be relative or start with '/'.
            obj: Optional object giving context for a relative path. When it has
                both ``namespace`` and ``folder_path`` attributes those are used;
                otherwise the ones of ``obj.in_document`` are used (if set).

        Returns:
            ``'<namespace>:<path>'``, or only the path when no namespace could be
            determined. ``None`` (after logging) when ``object_path`` is empty,
            when the path or the prefix is in an unsupported format, when the
            path cannot be split, or when a relative path names a namespace
            different from the one taken from ``obj``.
        """
        with logger._enter_scope(self._TAG, self._ctx,
                                 self.create_absolute_corpus_path.__name__):
            if not object_path:
                logger.error(
                    self._ctx, self._TAG,
                    StorageManager.create_absolute_corpus_path.__name__, None,
                    CdmLogCode.ERR_PATH_NULL_OBJECT_PATH)
                return None

            if self._contains_unsupported_path_format(object_path):
                # Already called status_rpt when checking for unsupported path format.
                return None

            path_tuple = StorageUtils.split_namespace_path(object_path)
            if not path_tuple:
                logger.error(
                    self._ctx, self._TAG,
                    StorageManager.create_absolute_corpus_path.__name__, None,
                    CdmLogCode.ERR_PATH_NULL_OBJECT_PATH)
                return None

            path_namespace = path_tuple[0]
            # Normalise a missing path part to '' so the checks below cannot
            # fail on an input that consists of a namespace prefix only.
            path = path_tuple[1] or ''

            prefix = None
            namespace_from_obj = None

            if obj and hasattr(obj, 'namespace') and hasattr(
                    obj, 'folder_path'):
                prefix = obj.folder_path
                namespace_from_obj = obj.namespace
            elif obj and obj.in_document:
                prefix = obj.in_document.folder_path
                namespace_from_obj = obj.in_document.namespace

            if prefix and self._contains_unsupported_path_format(prefix):
                # Already called status_rpt when checking for unsupported path format.
                return None

            if prefix and prefix[-1] != '/':
                logger.warning(
                    self._ctx, self._TAG,
                    StorageManager.create_absolute_corpus_path.__name__, None,
                    CdmLogCode.WARN_STORAGE_EXPECTED_PATH_PREFIX, prefix)
                prefix += '/'

            # check if this is a relative path
            # (startswith instead of indexing: an empty path must not raise)
            if not path.startswith('/'):
                if not obj:
                    # relative path and no other info given, assume default and root
                    prefix = '/'

                if path_namespace and path_namespace != namespace_from_obj:
                    logger.error(
                        self._ctx, self._TAG,
                        StorageManager.create_absolute_corpus_path.__name__,
                        None, CdmLogCode.ERR_STORAGE_NAMESPACE_MISMATCH,
                        path_namespace)
                    return None

                # obj may be given without providing any folder path; treat a
                # missing prefix as empty rather than concatenating None.
                path = (prefix or '') + path
                final_namespace = (namespace_from_obj or path_namespace
                                   or self.default_namespace)
            else:
                final_namespace = (path_namespace or namespace_from_obj
                                   or self.default_namespace)

            if final_namespace:
                return '{}:{}'.format(final_namespace, path)
            return path
    async def file_status_check_async(self) -> None:
        """Check the modified time for this object and any children.

        Lists every file under ``root_location`` through the storage adapter
        registered for the owning document's namespace, matches each path
        (relative to the root) against the glob pattern or the regular
        expression, and asks the owning local entity declaration to create a
        partition for every match. Finally ``last_file_status_check_time`` is set
        to the current UTC time.

        Problems are logged instead of raised. A missing adapter or a corpus path
        that cannot be split returns early without touching the timestamp; a
        failed file listing or an invalid pattern only skips the matching step.
        """
        namespace = self.in_document.namespace
        adapter = self.ctx.corpus.storage.fetch_adapter(namespace)

        if adapter is None:
            logger.error(
                self._TAG, self.ctx,
                'Adapter not found for the document {}'.format(
                    self.in_document.name),
                self.file_status_check_async.__name__)
            # Nothing can be listed without an adapter; carrying on would only
            # fail later on the None adapter.
            return

        # make sure the root is a good full corpus path.
        root_cleaned = (self.root_location or '')
        root_corpus = self.ctx.corpus.storage.create_absolute_corpus_path(
            root_cleaned, self.in_document)

        # Bound up front so a failed listing leaves a defined "no files" value
        # instead of an unbound local.
        file_info_list = None
        try:
            # Remove namespace from path
            path_tuple = StorageUtils.split_namespace_path(root_corpus)
            if not path_tuple:
                logger.error(
                    self._TAG, self.ctx,
                    'The root corpus path should not be null or empty.',
                    self.file_status_check_async.__name__)
                return
            # get a list of all corpus_paths under the root.
            file_info_list = await adapter.fetch_all_files_async(path_tuple[1])
        except Exception:
            # Best effort: any listing failure is reported as a missing folder.
            logger.warning(
                self._TAG, self.ctx,
                'The folder location \'{}\' described by a partition pattern does not exist'
                .format(root_corpus), self.file_status_check_async.__name__)

        if file_info_list is not None:
            # remove root of the search from the beginning of all paths so anything in the root is not found by regex.
            file_info_list = [(namespace + ':' + fi)[len(root_corpus):]
                              for fi in file_info_list]

            if isinstance(self.owner, CdmLocalEntityDeclarationDefinition):
                # A pattern counts as "set" only when it is non-empty and not
                # pure whitespace.
                use_glob = bool(
                    self.glob_pattern) and not self.glob_pattern.isspace()

                # if both are present log warning and use glob pattern, otherwise use regularExpression
                if use_glob and self.regular_expression and not self.regular_expression.isspace(
                ):
                    logger.warning(
                        self._TAG, self.ctx,
                        'The Data Partition Pattern contains both a glob pattern ({}) and a regular expression ({}) set, the glob pattern will be used.'
                        .format(self.glob_pattern, self.regular_expression),
                        self.file_status_check_async.__name__)
                regular_expression = self.glob_pattern_to_regex(
                    self.glob_pattern) if use_glob else self.regular_expression

                # Bound up front so a pattern that fails to compile skips the
                # matching loop instead of hitting an unbound local.
                reg = None
                try:
                    reg = regex.compile(regular_expression)
                except Exception as e:
                    logger.error(
                        self._TAG, self.ctx,
                        'The {} \'{}\' could not form a valid regular expression. Reason: {}'
                        .format(
                            'glob pattern' if use_glob else
                            'regular expression',
                            self.glob_pattern if use_glob else
                            self.regular_expression, e),
                        self.file_status_check_async.__name__)

                if reg:
                    for fi in file_info_list:
                        m = reg.fullmatch(fi)
                        if not m:
                            continue

                        # create a map of arguments out of capture groups.
                        args = defaultdict(
                            list)  # type: Dict[str, List[str]]
                        i_param = 0
                        for i in range(1, reg.groups + 1):
                            captures = m.captures(i)
                            if captures and self.parameters and i_param < len(
                                    self.parameters):
                                # to be consistent with other languages, if a capture group captures
                                # multiple things, only use the last thing that was captured
                                single_capture = captures[-1]

                                current_param = self.parameters[i_param]
                                args[current_param].append(single_capture)
                                i_param += 1
                            else:
                                # Stop at the first group that captured nothing
                                # or once the parameters are used up.
                                break

                        # put the original but cleaned up root back onto the matched doc as the location stored in the partition.
                        location_corpus_path = root_cleaned + fi
                        # Remove namespace from path
                        path_tuple = StorageUtils.split_namespace_path(
                            location_corpus_path)
                        if not path_tuple:
                            logger.error(
                                self._TAG, self.ctx,
                                'The corpus path should not be null or empty.',
                                self.file_status_check_async.__name__)
                            return
                        last_modified_time = await adapter.compute_last_modified_time_async(
                            adapter.create_adapter_path(path_tuple[1]))
                        cast('CdmLocalEntityDeclarationDefinition',
                             self.owner)._create_partition_from_pattern(
                                 location_corpus_path,
                                 self.exhibits_traits, args,
                                 self.specialized_schema,
                                 last_modified_time)

        # update modified times.
        self.last_file_status_check_time = datetime.now(timezone.utc)
Example #17
0
    def from_object(ctx: CdmCorpusContext, docname: str, namespace: str, path: str,
                    data_obj: 'SymsManifestContent') -> 'CdmManifestDefinition':
        """Build a manifest definition from a SyMS manifest content object.

        Args:
            ctx: Corpus context used to create objects and to log.
            docname: Value assigned to the manifest's ``name``.
            namespace: Value assigned to the manifest's ``_namespace``.
            path: Value assigned to the manifest's ``_folder_path``.
            data_obj: Source content; its ``database``, ``entities`` and
                ``relationships`` members are read.

        Returns:
            The populated manifest, or ``None`` (after logging an error) when the
            database object or its properties are invalid, or when no adapter
            could be mounted for the database location.
        """
        database = data_obj.database
        if database is None or database.type != SASEntityType.database:
            logger.error(ctx, _TAG, 'from_object', None, CdmLogCode.ERR_PERSIST_SYMS_INVALID_DB_OBJECT)
            return None

        database_properties = DatabaseProperties.deserialize(database.properties)
        # Only touch .source once the properties are known to exist; otherwise
        # the validity check below could never be reached.
        source = None
        if database_properties is not None:
            source = DataSource(None).deserialize(database_properties.source)
        if database_properties is None or source is None:
            logger.error(ctx, _TAG, 'from_object', None, CdmLogCode.ERR_PERSIST_SYMS_INVALID_DB_PROP_OBJECT)
            return None

        properties = database_properties.properties
        manifest = ctx.corpus.make_object(CdmObjectType.MANIFEST_DEF)

        manifest._folder_path = path
        manifest._namespace = namespace
        manifest.manifest_name = data_obj.database.name
        manifest.name = docname
        manifest.explanation = database_properties.description

        if properties is not None:
            if "cdm:schema" in properties:
                manifest.schema = properties["cdm:schema"]
            if "cdm:jsonSchemaSemanticVersion" in properties:
                manifest.json_schema_semantic_version = properties["cdm:jsonSchemaSemanticVersion"]
            if "cdm:documentVersion" in properties:
                # NOTE(review): was `manifest.DocumentVersion`; switched to the
                # snake_case form used by every other attribute set here -
                # confirm against the manifest class.
                manifest.document_version = properties["cdm:documentVersion"]
            if "cdm:traits" in properties:
                utils.add_list_to_cdm_collection(manifest.exhibits_traits,
                                                 utils.create_trait_reference_array(ctx, properties["cdm:traits"]))
            if "cdm:imports" in properties:
                # Each import is stored as a JSON string.
                for import_obj in properties["cdm:imports"]:
                    manifest.imports.append(ImportPersistence.from_data(ctx, json.loads(import_obj)))
            if "cdm:lastFileStatusCheckTime" in properties:
                manifest.last_file_status_check_time = dateutil.parser.parse(properties["cdm:lastFileStatusCheckTime"])
            if "cdm:lastFileModifiedTime" in properties:
                manifest.last_file_modified_time = dateutil.parser.parse(properties["cdm:lastFileModifiedTime"])
            if "cdm:lastChildFileModifiedTime" in properties:
                manifest.last_child_file_modified_time = dateutil.parser.parse(properties["cdm:lastChildFileModifiedTime"])

        # Make sure the manifest carries the database-location trait.
        t2pm = TraitToPropertyMap(manifest)
        source_trait = t2pm._fetch_trait_reference(ManifestPersistence.db_location_trait)
        if source_trait is None:
            source_trait = utils.create_source_trait(ctx, ManifestPersistence.db_location_trait, ManifestPersistence.db_location_trait_arg_name)
            manifest.exhibits_traits.append(source_trait)

        adls_path = utils.syms_path_to_adls_adapter_path(source.location)
        adls_corpus_path = ctx.corpus.storage.adapter_path_to_corpus_path(adls_path)
        if not adls_corpus_path:
            # No mounted adapter resolves this location: mount one under the
            # namespace found in the trait's first argument.
            path_tuple = StorageUtils.split_namespace_path(source_trait.arguments[0].value)
            obj = utils.create_and_mount_adls_adapter_from_adls_path(ctx.corpus.storage, adls_path, path_tuple[0])
            if obj is None:
                logger.error(ctx, _TAG, 'from_object', None, CdmLogCode.ERR_PERSIST_SYMS_ADLS_ADAPTER_NOT_MOUNTED, adls_path)
                return None

        if data_obj.entities is not None:
            for item in data_obj.entities.items:
                entity_obj = TableEntity(None, None).deserialize(item)
                if entity_obj.type == SASEntityType.table:
                    entity = LocalEntityDeclarationPersistence.from_data(ctx, entity_obj, manifest, database_properties.source.location)
                    if entity is not None:
                        manifest.entities.append(entity)
                    else:
                        logger.warning(ctx, _TAG, 'from_object', None, CdmLogCode.WARN_PERSIST_SYMS_ENTITY_SKIPPED, entity_obj.name)

        # Add the foundations import unless one is already present. any() is
        # needed here: a bare generator expression is always truthy, so negating
        # it never detects a missing import. An empty list is covered as well.
        if not any(imp.corpus_path == Constants._FOUNDATIONS_CORPUS_PATH for imp in manifest.imports):
            manifest.imports.append(Constants._FOUNDATIONS_CORPUS_PATH)

        if data_obj.relationships is not None:
            for item in data_obj.relationships.items:
                relationship_entity = RelationshipEntity(None, None).deserialize(item)
                manifest.relationships.extend(E2ERelationshipPersistence.from_data(ctx, relationship_entity))

        # TODO: Submanifest
        return manifest