Exemple #1
0
 def add_table_to_database_container(
         self, dataset_urn: str,
         db_name: str) -> Iterable[MetadataWorkUnit]:
     """Link a dataset to the container of its database.

     The container key is obtained from ``self.gen_database_key(db_name)``
     and handed, together with ``dataset_urn``, to
     ``add_dataset_to_container``.

     Args:
         dataset_urn: URN of the dataset to place in the container.
         db_name: Database name used to build the container key.

     Yields:
         Every work unit returned by ``add_dataset_to_container``, each one
         registered with ``self.report.report_workunit`` before it is
         yielded.
     """
     for workunit in add_dataset_to_container(
             container_key=self.gen_database_key(db_name),
             dataset_urn=dataset_urn,
     ):
         # Record the work unit in the report before handing it downstream.
         self.report.report_workunit(workunit)
         yield workunit
 def add_table_to_schema_container(
         self, dataset_urn: str, db_name: str,
         schema: str) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]:
     """Link a dataset to the container of its schema.

     The container key is obtained from
     ``self.gen_schema_key(db_name, schema)`` and handed, together with
     ``dataset_urn``, to ``add_dataset_to_container``.

     Args:
         dataset_urn: URN of the dataset to place in the container.
         db_name: Database name used to build the container key.
         schema: Schema name used to build the container key.

     Yields:
         Every work unit returned by ``add_dataset_to_container``, each one
         registered with ``self.report.report_workunit`` before it is
         yielded.
     """
     for workunit in add_dataset_to_container(
             container_key=self.gen_schema_key(db_name, schema),
             dataset_urn=dataset_urn,
     ):
         # Record the work unit in the report before handing it downstream.
         self.report.report_workunit(workunit)
         yield workunit
Exemple #3
0
    def create_container_hierarchy(
            self, table_data: TableData,
            dataset_urn: str) -> Iterable[MetadataWorkUnit]:
        """Emit the container chain for a table and attach the dataset to it.

        For S3 tables (``table_data.is_s3``) a bucket container is emitted
        first and the remaining path comes from ``get_bucket_relative_path``.
        One "Folder" container is then emitted per "/"-separated segment of
        the directory part of the path, each parented to the previous
        container, and the dataset is finally added to the innermost one.

        When the path has no directory part, no folder container is emitted:
        an S3 dataset is attached directly to its bucket container, and a
        non-S3 dataset has nothing to attach to, so a warning is logged and
        nothing more is yielded.

        Args:
            table_data: Table description; ``table_path`` and ``is_s3`` are
                read here.
            dataset_urn: URN of the dataset to place in the hierarchy.

        Yields:
            The work units produced by ``self.create_emit_containers`` for
            each container, followed by those of
            ``add_dataset_to_container``.
        """
        logger.debug(f"Creating containers for {dataset_urn}")
        base_full_path = table_data.table_path
        parent_key = None
        if table_data.is_s3:
            bucket_name = get_bucket_name(table_data.table_path)
            bucket_key = self.gen_bucket_key(bucket_name)
            yield from self.create_emit_containers(
                container_key=bucket_key,
                name=bucket_name,
                sub_types=["S3 bucket"],
                parent_container_key=None,
            )
            parent_key = bucket_key
            base_full_path = get_bucket_relative_path(table_data.table_path)

        # Everything before the last "/" is the directory part of the path;
        # a path without any "/" has no directory part at all.
        last_slash = base_full_path.rfind("/")
        parent_folder_path = (base_full_path[:last_slash]
                              if last_slash != -1 else "")

        # "".split("/") returns [""], which would emit a folder container
        # with an empty name and keep the warning below from ever firing.
        # Skip the folder loop entirely when there is no directory part.
        folders = parent_folder_path.split("/") if parent_folder_path else []
        for folder in folders:
            abs_path = folder
            if parent_key:
                # Build the folder's absolute path on top of its parent's.
                prefix: str = ""
                if isinstance(parent_key, S3BucketKey):
                    prefix = parent_key.bucket_name
                elif isinstance(parent_key, FolderKey):
                    prefix = parent_key.folder_abs_path
                abs_path = prefix + "/" + folder
            folder_key = self.gen_folder_key(abs_path)
            yield from self.create_emit_containers(
                container_key=folder_key,
                name=folder,
                sub_types=["Folder"],
                parent_container_key=parent_key,
            )
            parent_key = folder_key

        # Neither a bucket nor a folder exists to hold the dataset.
        if parent_key is None:
            logger.warning(
                f"Failed to associate Dataset ({dataset_urn}) with container")
            return
        yield from add_dataset_to_container(parent_key, dataset_urn)