Exemple #1
0
 def gen_schema_key(self, db_name: str, schema: str) -> DatabaseKey:
     """Build the container key for ``schema``.

     ``db_name`` is accepted but not used: the schema name is what gets
     passed as the key's ``database``. The key's ``instance`` is the
     configured platform instance, or the configured env when no platform
     instance is set.
     """
     cfg = self.config
     key_instance = cfg.platform_instance
     if key_instance is None:
         key_instance = cfg.env
     return DatabaseKey(
         database=schema,
         platform=self.platform,
         instance=key_instance,
     )
Exemple #2
0
 def gen_database_key(self, database: str) -> PlatformKey:
     """Build the container key for ``database``.

     The key's ``instance`` is the configured platform instance, or the
     configured env when no platform instance is set.
     """
     cfg = self.config
     key_instance = cfg.platform_instance
     if key_instance is None:
         key_instance = cfg.env
     return DatabaseKey(
         database=database,
         platform=self.platform,
         instance=key_instance,
     )
Exemple #3
0
 def gen_schema_key(self, db_name: str, schema: str) -> DatabaseKey:
     """Build the container key for ``schema``.

     ``db_name`` is accepted but not used: the schema name is what gets
     passed as the key's ``database``. Both the configured env and the
     configured platform instance are forwarded to the key as-is.
     """
     cfg = self.config
     return DatabaseKey(
         database=schema,
         platform=self.platform,
         instance=cfg.platform_instance,
         environment=cfg.env,
     )
Exemple #4
0
 def gen_database_key(self, database: str) -> DatabaseKey:
     """Build the container key for ``database``.

     The key's ``instance`` is the platform instance from the source
     config; when that is not set, the source config's env is used instead.
     """
     source_cfg = self.source_config
     key_instance = source_cfg.platform_instance
     if key_instance is None:
         # Keeps backward compatibility when the platform instance is missing.
         key_instance = source_cfg.env
     return DatabaseKey(
         database=database,
         platform=self.platform,
         instance=key_instance,
     )
Exemple #5
0
 def gen_database_key(self, database: str) -> DatabaseKey:
     """Build the container key for ``database``.

     The platform comes from ``get_underlying_platform()`` and the key's
     ``instance`` is this object's ``env``.
     """
     underlying_platform = self.get_underlying_platform()
     key_instance = self.env
     return DatabaseKey(
         database=database,
         platform=underlying_platform,
         instance=key_instance,
     )
Exemple #6
0
def migrate_containers(
    dry_run: bool,
    env: str,
    platform: str,
    hard: bool,
    instance: str,
    keep: bool,
    rest_emitter: DatahubRestEmitter,
) -> None:
    """Clone containers under urns derived from a key with a new ``instance``.

    For each container returned by ``get_containers_for_migration(env)`` that
    passes the ``env``/``platform`` filter, a key object is rebuilt from the
    container's custom properties according to its first sub-type, the key's
    ``instance`` is replaced with ``instance``, and all aspects are cloned to
    ``urn:li:container:<new guid>``. The cloned ``containerProperties`` and
    ``containerKey`` aspects are rewritten to reflect the new key. Afterwards
    ``process_container_relationships`` is invoked for the pair of urns and
    the source container is optionally deleted. The migration report is
    printed at the end.

    Containers with an unsupported sub-type, or whose custom properties cannot
    be parsed into the key class, are logged and skipped. Containers missing
    the ``instance`` or ``platform`` custom property are treated as not
    matching an active filter and skipped, rather than aborting the run.

    Args:
        dry_run: When true, nothing is emitted through ``rest_emitter`` and
            nothing is deleted; the report is still populated.
        env: Passed to ``get_containers_for_migration`` and, when not
            ``None``, compared against each container's ``instance`` custom
            property.
        platform: When not ``None``, compared against each container's
            ``platform`` custom property.
        hard: Selects hard instead of soft deletion of the source container.
        instance: Value assigned to the ``instance`` field of the new key.
        keep: When true, source containers are never deleted.
        rest_emitter: Emitter used to send the cloned aspects; also forwarded
            to ``process_container_relationships``.
    """
    run_id: str = f"container-migrate-{uuid.uuid4()}"
    migration_report = MigrationReport(run_id, dry_run, keep)

    # Source urn -> destination urn for every container handled so far; the
    # growing map is handed to process_container_relationships on each pass.
    container_id_map: Dict[str, str] = {}
    containers = get_containers_for_migration(env)
    for container in progressbar.progressbar(containers, redirect_stdout=True):
        src_urn = container["urn"]
        sub_type = container["aspects"]["subTypes"]["value"]["typeNames"][0]
        custom_properties = container["aspects"]["containerProperties"]["value"][
            "customProperties"
        ]

        # .get() instead of indexing: a container that lacks the property
        # cannot match the filter, and a KeyError here would abort the whole
        # run after earlier containers were already migrated.
        env_mismatch = (
            env is not None and custom_properties.get("instance") != env
        )
        platform_mismatch = (
            platform is not None
            and custom_properties.get("platform") != platform
        )
        if env_mismatch or platform_mismatch:
            log.debug(
                f"{src_urn} does not match filter criteria, skipping.. {custom_properties} {env} {platform}"
            )
            continue

        # Rebuild the key object matching the container's first sub-type.
        try:
            new_key: Union[SchemaKey, DatabaseKey, ProjectIdKey,
                           BigQueryDatasetKey]
            if sub_type == "Schema":
                new_key = SchemaKey.parse_obj(custom_properties)
            elif sub_type == "Database":
                new_key = DatabaseKey.parse_obj(custom_properties)
            elif sub_type == "Project":
                new_key = ProjectIdKey.parse_obj(custom_properties)
            elif sub_type == "Dataset":
                new_key = BigQueryDatasetKey.parse_obj(custom_properties)
            else:
                log.warning(f"Invalid subtype {sub_type}. Skipping")
                continue
        except Exception as e:
            # Best effort: one unparsable container must not stop the run.
            log.warning(
                f"Unable to map {custom_properties} to key due to exception {e}"
            )
            continue

        new_key.instance = instance

        # The key is not modified past this point, so compute the guid and the
        # destination urn once and reuse them below.
        new_guid = new_key.guid()
        dst_urn = f"urn:li:container:{new_guid}"
        log.debug(f"Container key migration: {src_urn} -> {dst_urn}")
        container_id_map[src_urn] = dst_urn

        # Clone aspects of container with the new urn
        for mcp in migration_utils.clone_aspect(
                src_urn,
                aspect_names=migration_utils.all_aspects,
                entity_type="container",
                dst_urn=dst_urn,
                dry_run=dry_run,
                run_id=run_id,
        ):
            migration_report.on_entity_create(mcp.entityUrn, mcp.aspectName)  # type: ignore
            assert mcp.aspect
            if mcp.aspectName == "containerProperties":
                # Update containerProperties to reflect the new key
                assert isinstance(mcp.aspect, ContainerPropertiesClass)
                container_properties: ContainerPropertiesClass = mcp.aspect
                container_properties.customProperties = new_key.dict(
                    by_alias=True, exclude_none=True)
                mcp.aspect = container_properties
            elif mcp.aspectName == "containerKey":
                # Point the key aspect at the guid of the new key
                assert isinstance(mcp.aspect, ContainerKeyClass)
                container_key: ContainerKeyClass = mcp.aspect
                container_key.guid = new_guid
                mcp.aspect = container_key
            if not dry_run:
                rest_emitter.emit_mcp(mcp)
                migration_report.on_entity_affected(mcp.entityUrn, mcp.aspectName)  # type: ignore

        process_container_relationships(
            container_id_map=container_id_map,
            dry_run=dry_run,
            src_urn=src_urn,
            dst_urn=dst_urn,
            migration_report=migration_report,
            rest_emitter=rest_emitter,
        )

        if not dry_run and not keep:
            log.info(f"will {'hard' if hard else 'soft'} delete {src_urn}")
            delete_cli._delete_one_urn(src_urn,
                                       soft=not hard,
                                       run_id=run_id,
                                       entity_type="container")
        migration_report.on_entity_migrated(src_urn, "status")  # type: ignore

    print(f"{migration_report}")