Example #1
0
def _delete_one_urn(
    urn: str,
    soft: bool = False,
    dry_run: bool = False,
    entity_type: str = "dataset",
    cached_session_host: Optional[Tuple[sessions.Session, str]] = None,
    cached_emitter: Optional[rest_emitter.DatahubRestEmitter] = None,
    run_id: str = "delete-run-id",
    deletion_timestamp: Optional[int] = None,
) -> DeletionResult:
    """Delete a single entity, either softly or permanently.

    A soft delete emits an UPSERT of the ``status`` aspect with
    ``removed=True`` for the entity. A hard delete posts the URN to the
    ``/entities?action=delete`` endpoint. With ``dry_run`` set, nothing is
    emitted or posted; the intended action is only logged.

    Args:
        urn: URN of the entity to delete.
        soft: Emit a ``removed`` status aspect instead of hard-deleting.
        dry_run: Log what would happen without performing the deletion.
        entity_type: Entity type placed on the soft-delete proposal.
        cached_session_host: Passed through to the hard-delete endpoint call.
        cached_emitter: Emitter reused for soft deletes; when falsy, a new one
            is built from the configured host and token.
        run_id: ``runId`` recorded in the soft-delete system metadata.
        deletion_timestamp: ``lastObserved`` recorded in the soft-delete
            system metadata. When ``None``, ``_get_current_time()`` is called
            at invocation time.

    Returns:
        A ``DeletionResult`` with ``num_entities`` set to 1 and
        ``num_records`` set to the affected row count reported by the
        hard-delete endpoint, or ``UNKNOWN_NUM_RECORDS`` otherwise.
    """
    if deletion_timestamp is None:
        # Resolve per call: a default written as `_get_current_time()` in the
        # signature is evaluated once, when the function is defined, and would
        # stamp every later deletion with that same stale value.
        deletion_timestamp = _get_current_time()

    deletion_result = DeletionResult()
    deletion_result.num_entities = 1
    deletion_result.num_records = UNKNOWN_NUM_RECORDS  # Default is unknown

    if soft:
        # Add removed aspect
        if not cached_emitter:
            _, gms_host = cli_utils.get_session_and_host()
            token = cli_utils.get_token()
            emitter = rest_emitter.DatahubRestEmitter(gms_server=gms_host, token=token)
        else:
            emitter = cached_emitter
        if not dry_run:
            emitter.emit_mcp(
                MetadataChangeProposalWrapper(
                    entityType=entity_type,
                    changeType=ChangeTypeClass.UPSERT,
                    entityUrn=urn,
                    aspectName="status",
                    aspect=StatusClass(removed=True),
                    systemMetadata=SystemMetadataClass(
                        runId=run_id, lastObserved=deletion_timestamp
                    ),
                )
            )
        else:
            logger.info(f"[Dry-run] Would soft-delete {urn}")
    else:
        if not dry_run:
            payload_obj = {"urn": urn}
            # Only the affected-row count is needed; the echoed URN is
            # discarded rather than rebinding the `urn` parameter.
            _, rows_affected = cli_utils.post_delete_endpoint(
                payload_obj,
                "/entities?action=delete",
                cached_session_host=cached_session_host,
            )
            deletion_result.num_records = rows_affected
        else:
            # num_records stays UNKNOWN_NUM_RECORDS since we don't know how
            # many rows would be affected.
            logger.info(f"[Dry-run] Would hard-delete {urn}")

    deletion_result.end()
    return deletion_result
Example #2
0
def delete_with_filters(
    dry_run: bool,
    soft: bool,
    force: bool,
    include_removed: bool,
    search_query: str = "*",
    entity_type: str = "dataset",
    env: Optional[str] = None,
    platform: Optional[str] = None,
) -> DeletionResult:
    """Delete every entity matched by the given search filters.

    Collects the matching URNs, logs how many matched together with a small
    sample, asks for interactive confirmation unless ``force`` is set, and
    then deletes the URNs one at a time through ``_delete_one_urn``, reusing
    a single session and emitter for the whole batch.

    Args:
        dry_run: Forwarded to ``_delete_one_urn``; log instead of deleting.
        soft: Forwarded to ``_delete_one_urn``; soft-delete instead of
            hard-delete.
        force: Skip the interactive confirmation prompt.
        include_removed: Forwarded to the URN filter lookup.
        search_query: Forwarded to the URN filter lookup.
        entity_type: Entity type used for both the lookup and the deletion.
        env: Forwarded to the URN filter lookup.
        platform: Forwarded to the URN filter lookup.

    Returns:
        A ``DeletionResult`` into which every per-URN result was merged.

    Raises:
        click.Abort: If the user declines the confirmation prompt
            (``click.confirm`` is called with ``abort=True``).
    """
    # Local import keeps this block self-contained; the preview must draw
    # without replacement, which `choices` does not do.
    from random import sample

    session, gms_host = cli_utils.get_session_and_host()
    token = cli_utils.get_token()

    logger.info(f"datahub configured with {gms_host}")
    emitter = rest_emitter.DatahubRestEmitter(gms_server=gms_host, token=token)
    batch_deletion_result = DeletionResult()
    urns = list(
        cli_utils.get_urns_by_filter(
            env=env,
            platform=platform,
            search_query=search_query,
            entity_type=entity_type,
            include_removed=include_removed,
        )
    )
    # Up to five distinct URNs; sampling with replacement could show the same
    # URN several times and misrepresent what matched.
    preview = sample(urns, k=min(5, len(urns)))
    logger.info(f"Filter matched {len(urns)} entities. Sample: {preview}")
    if not force:
        click.confirm(
            f"This will delete {len(urns)} entities. Are you sure?", abort=True
        )

    for urn in progressbar.progressbar(urns, redirect_stdout=True):
        one_result = _delete_one_urn(
            urn,
            soft=soft,
            entity_type=entity_type,
            dry_run=dry_run,
            cached_session_host=(session, gms_host),
            cached_emitter=emitter,
        )
        batch_deletion_result.merge(one_result)
    batch_deletion_result.end()

    return batch_deletion_result