def _delete_one_urn(
    urn: str,
    soft: bool = False,
    dry_run: bool = False,
    entity_type: str = "dataset",
    cached_session_host: Optional[Tuple[sessions.Session, str]] = None,
    cached_emitter: Optional[rest_emitter.DatahubRestEmitter] = None,
    run_id: str = "delete-run-id",
    deletion_timestamp: Optional[int] = None,
) -> DeletionResult:
    """Delete a single entity, either as a soft delete or a hard delete.

    A soft delete emits a ``status`` aspect with ``removed=True`` for the
    entity. A hard delete posts the urn to the ``/entities?action=delete``
    endpoint. With ``dry_run`` set, nothing is emitted or posted; the intended
    action is only logged.

    Args:
        urn: Urn of the entity to delete.
        soft: If true, soft-delete; otherwise hard-delete.
        dry_run: If true, only log what would be deleted.
        entity_type: Entity type placed on the soft-delete proposal.
        cached_session_host: Optional ``(session, host)`` pair forwarded to
            the hard-delete endpoint call.
        cached_emitter: Optional emitter reused for soft deletes; when it is
            not supplied a new one is built from the CLI configuration.
        run_id: Run id stored in the soft-delete system metadata.
        deletion_timestamp: Value stored as ``lastObserved`` in the
            soft-delete system metadata. When omitted, ``_get_current_time()``
            is called at invocation time.

    Returns:
        A ``DeletionResult`` covering one entity. ``num_records`` is the row
        count reported by the hard-delete endpoint, and stays at
        ``UNKNOWN_NUM_RECORDS`` for soft deletes and dry runs.
    """
    # Resolve the timestamp per call. Using `_get_current_time()` directly as
    # the parameter default would evaluate it only once, at definition time,
    # and every later call would reuse that stale value.
    if deletion_timestamp is None:
        deletion_timestamp = _get_current_time()

    deletion_result = DeletionResult()
    deletion_result.num_entities = 1
    deletion_result.num_records = UNKNOWN_NUM_RECORDS  # Default is unknown

    if soft:
        # Add removed aspect
        if cached_emitter is None:
            _, gms_host = cli_utils.get_session_and_host()
            token = cli_utils.get_token()
            emitter = rest_emitter.DatahubRestEmitter(gms_server=gms_host, token=token)
        else:
            emitter = cached_emitter

        if not dry_run:
            emitter.emit_mcp(
                MetadataChangeProposalWrapper(
                    entityType=entity_type,
                    changeType=ChangeTypeClass.UPSERT,
                    entityUrn=urn,
                    aspectName="status",
                    aspect=StatusClass(removed=True),
                    systemMetadata=SystemMetadataClass(
                        runId=run_id, lastObserved=deletion_timestamp
                    ),
                )
            )
        else:
            logger.info(f"[Dry-run] Would soft-delete {urn}")
    elif not dry_run:
        payload_obj = {"urn": urn}
        # The endpoint also returns a urn; it is not needed here, so the
        # `urn` parameter is deliberately left untouched.
        _, rows_affected = cli_utils.post_delete_endpoint(
            payload_obj,
            "/entities?action=delete",
            cached_session_host=cached_session_host,
        )
        deletion_result.num_records = rows_affected
    else:
        # num_records keeps its UNKNOWN_NUM_RECORDS default since we don't
        # know how many rows will be affected.
        logger.info(f"[Dry-run] Would hard-delete {urn}")

    deletion_result.end()
    return deletion_result
def delete_with_filters(
    dry_run: bool,
    soft: bool,
    force: bool,
    include_removed: bool,
    search_query: str = "*",
    entity_type: str = "dataset",
    env: Optional[str] = None,
    platform: Optional[str] = None,
) -> DeletionResult:
    """Delete every entity matched by the given filters.

    The matching urns are fetched up front and a random sample of up to five
    is logged. Unless ``force`` is set, the user is asked to confirm. Each urn
    is then deleted through ``_delete_one_urn`` and the per-entity results are
    merged into one batch result.

    Args:
        dry_run: Forwarded to each per-entity delete; when true the deletes
            are only logged.
        soft: Forwarded to each per-entity delete; selects soft vs. hard
            deletion.
        force: Skip the interactive confirmation prompt.
        include_removed: Forwarded to the urn filter query.
        search_query: Forwarded to the urn filter query.
        entity_type: Entity type used for both the filter query and the
            per-entity delete.
        env: Optional environment filter forwarded to the urn filter query.
        platform: Optional platform filter forwarded to the urn filter query.

    Returns:
        The merged ``DeletionResult`` for all matched entities.

    Raises:
        click.Abort: If the user declines the confirmation prompt (a
            consequence of ``abort=True``).
    """
    session, gms_host = cli_utils.get_session_and_host()
    token = cli_utils.get_token()
    logger.info(f"datahub configured with {gms_host}")
    # One emitter is shared by every per-entity delete in the batch.
    emitter = rest_emitter.DatahubRestEmitter(gms_server=gms_host, token=token)
    batch_deletion_result = DeletionResult()

    # Materialise the urns: the count and a sample are needed before deleting.
    urns = list(
        cli_utils.get_urns_by_filter(
            env=env,
            platform=platform,
            search_query=search_query,
            entity_type=entity_type,
            include_removed=include_removed,
        )
    )
    logger.info(
        f"Filter matched {len(urns)} entities. Sample: {choices(urns, k=min(5, len(urns)))}"
    )
    if not force:
        click.confirm(
            f"This will delete {len(urns)} entities. Are you sure?", abort=True
        )

    # Capture one timestamp for the whole batch and pass it explicitly, rather
    # than relying on the helper's default, which is evaluated only once when
    # the helper is defined.
    deletion_timestamp = _get_current_time()

    for urn in progressbar.progressbar(urns, redirect_stdout=True):
        one_result = _delete_one_urn(
            urn,
            soft=soft,
            entity_type=entity_type,
            dry_run=dry_run,
            cached_session_host=(session, gms_host),
            cached_emitter=emitter,
            deletion_timestamp=deletion_timestamp,
        )
        batch_deletion_result.merge(one_result)

    batch_deletion_result.end()
    return batch_deletion_result