Beispiel #1
0
def rollback(run_id: str, dry_run: bool) -> None:
    """Rollback a provided ingestion run to datahub"""
    # NOTE(review): the docstring is kept verbatim because this looks like a
    # CLI command whose docstring may double as help text -- confirm.
    #
    # run_id:  identifier sent to the rollback endpoint as "runId".
    # dry_run: sent as "dryRun"; also skips the confirmation prompt and
    #          switches the printed summary to the future tense.

    cli_utils.test_connectivity_complain_exit("ingest")

    # Only a real (non dry-run) rollback is destructive, so only that path
    # asks for confirmation; abort=True stops the command if the user declines.
    if not dry_run:
        click.confirm(
            "This will permanently delete data from DataHub. Do you want to continue?",
            abort=True,
        )

    payload_obj = {"runId": run_id, "dryRun": dry_run}
    structured_rows, entities_affected, aspects_affected = post_rollback_endpoint(
        payload_obj, "/runs?action=rollback"
    )

    # Pick whole phrases per tense instead of splicing optional words into the
    # f-string: an empty fragment between two literal spaces used to print a
    # double space in the non dry-run messages.
    if dry_run:
        delete_phrase = "will delete"
        roll_phrase = "will roll"
        revert_phrase = "that will be reverted"
    else:
        delete_phrase = "deleted"
        roll_phrase = "rolled"
        revert_phrase = "reverted"

    click.echo(
        "Rolling back deletes the entities created by a run and reverts the updated aspects"
    )
    click.echo(
        f"This rollback {delete_phrase} {entities_affected} entities and {roll_phrase} back {aspects_affected} aspects"
    )
    click.echo(
        f"showing first {len(structured_rows)} of {aspects_affected} aspects {revert_phrase} by this run"
    )
    click.echo(tabulate(structured_rows, RUN_TABLE_COLUMNS, tablefmt="grid"))
Beispiel #2
0
def rollback(
    run_id: str, force: bool, dry_run: bool, safe: bool, report_dir: str
) -> None:
    """Rollback a provided ingestion run to datahub"""
    # NOTE(review): the docstring is kept verbatim because this looks like a
    # CLI command whose docstring may double as help text -- confirm.
    #
    # run_id:     identifier sent to the rollback endpoint as "runId".
    # force:      skips the confirmation prompt.
    # dry_run:    sent as "dryRun"; skips the prompt and switches the printed
    #             summary to the future tense.
    # safe:       sent as "safe"; selects the "hide" vs "delete" wording of the
    #             warning about aspects outside this run.
    # report_dir: directory under which a timestamped report folder is written
    #             when the endpoint reports unsafe entities.
    #
    # Exits via sys.exit() if the report files cannot be written.

    cli_utils.test_connectivity_complain_exit("ingest")

    # Confirmation is only needed for a real rollback that was not forced;
    # abort=True stops the command if the user declines.
    if not force and not dry_run:
        click.confirm(
            "This will permanently delete data from DataHub. Do you want to continue?",
            abort=True,
        )

    payload_obj = {"runId": run_id, "dryRun": dry_run, "safe": safe}
    (
        structured_rows,
        entities_affected,
        aspects_reverted,
        aspects_affected,
        unsafe_entity_count,
        unsafe_entities,
    ) = post_rollback_endpoint(payload_obj, "/runs?action=rollback")

    # Pick whole phrases per tense instead of splicing an optional "will" into
    # the f-string: the empty fragment used to leave a double space in the
    # non dry-run summary.
    if dry_run:
        delete_phrase = "will delete"
        roll_phrase = "will roll"
    else:
        delete_phrase = "deleted"
        roll_phrase = "rolled"

    click.echo(
        "Rolling back deletes the entities created by a run and reverts the updated aspects"
    )
    click.echo(
        f"This rollback {delete_phrase} {entities_affected} entities and {roll_phrase} back {aspects_reverted} aspects"
    )

    click.echo(
        f"showing first {len(structured_rows)} of {aspects_reverted} aspects {'that will be ' if dry_run else ''}reverted by this run"
    )
    click.echo(tabulate(structured_rows, RUN_TABLE_COLUMNS, tablefmt="grid"))

    if aspects_affected > 0:
        if safe:
            click.echo(
                f"WARNING: This rollback {'will hide' if dry_run else 'has hidden'} {aspects_affected} aspects related to {unsafe_entity_count} entities being rolled back that are not part ingestion run id."
            )
        else:
            click.echo(
                f"WARNING: This rollback {'will delete' if dry_run else 'has deleted'} {aspects_affected} aspects related to {unsafe_entity_count} entities being rolled back that are not part ingestion run id."
            )

    if unsafe_entity_count > 0:
        # Each invocation writes into its own folder named after the current
        # local time, so earlier reports are not overwritten.
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        try:
            folder_name = os.path.join(report_dir, current_time)
            os.makedirs(folder_name, exist_ok=True)

            ingestion_config_file_name = os.path.join(folder_name, "config.json")
            with open(ingestion_config_file_name, "w") as file_handle:
                json.dump({"run_id": run_id}, file_handle)

            csv_file_name = os.path.join(folder_name, "unsafe_entities.csv")
            # newline="" lets the csv writer control line endings itself; without
            # it, platforms that translate "\n" emit an extra carriage return.
            with open(csv_file_name, "w", newline="") as file_handle:
                writer = csv.writer(file_handle)
                writer.writerow(["urn"])
                for row in unsafe_entities:
                    writer.writerow([row.get("urn")])

        except IOError as e:
            print(e)
            sys.exit("Unable to write reports to " + report_dir)
Beispiel #3
0
def delete(
    urn: str,
    force: bool,
    soft: bool,
    env: str,
    platform: str,
    entity_type: str,
    query: str,
    registry_id: str,
    dry_run: bool,
) -> None:
    """Delete metadata from datahub using a single urn or a combination of filters"""
    # Three mutually exclusive modes, checked in this order:
    #   1. urn          -> delete exactly that urn
    #   2. registry_id  -> delete by registry id (soft delete only as dry run)
    #   3. otherwise    -> delete whatever matches env / platform / type / query
    # Afterwards a timing summary and, if present, sample rows are printed.

    cli_utils.test_connectivity_complain_exit("delete")

    # Refuse to run when no selector at all was supplied.
    if not (urn or platform or env or query or registry_id):
        raise click.UsageError(
            "You must provide either an urn or a platform or an env or a query for me to delete anything"
        )

    # An absent query falls back to "*".
    if query is None:
        query = "*"

    # Only an unforced, real, hard delete needs interactive confirmation.
    if not (force or soft or dry_run):
        click.confirm(
            "This will permanently delete data from DataHub. Do you want to continue?",
            abort=True,
        )

    if urn:
        # Mode 1: a single urn. The entity type is derived from the urn and
        # replaces whatever entity_type was passed in.
        session, host = cli_utils.get_session_and_host()
        entity_type = guess_entity_type(urn=urn)
        logger.info(f"DataHub configured with {host}")
        deletion_result: DeletionResult = delete_one_urn_cmd(
            urn,
            soft=soft,
            dry_run=dry_run,
            entity_type=entity_type,
            cached_session_host=(session, host),
        )

        if not dry_run:
            if deletion_result.num_records == 0:
                outcome = f"Nothing deleted for {urn}"
            else:
                outcome = f"Successfully deleted {urn}. {deletion_result.num_records} rows deleted"
            click.echo(outcome)
    elif registry_id:
        # Mode 2: registry id. A soft delete is only accepted as a dry run.
        if soft and not dry_run:
            raise click.UsageError(
                "Soft-deleting with a registry-id is not yet supported. Try --dry-run to see what you will be deleting, before issuing a hard-delete using the --hard flag"
            )
        deletion_result = delete_for_registry(
            registry_id=registry_id, soft=soft, dry_run=dry_run
        )
    else:
        # Mode 3: filters.
        deletion_result = delete_with_filters(
            env=env,
            platform=platform,
            dry_run=dry_run,
            soft=soft,
            entity_type=entity_type,
            search_query=query,
            force=force,
        )

    # Wall-clock duration reported by the result object, in seconds.
    elapsed_seconds = (
        deletion_result.end_time_millis - deletion_result.start_time_millis
    ) / 1000.0

    if dry_run:
        # The record count may be the UNKNOWN_NUM_RECORDS sentinel; show that
        # as the word "unknown" rather than the raw value.
        if deletion_result.num_records != UNKNOWN_NUM_RECORDS:
            rows = deletion_result.num_records
        else:
            rows = "unknown"
        click.echo(
            f"{deletion_result.num_entities} entities with {rows} rows will be affected. Took {elapsed_seconds} seconds to evaluate."
        )
    else:
        message = "soft delete" if soft else "hard delete"
        click.echo(
            f"Took {elapsed_seconds} seconds to {message} {deletion_result.num_records} rows for {deletion_result.num_entities} entities"
        )

    samples = deletion_result.sample_records
    if samples:
        click.echo(tabulate(samples, RUN_TABLE_COLUMNS, tablefmt="grid"))