Beispiel #1
0
def docs_list(directory) -> None:
    """Print every Data Docs site known to the project along with its URL.

    Sites that are configured but have no URL yet are listed with a hint
    showing the command that builds them. A successful usage event is sent
    whether or not any sites were found.

    Args:
        directory: project directory passed to the data-context loader.
    """
    context = toolkit.load_data_context_with_error_handling(directory)

    site_lines = []
    for site in context.get_docs_sites_urls():
        name = site["site_name"]
        location = site.get("site_url")
        if not location:
            # No URL means the site has not been built yet; tell the user how.
            location = (
                f"site configured but does not exist. Run the following command to build site: great_expectations "
                f"docs build --site-name {name}"
            )
        site_lines.append(" - <cyan>{}</cyan>: {}".format(name, location))

    if site_lines:
        cli_message_list(site_lines, _build_intro_string(site_lines))
    else:
        cli_message("No Data Docs sites found")

    send_usage_message(
        data_context=context,
        event="cli.docs.list",
        api_version="v2",
        success=True,
    )
Beispiel #2
0
def clean_data_docs(directory, site_name=None, all=None) -> None:
    """Delete built Data Docs.

    Args:
        directory: project directory passed to the data-context loader.
        site_name: name of the site to clean; forwarded unchanged to
            ``context.clean_data_docs``.
        all: flag signalling that every site should be removed. It is only
            used to validate that the caller made a choice. (The name shadows
            the builtin but is kept for backward compatibility with callers.)

    Raises:
        SystemExit: with status 1 when neither ``site_name`` nor ``all`` is
            given.
        Exception: anything raised by ``context.clean_data_docs`` is re-raised
            after a failed usage event has been sent.
    """
    context = toolkit.load_data_context_with_error_handling(directory)

    if site_name is None and all is None:
        cli_message("<red>{}</red>".format(
            "Please specify --all to remove all sites or specify a specific site using "
            "--site_name"))
        sys.exit(1)

    try:
        context.clean_data_docs(site_name=site_name)
    except Exception:
        # Previously a failure here propagated without ever reaching the
        # success=False report, which made that branch dead code.
        send_usage_message(
            data_context=context,
            event="cli.docs.clean",
            api_version="v2",
            success=False,
        )
        raise

    send_usage_message(
        data_context=context,
        event="cli.docs.clean",
        api_version="v2",
        success=True,
    )
    cli_message("<green>Cleaned data docs</green>")
def checkpoint_run(checkpoint, directory):
    """Run a checkpoint and exit with a status reflecting the result. (Experimental)

    Exits with 0 when the run reports success and with 1 otherwise. If running
    the checkpoint raises, the error text is handed to
    ``toolkit.exit_with_failure_message_and_stats``.

    Args:
        checkpoint: name of the checkpoint to load and run.
        directory: project directory passed to the data-context loader.
    """
    usage_event = "cli.checkpoint.run"
    context = toolkit.load_data_context_with_error_handling(
        directory=directory, from_cli_upgrade_command=False
    )
    loaded_checkpoint = toolkit.load_checkpoint(context, checkpoint, usage_event)

    try:
        results = loaded_checkpoint.run()
    except Exception as error:
        toolkit.exit_with_failure_message_and_stats(
            context, usage_event, f"<red>{error}</red>"
        )

    passed = bool(results["success"])
    cli_message("Validation succeeded!" if passed else "Validation failed!")
    # The usage event is sent with success=True in both outcomes, so it does
    # not reflect the validation result.
    toolkit.send_usage_message(context, event=usage_event, success=True)
    print_validation_operator_results_details(results)
    sys.exit(0 if passed else 1)
Beispiel #4
0
def checkpoint_list(directory) -> None:
    """Print the checkpoints configured in the project. (Experimental)

    When no checkpoints exist, a hint on how to create one is printed and the
    process exits with status 0. A successful usage event is sent in both
    cases.

    Args:
        directory: project directory passed to the data-context loader.
    """
    context = toolkit.load_data_context_with_error_handling(directory)
    usage_kwargs = {
        "data_context": context,
        "event": "cli.checkpoint.list",
        "api_version": "v2",
        "success": True,
    }

    checkpoints = context.list_checkpoints()
    if not checkpoints:
        cli_message(
            "No checkpoints found.\n"
            "  - Use the command `great_expectations checkpoint new` to create one."
        )
        send_usage_message(**usage_kwargs)
        sys.exit(0)

    count = len(checkpoints)
    suffix = "" if count <= 1 else "s"
    bullet_lines = []
    for cp in checkpoints:
        bullet_lines.append(f" - <cyan>{cp}</cyan>")
    cli_message_list(
        bullet_lines, list_intro_string=f"Found {count} checkpoint{suffix}."
    )
    send_usage_message(**usage_kwargs)
Beispiel #5
0
def confirm_proceed_or_exit(
    confirm_prompt: str = "Would you like to proceed?",
    continuation_message: str = "Ok, exiting now. You can always read more at https://docs.greatexpectations.io/ !",
    exit_on_no: bool = True,
    exit_code: int = 0,
) -> Optional[bool]:
    """
    Ask the user for confirmation before a command does real work.

    CLI commands that start a potentially lengthy (>1 sec) computation or
    modify resources (e.g., edit the config file, add objects to the stores)
    are expected to:
    1. Explain which resources will be created/modified/deleted
    2. Call this function to ask for the user's confirmation

    Keeping this uniform means users who have seen one command know what to
    expect from the others.

    Returns True when the user confirms. When the user declines, the
    continuation message is printed and the process exits with ``exit_code``,
    unless ``exit_on_no`` is False; in that case False is returned so the
    caller can run its own cleanup before exiting.
    """
    prompt_text = cli_colorize_string(confirm_prompt)
    farewell_text = cli_colorize_string(continuation_message)

    confirmed = click.confirm(prompt_text, default=True)
    if not confirmed and not exit_on_no:
        return False
    if not confirmed:
        cli_message(farewell_text)
        sys.exit(exit_code)
    return True
Beispiel #6
0
def suite_delete(suite, directory):
    """
    Delete an expectation suite from the expectation store.

    Args:
        suite: name of the expectation suite to delete.
        directory: project directory passed to the data-context loader.

    If the project has no suites, or none named ``suite``, the failure is
    reported through ``toolkit.exit_with_failure_message_and_stats`` instead
    of attempting the delete.
    """
    usage_event = "cli.suite.delete"
    context = toolkit.load_data_context_with_error_handling(directory)
    suite_names = context.list_expectation_suite_names()
    if not suite_names:
        toolkit.exit_with_failure_message_and_stats(
            context,
            usage_event,
            # Opening tag was previously written as a closing tag ("</red>"),
            # which produced malformed colour markup.
            "<red>No expectation suites found in the project.</red>",
        )

    if suite not in suite_names:
        toolkit.exit_with_failure_message_and_stats(
            context, usage_event, f"No expectation suite named {suite} found."
        )

    context.delete_expectation_suite(suite)
    cli_message(f"Deleted the expectation suite named: {suite}")
    send_usage_message(
        data_context=context,
        event=usage_event,
        api_version="v2",
        success=True,
    )
def checkpoint_new(checkpoint, suite, directory, datasource):
    """Create a new checkpoint for easy deployments. (Experimental)

    Args:
        checkpoint: name for the new checkpoint; verified not to exist yet.
        suite: name of the expectation suite the checkpoint validates.
        directory: project directory passed to the data-context loader.
        datasource: datasource name forwarded to ``toolkit.select_datasource``.

    Exits with status 1 (after a failed usage event) when no datasource is
    selected.
    """
    usage_event = "cli.checkpoint.new"
    context = toolkit.load_data_context_with_error_handling(directory)

    _verify_checkpoint_does_not_exist(context, checkpoint, usage_event)
    loaded_suite = toolkit.load_expectation_suite(context, suite, usage_event)

    selected_datasource = toolkit.select_datasource(
        context, datasource_name=datasource
    )
    if selected_datasource is None:
        toolkit.send_usage_message(context, usage_event, success=False)
        sys.exit(1)

    # Only the batch kwargs (fourth element) of the result are needed here.
    _, _, _, batch_kwargs = toolkit.get_batch_kwargs(
        context, selected_datasource.name
    )

    batch_entry = {
        "batch_kwargs": dict(batch_kwargs),
        "expectation_suite_names": [loaded_suite.expectation_suite_name],
    }
    context.add_checkpoint(
        name=checkpoint,
        class_name="LegacyCheckpoint",
        batches=[batch_entry],
    )

    confirmation = f"""<green>A Checkpoint named `{checkpoint}` was added to your project!</green>
  - To run this Checkpoint, run `great_expectations checkpoint run {checkpoint}`"""
    cli_message(confirmation)
    toolkit.send_usage_message(context, usage_event, success=True)
def checkpoint_script(checkpoint, directory):
    """
    Generate a python script that runs a checkpoint. (Experimental)

    The script is optional: checkpoints can also be run directly with the
    `great_expectations checkpoint run` command. It exists for users who
    prefer to trigger checkpoints from python.

    An existing script with the same name is never overwritten; that case is
    reported through ``toolkit.exit_with_failure_message_and_stats``.
    """
    usage_event = "cli.checkpoint.script"
    context = toolkit.load_data_context_with_error_handling(directory)

    # Loaded only for validation; the returned checkpoint itself is not used.
    toolkit.load_checkpoint(context, checkpoint, usage_event)

    script_name = f"run_{checkpoint}.py"
    script_path = os.path.join(
        context.root_directory, context.GE_UNCOMMITTED_DIR, script_name
    )

    if os.path.isfile(script_path):
        already_exists = f"""<red>Warning! A script named {script_name} already exists and this command will not overwrite it.</red>
  - Existing file path: {script_path}"""
        toolkit.exit_with_failure_message_and_stats(
            context, usage_event, already_exists
        )

    _write_checkpoint_script_to_disk(context.root_directory, checkpoint, script_path)

    created = f"""<green>A python script was created that runs the checkpoint named: `{checkpoint}`</green>
  - The script is located in `great_expectations/uncommitted/run_{checkpoint}.py`
  - The script can be run with `python great_expectations/uncommitted/run_{checkpoint}.py`"""
    cli_message(created)
    toolkit.send_usage_message(context, event=usage_event, success=True)
Beispiel #9
0
def suite_list(directory):
    """Print the names of the Expectation Suites available in the project.

    A usage event is sent with success=True after listing (including the
    empty case) and with success=False if anything raises, in which case the
    exception is re-raised.
    """
    context = toolkit.load_data_context_with_error_handling(directory)
    usage_event = "cli.suite.list"

    try:
        bullet_lines = []
        for suite_name in context.list_expectation_suite_names():
            bullet_lines.append(f" - <cyan>{suite_name}</cyan>")

        total = len(bullet_lines)
        if total == 0:
            cli_message("No Expectation Suites found")
            toolkit.send_usage_message(
                data_context=context, event=usage_event, success=True
            )
            return

        if total == 1:
            header = "1 Expectation Suite found:"
        else:
            header = f"{total} Expectation Suites found:"

        cli_message_list(bullet_lines, header)
        toolkit.send_usage_message(
            data_context=context, event=usage_event, success=True
        )
    except Exception:
        toolkit.send_usage_message(
            data_context=context, event=usage_event, success=False
        )
        raise
Beispiel #10
0
def select_datasource(context: DataContext, datasource_name: str = None) -> Datasource:
    """Resolve a datasource, prompting the user when a choice is needed.

    When ``datasource_name`` is given it is looked up directly. Otherwise the
    context's datasources are sorted by name: a single one is picked
    automatically, several trigger a numbered prompt, and none prints an
    error message.

    Returns:
        The datasource obtained from ``context.get_datasource``, or None when
        no name could be determined.
    """
    # TODO consolidate all the myriad CLI tests into this
    if datasource_name is None:
        candidates = sorted(context.list_datasources(), key=lambda ds: ds["name"])
        if not candidates:
            cli_message(
                "<red>No datasources found in the context. To add a datasource, run `great_expectations datasource new`</red>"
            )
        elif len(candidates) == 1:
            datasource_name = candidates[0]["name"]
        else:
            menu_lines = []
            for position, candidate in enumerate(candidates, 1):
                menu_lines.append("    {}. {}".format(position, candidate["name"]))
            valid_answers = [
                str(position) for position in range(1, len(candidates) + 1)
            ]
            answer = click.prompt(
                "Select a datasource" + "\n" + "\n".join(menu_lines) + "\n",
                type=click.Choice(valid_answers),
                show_choices=False,
            )
            # The menu is 1-based; convert back to a list index.
            datasource_name = candidates[int(answer) - 1]["name"]

    if datasource_name is None:
        return None
    return context.get_datasource(datasource_name)
Beispiel #11
0
def load_data_context_with_error_handling(
    directory: str, from_cli_upgrade_command: bool = False
) -> DataContext:
    """Build a DataContext, turning known configuration errors into CLI exits.

    When called from the upgrade command with a config version below
    ``CURRENT_GE_CONFIG_VERSION``, a single version-increment upgrade is
    attempted and the context is reloaded if that upgrade bumped the version
    without an exception.

    An unsupported config version triggers ``upgrade_project`` when an
    upgrade helper exists for it; every other handled error prints a message
    and exits with status 1.
    """
    try:
        context: DataContext = DataContext(context_root_dir=directory)
        ge_config_version: int = context.get_config().config_version

        upgrade_wanted = (
            from_cli_upgrade_command
            and int(ge_config_version) < CURRENT_GE_CONFIG_VERSION
        )
        if not upgrade_wanted:
            return context

        directory = directory or context.root_directory
        increment_version, exception_occurred = upgrade_project_one_version_increment(
            context_root_dir=directory,
            ge_config_version=ge_config_version,
            continuation_message=EXIT_UPGRADE_CONTINUATION_MESSAGE,
            from_cli_upgrade_command=from_cli_upgrade_command,
        )
        if not exception_occurred and increment_version:
            # Reload so the returned context reflects the upgraded project.
            context = DataContext(context_root_dir=directory)
        return context
    except ge_exceptions.UnsupportedConfigVersionError as err:
        directory = directory or DataContext.find_context_root_dir()
        ge_config_version = DataContext.get_ge_config_version(
            context_root_dir=directory
        )
        upgrade_helper_class = None
        if ge_config_version:
            upgrade_helper_class = GE_UPGRADE_HELPER_VERSION_MAP.get(
                int(ge_config_version)
            )
        if upgrade_helper_class and ge_config_version < CURRENT_GE_CONFIG_VERSION:
            upgrade_project(
                context_root_dir=directory,
                ge_config_version=ge_config_version,
                from_cli_upgrade_command=from_cli_upgrade_command,
            )
        else:
            cli_message(f"<red>{err.message}</red>")
            sys.exit(1)
    except (
        ge_exceptions.ConfigNotFoundError,
        ge_exceptions.InvalidConfigError,
    ) as err:
        cli_message(f"<red>{err.message}</red>")
        sys.exit(1)
    except (
        ge_exceptions.PluginModuleNotFoundError,
        ge_exceptions.PluginClassNotFoundError,
    ) as err:
        # Both plugin errors carry a pre-coloured CLI message.
        cli_message(err.cli.v012_colored_message)
        sys.exit(1)
    except ge_exceptions.InvalidConfigurationYamlError as err:
        cli_message(f"<red>{str(err)}</red>")
        sys.exit(1)
Beispiel #12
0
def upgrade_project(
    context_root_dir, ge_config_version, from_cli_upgrade_command=False
) -> None:
    """Interactively upgrade a project until it reaches the current config version.

    The user is told the config is out of date and asked to confirm running
    the Upgrade Helper. Upgrades are then applied one version at a time until
    the current version is reached, an exception occurs, or a step declines
    to increment the version. The function always ends with ``sys.exit(0)``
    after printing either a success or an "incomplete" message.
    """
    # The extra sentence is only shown when not invoked from the upgrade command.
    if from_cli_upgrade_command:
        must_upgrade_note = ""
    else:
        must_upgrade_note = "\nIn order to proceed, your project must be upgraded."
    cli_message(
        f"<red>\nYour project appears to have an out-of-date config version ({ge_config_version}) - "
        f"the version "
        f"number must be at least {CURRENT_GE_CONFIG_VERSION}.{must_upgrade_note}</red>"
    )

    confirm_proceed_or_exit(
        confirm_prompt="\nWould you like to run the Upgrade Helper to bring your project up-to-date?",
        continuation_message=EXIT_UPGRADE_CONTINUATION_MESSAGE,
    )
    cli_message(SECTION_SEPARATOR)

    # use loop in case multiple upgrades need to take place
    while ge_config_version < CURRENT_GE_CONFIG_VERSION:
        increment_version, exception_occurred = upgrade_project_up_to_one_version_increment(
            context_root_dir=context_root_dir,
            ge_config_version=ge_config_version,
            continuation_message=EXIT_UPGRADE_CONTINUATION_MESSAGE,
            from_cli_upgrade_command=from_cli_upgrade_command,
        )
        if increment_version and not exception_occurred:
            ge_config_version += 1
        else:
            break

    cli_message(SECTION_SEPARATOR)

    next_version = ge_config_version + 1
    upgrade_incomplete_message = f"""\
<red>The Upgrade Helper was unable to perform a complete project upgrade. Next steps:</red>

    - Please perform any manual steps outlined in the Upgrade Overview and/or Upgrade Report above
    - When complete, increment the config_version key in your <cyan>great_expectations.yml</cyan> to <cyan>{next_version}</cyan>\n
To learn more about the upgrade process, visit \
<cyan>https://docs.greatexpectations.io/en/latest/how_to_guides/migrating_versions.html</cyan>
"""
    upgrade_success_message = "<green>Upgrade complete. Exiting...</green>\n"

    reached_current = not (ge_config_version < CURRENT_GE_CONFIG_VERSION)
    cli_message(
        upgrade_success_message if reached_current else upgrade_incomplete_message
    )
    sys.exit(0)
Beispiel #13
0
def exit_with_failure_message_and_stats(
    context: DataContext, usage_event: str, message: str
) -> None:
    """Print ``message``, record a failed usage event, and exit with status 1.

    Args:
        context: data context the usage event is attributed to.
        usage_event: event name to report.
        message: text shown to the user before exiting.
    """
    cli_message(message)
    failure_report = dict(
        data_context=context,
        event=usage_event,
        api_version="v2",
        success=False,
    )
    send_usage_message(**failure_report)
    sys.exit(1)
Beispiel #14
0
def validation_operator_list(directory):
    """List known Validation Operators.

    Args:
        directory: project directory used to construct the ``DataContext``.

    A missing config is reported to the user and the function returns without
    sending a usage event (no context exists to attribute it to). Otherwise a
    usage event is sent with success=True after listing, including when no
    operators are configured, and with success=False if listing raises, in
    which case the exception is re-raised.
    """
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message(f"<red>{err.message}</red>")
        return

    usage_event = "cli.validation_operator.list"
    try:
        validation_operators = context.list_validation_operators()
        operator_count = len(validation_operators)

        if operator_count == 0:
            cli_message("No Validation Operators found")
            # An empty listing is still a successful command; report it the
            # same way the other list commands do.
            toolkit.send_usage_message(
                data_context=context, event=usage_event, success=True
            )
            return
        elif operator_count == 1:
            list_intro_string = "1 Validation Operator found:"
        else:
            list_intro_string = "{} Validation Operators found:".format(
                operator_count
            )

        cli_message(list_intro_string)
        for validation_operator in validation_operators:
            cli_message("")
            cli_message_dict(validation_operator)
        toolkit.send_usage_message(
            data_context=context, event=usage_event, success=True
        )
    except Exception:
        toolkit.send_usage_message(
            data_context=context, event=usage_event, success=False
        )
        raise
Beispiel #15
0
def _suite_scaffold(suite: str, directory: str, jupyter: bool) -> None:
    """Create a new suite and render a scaffold notebook for it.

    Exits with status 1 (after a failed usage event) when the suite already
    exists or when no datasource is selected. On success the notebook is
    either opened in jupyter or the command to open it is printed.

    Args:
        suite: name of the expectation suite to scaffold.
        directory: project directory passed to the data-context loader.
        jupyter: whether to launch the notebook after rendering it.
    """
    usage_event = "cli.suite.scaffold"
    context = toolkit.load_data_context_with_error_handling(directory)
    notebook_path = _get_notebook_path(context, f"scaffold_{suite}.ipynb")

    def _report(success: bool) -> None:
        # Single place for the usage event shared by every exit path below.
        send_usage_message(
            data_context=context,
            event=usage_event,
            api_version="v2",
            success=success,
        )

    if suite in context.list_expectation_suite_names():
        toolkit.tell_user_suite_exists(suite)
        if os.path.isfile(notebook_path):
            cli_message(
                f"  - If you wish to adjust your scaffolding, you can open this notebook with jupyter: `{notebook_path}` <red>(Please note that if you run that notebook, you will overwrite your existing suite.)</red>"
            )
        _report(False)
        sys.exit(1)

    datasource = toolkit.select_datasource(context)
    if datasource is None:
        _report(False)
        sys.exit(1)

    new_suite = context.create_expectation_suite(suite)
    _, _, _, batch_kwargs = get_batch_kwargs(
        context, datasource_name=datasource.name
    )
    SuiteScaffoldNotebookRenderer(context, new_suite, batch_kwargs).render_to_disk(
        notebook_path
    )

    _report(True)

    if not jupyter:
        cli_message(
            f"To continue scaffolding this suite, run `jupyter notebook {notebook_path}`"
        )
    else:
        toolkit.launch_jupyter_notebook(notebook_path)
def checkpoint_new(checkpoint, suite, directory, datasource, legacy):
    """Create a new checkpoint for easy deployments. (Experimental)

    Only the legacy flow is implemented; with ``legacy`` falsy the function
    does nothing. The legacy flow refuses to run for config versions >= 3 and
    exits with status 1 when no datasource is selected.

    Args:
        checkpoint: name for the new checkpoint; verified not to exist yet.
        suite: name of the expectation suite the checkpoint validates.
        directory: project directory passed to the data-context loader.
        datasource: datasource name forwarded to ``toolkit.select_datasource``.
        legacy: whether to create a legacy-style checkpoint.
    """
    # TODO: <Rob>Rob</Rob> Add flow for new style checkpoints
    if not legacy:
        return

    usage_event = "cli.checkpoint.new"
    context = toolkit.load_data_context_with_error_handling(directory)

    if context.get_config().config_version >= 3:
        cli_message(
            "<red>The `checkpoint new` CLI command is not yet implemented for Great Expectations config versions >= 3.</red>"
        )
        toolkit.send_usage_message(context, usage_event, success=False)
        sys.exit(1)

    _verify_checkpoint_does_not_exist(context, checkpoint, usage_event)
    loaded_suite = toolkit.load_expectation_suite(context, suite, usage_event)

    selected_datasource = toolkit.select_datasource(
        context, datasource_name=datasource
    )
    if selected_datasource is None:
        toolkit.send_usage_message(context, usage_event, success=False)
        sys.exit(1)

    # Only the batch kwargs (fourth element) of the result are needed here.
    _, _, _, batch_kwargs = toolkit.get_batch_kwargs(
        context, selected_datasource.name
    )

    batch_entry = {
        "batch_kwargs": dict(batch_kwargs),
        "expectation_suite_names": [loaded_suite.expectation_suite_name],
    }
    context.add_checkpoint(
        name=checkpoint,
        class_name="LegacyCheckpoint",
        validation_operator_name="action_list_operator",
        batches=[batch_entry],
    )

    confirmation = f"""<green>A checkpoint named `{checkpoint}` was added to your project!</green>
      - To run this checkpoint run `great_expectations checkpoint run {checkpoint}`"""
    cli_message(confirmation)
    toolkit.send_usage_message(context, usage_event, success=True)
def create_empty_suite(
    context: DataContext, expectation_suite_name: str, batch_kwargs
) -> None:
    """Announce, create, and save a new empty Expectation Suite.

    The user is shown the store URL the suite will be saved under. The suite
    is then created, given a citation recording ``batch_kwargs``, and saved.

    Args:
        context: data context that owns the expectations store.
        expectation_suite_name: name of the suite to create.
        batch_kwargs: batch kwargs recorded in the suite's citation.
    """
    store_backend = context.stores[context.expectations_store_name].store_backend
    suite_key = ExpectationSuiteIdentifier(
        expectation_suite_name=expectation_suite_name
    ).to_tuple()
    suite_url = store_backend.get_url_for_key(suite_key)

    announcement = """
Great Expectations will create a new Expectation Suite '{:s}' and store it here:

  {:s}
"""
    cli_message(announcement.format(expectation_suite_name, suite_url))

    new_suite = context.create_expectation_suite(expectation_suite_name)
    new_suite.add_citation(
        comment="New suite added via CLI", batch_kwargs=batch_kwargs
    )
    context.save_expectation_suite(new_suite, expectation_suite_name)
Beispiel #18
0
def _slack_setup(context):
    """Interactively collect a Slack webhook URL and save it in the config.

    The user is asked to confirm before every URL prompt and can bail out at
    any time, in which case nothing is saved. Prompting repeats until
    ``is_sane_slack_webhook`` accepts the URL.

    Returns:
        The ``context`` that was passed in.
    """
    cli_message(SLACK_SETUP_INTRO)

    retrying = False
    while True:
        if retrying:
            cli_message("That URL was not valid.\n")
        if not click.confirm(SLACK_SETUP_PROMPT, default=True):
            cli_message(SLACK_LATER)
            return context
        webhook_url = click.prompt(SLACK_WEBHOOK_PROMPT, default="")
        if is_sane_slack_webhook(webhook_url):
            break
        retrying = True

    context.save_config_variable("validation_notification_slack_webhook", webhook_url)
    cli_message(SLACK_SETUP_COMPLETE)
    return context
Beispiel #19
0
def project_check_config(directory):
    """Validate the project's config files and report the outcome.

    A usage event is sent whenever ``do_config_check`` yields a context. An
    invalid config prints the error and exits with status 1.
    """
    cli_message("Checking your config files for validity...\n")
    config_is_valid, problem, context = do_config_check(directory)

    if context:
        toolkit.send_usage_message(
            data_context=context,
            event="cli.project.check_config",
            success=True,
        )

    if config_is_valid:
        cli_message("<green>Your config file appears valid!</green>")
    else:
        cli_message("Unfortunately, your config appears to be invalid:\n")
        cli_message(f"<red>{problem}</red>")
        sys.exit(1)
Beispiel #20
0
def store_list(directory):
    """Print every Store configured in the project.

    A usage event is sent with success=True after listing (including the
    empty case) and with success=False if anything raises, in which case the
    exception is re-raised.
    """
    context = toolkit.load_data_context_with_error_handling(directory)
    usage_event = "cli.store.list"

    try:
        stores = context.list_stores()
        store_count = len(stores)

        if store_count == 0:
            cli_message("No Stores found")
            toolkit.send_usage_message(
                data_context=context, event=usage_event, success=True
            )
            return

        if store_count == 1:
            header = "1 Store found:"
        else:
            header = f"{store_count} Stores found:"
        cli_message(header)

        for store in stores:
            # Blank line separates each store's details.
            cli_message("")
            cli_message_dict(store)

        toolkit.send_usage_message(
            data_context=context, event=usage_event, success=True
        )
    except Exception:
        toolkit.send_usage_message(
            data_context=context, event=usage_event, success=False
        )
        raise
Beispiel #21
0
def build_docs(context, site_name=None, view=True, assume_yes=False) -> None:
    """Build documentation in a context.

    A dry run is performed first so the user can see which sites will be
    built, then (unless ``assume_yes``) confirmation is requested before the
    real build.

    Args:
        context: data context whose Data Docs are built.
        site_name: restrict the build to this site; ``None`` passes
            ``site_names=None`` to ``context.build_data_docs``.
        view: when true, call ``context.open_data_docs`` after building.
        assume_yes: skip the confirmation prompt.
    """
    logger.debug("Starting cli.datasource.build_docs")

    site_names = [site_name] if site_name is not None else None
    index_page_locator_infos = context.build_data_docs(
        site_names=site_names, dry_run=True
    )

    # The loop variable must not be called `site_name`: doing so overwrote the
    # parameter, so `open_data_docs` below received the last listed site
    # instead of the caller's argument.
    site_lines = "".join(
        f" - <cyan>{listed_site_name}:</cyan> {index_page_locator_info}\n"
        for listed_site_name, index_page_locator_info in index_page_locator_infos.items()
    )
    cli_message("\nThe following Data Docs sites will be built:\n\n" + site_lines)

    if not assume_yes:
        toolkit.confirm_proceed_or_exit()

    cli_message("\nBuilding Data Docs...\n")
    context.build_data_docs(site_names=site_names)

    cli_message("Done building Data Docs")

    if view:
        context.open_data_docs(site_name=site_name, only_if_exists=True)
Beispiel #22
0
def project_upgrade(directory):
    """Upgrade a project after installing the next Great Expectations major version.

    The context is loaded in upgrade mode. If a context comes back, the
    project is reported as up to date and the process exits with status 0.
    """
    cli_message("\nChecking project...")
    cli_message(SECTION_SEPARATOR)

    context = load_data_context_with_error_handling(
        directory=directory, from_cli_upgrade_command=True
    )
    if not context:
        return

    up_to_date_message = "Your project is up-to-date - no further upgrade is necessary.\n"
    cli_message(f"<green>{up_to_date_message}</green>")
    sys.exit(0)
Beispiel #23
0
def _profile_to_create_a_suite(
    additional_batch_kwargs,
    batch_kwargs,
    batch_kwargs_generator_name,
    context,
    datasource_name,
    expectation_suite_name,
    data_asset_name,
    profiler_configuration,
):
    """Profile a data asset to generate an example Expectation Suite.

    The user is told where the suite will be stored and asked to confirm.
    Profiling then runs with ``BasicSuiteBuilderProfiler`` under a UTC
    timestamp run id. Unsuccessful results are handed to
    ``_raise_profiling_errors``.

    Returns:
        The profiling results returned by ``context.profile_data_asset``.
    """
    store_backend = context.stores[context.expectations_store_name].store_backend
    suite_key = ExpectationSuiteIdentifier(
        expectation_suite_name=expectation_suite_name
    ).to_tuple()
    suite_url = store_backend.get_url_for_key(suite_key)

    announcement = """
Great Expectations will choose a couple of columns and generate expectations about them
to demonstrate some examples of assertions you can make about your data.

Great Expectations will store these expectations in a new Expectation Suite '{:s}' here:

  {:s}
"""
    cli_message(announcement.format(expectation_suite_name, suite_url))

    confirm_proceed_or_exit()

    # TODO this may not apply
    cli_message("\nGenerating example Expectation Suite...")
    started_at = datetime.datetime.now(datetime.timezone.utc)
    run_id = started_at.strftime("%Y%m%dT%H%M%S.%fZ")
    profiling_results = context.profile_data_asset(
        datasource_name,
        batch_kwargs_generator_name=batch_kwargs_generator_name,
        data_asset_name=data_asset_name,
        batch_kwargs=batch_kwargs,
        profiler=BasicSuiteBuilderProfiler,
        profiler_configuration=profiler_configuration,
        expectation_suite_name=expectation_suite_name,
        run_id=run_id,
        additional_batch_kwargs=additional_batch_kwargs,
    )
    profiling_succeeded = profiling_results["success"]
    if not profiling_succeeded:
        _raise_profiling_errors(profiling_results)

    cli_message("\nDone generating example Expectation Suite")
    return profiling_results
def checkpoint_run(checkpoint, directory):
    """Run a checkpoint and exit with a status reflecting the result. (Experimental)

    The command refuses to run for config versions >= 3. Otherwise it exits
    with 0 when the run reports success and with 1 when it does not. If
    running the checkpoint raises, the error text is handed to
    ``toolkit.exit_with_failure_message_and_stats``.

    Args:
        checkpoint: name of the checkpoint to load and run.
        directory: project directory passed to the data-context loader.
    """
    usage_event = "cli.checkpoint.run"
    context = toolkit.load_data_context_with_error_handling(
        directory=directory, from_cli_upgrade_command=False
    )

    if context.get_config().config_version >= 3:
        cli_message(
            "<red>The `checkpoint run` CLI command is not yet implemented for Great Expectations config versions >= 3.</red>"
        )
        toolkit.send_usage_message(context, usage_event, success=False)
        sys.exit(1)

    loaded_checkpoint = toolkit.load_checkpoint(context, checkpoint, usage_event)

    try:
        results = loaded_checkpoint.run()
    except Exception as error:
        toolkit.exit_with_failure_message_and_stats(
            context, usage_event, f"<red>{error}</red>"
        )

    passed = bool(results["success"])
    cli_message("Validation succeeded!" if passed else "Validation failed!")
    # The usage event is sent with success=True in both outcomes, so it does
    # not reflect the validation result.
    toolkit.send_usage_message(context, event=usage_event, success=True)
    print_validation_operator_results_details(results)
    sys.exit(0 if passed else 1)
Beispiel #25
0
def create_expectation_suite(
    context,
    datasource_name=None,
    batch_kwargs_generator_name=None,
    generator_asset=None,
    batch_kwargs=None,
    expectation_suite_name=None,
    additional_batch_kwargs=None,
    empty_suite=False,
    show_intro_message=False,
    flag_build_docs=True,
    open_docs=False,
    profiler_configuration="demo",
    data_asset_name=None,
):
    """
    Create a new expectation suite, either empty or seeded by profiling.

    Missing pieces (datasource, batch kwargs, suite name) are gathered
    interactively. The process exits with status 1 when no datasource is
    selected or when the requested suite name already exists.

    WARNING: the flow and name of this method and its interaction with
    _profile_to_create_a_suite require a serious revisiting.
    :return: a tuple: (success, suite name, profiling_results); the
        profiling results are None for an empty suite
    """
    if generator_asset:
        warnings.warn(
            "The 'generator_asset' argument will be deprecated and renamed to 'data_asset_name'. "
            "Please update code accordingly.",
            DeprecationWarning,
        )
        data_asset_name = generator_asset

    if show_intro_message and not empty_suite:
        cli_message(
            "\n<cyan>========== Create sample Expectations ==========</cyan>\n\n"
        )

    chosen_datasource = select_datasource(context, datasource_name=datasource_name)
    if chosen_datasource is None:
        # select_datasource has already shown an error message; just exit.
        sys.exit(1)
    datasource_name = chosen_datasource.name

    if expectation_suite_name in context.list_expectation_suite_names():
        tell_user_suite_exists(expectation_suite_name)
        sys.exit(1)

    batch_inputs = (batch_kwargs_generator_name, data_asset_name, batch_kwargs)
    if any(value is None for value in batch_inputs):
        resolved = get_batch_kwargs(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=batch_kwargs_generator_name,
            data_asset_name=data_asset_name,
            additional_batch_kwargs=additional_batch_kwargs,
        )
        (
            datasource_name,
            batch_kwargs_generator_name,
            data_asset_name,
            batch_kwargs,
        ) = resolved
        # In this case, we have "consumed" the additional_batch_kwargs
        additional_batch_kwargs = {}

    if expectation_suite_name is None:
        suggested_name = _get_default_expectation_suite_name(
            batch_kwargs, data_asset_name
        )
        # Keep asking until the user picks a name that is not already taken.
        name_is_taken = True
        while name_is_taken:
            expectation_suite_name = click.prompt(
                "\nName the new Expectation Suite",
                default=suggested_name,
            )
            name_is_taken = (
                expectation_suite_name in context.list_expectation_suite_names()
            )
            if name_is_taken:
                tell_user_suite_exists(expectation_suite_name)

    if empty_suite:
        create_empty_suite(context, expectation_suite_name, batch_kwargs)
        return True, expectation_suite_name, None

    profiling_results = _profile_to_create_a_suite(
        additional_batch_kwargs,
        batch_kwargs,
        batch_kwargs_generator_name,
        context,
        datasource_name,
        expectation_suite_name,
        data_asset_name,
        profiler_configuration,
    )

    if flag_build_docs:
        build_docs(context, view=False)
        # Opening the results is only attempted when docs were just built.
        if open_docs:
            attempt_to_open_validation_results_in_data_docs(context, profiling_results)

    return True, expectation_suite_name, profiling_results
Beispiel #26
0
def upgrade_project_one_version_increment(
    context_root_dir: str,
    ge_config_version: float,
    continuation_message: str,
    from_cli_upgrade_command: bool = False,
) -> [bool, bool]:  # Returns increment_version, exception_occurred
    """Try to upgrade the project configuration by a single version step.

    Args:
        context_root_dir: project root directory, handed to the upgrade helper and
            to ``DataContext.set_ge_config_version``.
        ge_config_version: config version before the upgrade; its integer part
            selects the upgrade helper and is the base of the target version.
        continuation_message: printed when the user declines the upgrade.
        from_cli_upgrade_command: when True, confirmation is always requested,
            even if the upgrade helper does not require it.

    Returns:
        ``(increment_version, exception_occurred)``. ``(False, False)`` is also
        returned when no upgrade helper is registered for the current version.

    Note:
        If the upgrade is not confirmed, the original version is written back,
        ``continuation_message`` is printed and the process exits with status 0.
    """
    helper_cls = GE_UPGRADE_HELPER_VERSION_MAP.get(int(ge_config_version))
    if not helper_cls:
        return False, False
    next_config_version = int(ge_config_version) + 1

    def _restore_original_version() -> None:
        # Write the pre-upgrade version number back to the project config.
        DataContext.set_ge_config_version(
            ge_config_version, context_root_dir, validate_config_version=False
        )

    # Temporarily pretend the project is at CURRENT_GE_CONFIG_VERSION so that a
    # functional DataContext can be obtained while upgrading.
    DataContext.set_ge_config_version(
        config_version=CURRENT_GE_CONFIG_VERSION,
        context_root_dir=context_root_dir,
    )
    helper = helper_cls(context_root_dir=context_root_dir)
    overview, needs_confirmation = helper.get_upgrade_overview()

    # Confirmation is implied unless the helper or the CLI command asks for it.
    proceed = True
    if needs_confirmation or from_cli_upgrade_command:
        proceed = confirm_proceed_or_exit(
            confirm_prompt=overview,
            continuation_message=continuation_message,
            exit_on_no=False,
        )

    if proceed:
        cli_message("\nUpgrading project...")
        cli_message(SECTION_SEPARATOR)
        # Run the upgrade; the helper reports what was done and whether the
        # version number should be bumped.
        report, should_increment, had_exception = helper.upgrade_project()
        cli_message(report)

        if had_exception:
            _restore_original_version()
            return False, True

        if should_increment:
            DataContext.set_ge_config_version(
                next_config_version,
                context_root_dir,
                validate_config_version=False,
            )
            return True, False

        _restore_original_version()
        return False, False

    # Upgrade declined: undo the temporary version change and stop here.
    _restore_original_version()
    cli_message(continuation_message)
    sys.exit(0)
Beispiel #27
0
def _suite_edit(
    suite,
    datasource,
    directory,
    jupyter,
    batch_kwargs,
    usage_event,
    suppress_usage_message=False,
):
    """Render the "edit suite" notebook for an expectation suite and optionally open it.

    Args:
        suite: suite identifier passed to ``toolkit.load_expectation_suite``.
        datasource: optional datasource name; injected into the parsed batch_kwargs
            under the ``"datasource"`` key and used to pre-select the datasource
            when the user has to be prompted for a batch.
        directory: passed to ``toolkit.load_data_context_with_error_handling``.
        jupyter: when truthy, launch the rendered notebook in Jupyter; otherwise
            print the command the user can run to open it.
        batch_kwargs: optional JSON string describing the batch to edit against.
            When absent, the batch_kwargs of the suite's last citation are used,
            and failing that the user is prompted interactively.
        usage_event: event name used for all usage-statistics messages.
        suppress_usage_message: when True, most usage messages of this function
            are not sent. Defaults to False.

    Raises:
        SystemExit: with status 1 when the batch_kwargs cannot be parsed or used
            to load a batch, or when no datasource can be selected.
        Exception: any other error is re-raised after a failed usage event has
            been sent.
    """
    # suppress_usage_message exists for the situation where _suite_edit is called
    # by _suite_new(); it defaults to False (presumably _suite_new() passes True
    # so the event is not reported twice - confirm against that caller).
    batch_kwargs_json = batch_kwargs
    batch_kwargs = None
    context = toolkit.load_data_context_with_error_handling(directory)

    def _report_failure_unless_suppressed() -> None:
        # Single place for the "failed, and the caller did not opt out" message.
        if not suppress_usage_message:
            send_usage_message(
                data_context=context,
                event=usage_event,
                api_version="v2",
                success=False,
            )

    try:
        suite = toolkit.load_expectation_suite(context, suite, usage_event)
        citations = suite.get_citations(require_batch_kwargs=True)

        if batch_kwargs_json:
            try:
                batch_kwargs = json.loads(batch_kwargs_json)
                if datasource:
                    batch_kwargs["datasource"] = datasource
                # Called only to verify that these batch_kwargs can load a batch.
                toolkit.load_batch(context, suite, batch_kwargs)
            except json_parse_exception as je:
                cli_message(
                    "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(
                        je
                    )
                )
                # Invalid JSON is a failure and must be reported as one.
                _report_failure_unless_suppressed()
                sys.exit(1)
            except ge_exceptions.DataContextError:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
                _report_failure_unless_suppressed()
                sys.exit(1)
            except ValueError as ve:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(
                        ve
                    )
                )
                _report_failure_unless_suppressed()
                sys.exit(1)
        elif citations:
            # Fall back to the batch used by the most recent citation.
            batch_kwargs = citations[-1].get("batch_kwargs")

        if not batch_kwargs:
            cli_message(
                "\nA batch of data is required to edit the suite - let's help you to specify it."
            )

            try:
                data_source = toolkit.select_datasource(
                    context, datasource_name=datasource
                )
            except ValueError as ve:
                cli_message(f"<red>{ve}</red>")
                # NOTE(review): unlike the neighbouring branches, this path reports
                # the failure even when suppress_usage_message is set. Kept as-is.
                send_usage_message(
                    data_context=context,
                    event=usage_event,
                    api_version="v2",
                    success=False,
                )
                sys.exit(1)

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                _report_failure_unless_suppressed()
                sys.exit(1)

            # Only prompt when nothing at all was supplied; a falsy but non-None
            # value (e.g. an empty dict) is passed through to the renderer unchanged.
            if batch_kwargs is None:
                _, _, _, batch_kwargs = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    batch_kwargs_generator_name=None,
                    data_asset_name=None,
                    additional_batch_kwargs=None,
                )

        notebook_name = f"edit_{suite.expectation_suite_name}.ipynb"
        notebook_path = _get_notebook_path(context, notebook_name)
        SuiteEditNotebookRenderer.from_data_context(context).render_to_disk(
            suite, notebook_path, batch_kwargs
        )

        if not jupyter:
            cli_message(
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )

        payload = edit_expectation_suite_usage_statistics(
            data_context=context, expectation_suite_name=suite.expectation_suite_name
        )

        if not suppress_usage_message:
            send_usage_message(
                data_context=context,
                event=usage_event,
                event_payload=payload,
                api_version="v2",
                success=True,
            )

        if jupyter:
            toolkit.launch_jupyter_notebook(notebook_path)

    except Exception:
        # sys.exit() raises SystemExit, which is not an Exception subclass, so the
        # deliberate exits above do not pass through this handler.
        send_usage_message(
            data_context=context,
            event=usage_event,
            api_version="v2",
            success=False,
        )
        raise
Beispiel #28
0
def validation_operator_run(name, run_name, validation_config_file, suite, directory):
    # Note though the long lines here aren't pythonic, they look best if Click does the line wraps.
    """
    Run a validation operator against some data.

    There are two modes to run this command:

    1. Interactive (good for development):

        Specify the name of the validation operator using the --name argument
        and the name of the expectation suite using the --suite argument.

        The cli will help you specify the batch of data that you want to
        validate interactively.


    2. Non-interactive (good for production):

        Use the `--validation_config_file` argument to specify the path of the validation configuration JSON file. This file can be used to instruct a validation operator to validate multiple batches of data and use multiple expectation suites to validate each batch.

        Learn how to create a validation config file here:
        https://great-expectations.readthedocs.io/en/latest/command_line.html#great-expectations-validation-operator-run-validation-config-file-validation-config-file-path

        This command exits with 0 if the validation operator ran and the "success" attribute in its return object is True. Otherwise, the command exits with 1.

    To learn more about validation operators, go here:
    https://great-expectations.readthedocs.io/en/latest/features/validation.html#validation-operators
    """
    # The docstring above is left untouched: it may double as the CLI help text.

    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        # No context exists yet, so no usage message can be sent on this path.
        cli_message(f"Failed to process <red>{err.message}</red>")
        sys.exit(1)

    usage_event = "cli.validation_operator.run"

    def _send_usage(success):
        # Report the outcome of this command to usage statistics.
        toolkit.send_usage_message(
            data_context=context, event=usage_event, success=success
        )

    def _exit_with_failure(message):
        # Print the message, record a failed usage event and exit with status 1.
        cli_message(message)
        _send_usage(success=False)
        sys.exit(1)

    try:
        if validation_config_file is not None:
            # Non-interactive mode: everything comes from the JSON config file.
            try:
                with open(validation_config_file) as f:
                    validation_config = json.load(f)
            except (OSError, json_parse_exception) as e:
                _exit_with_failure(
                    f"Failed to process the --validation_config_file argument: <red>{e}</red>"
                )

            validation_config_error_message = _validate_valdiation_config(
                validation_config
            )
            if validation_config_error_message is not None:
                _exit_with_failure(
                    "<red>The validation config in {:s} is misconfigured: {:s}</red>".format(
                        validation_config_file, validation_config_error_message
                    )
                )

        else:
            # Interactive mode: --suite and --name are required, the batch is
            # chosen by prompting the user.
            if suite is None:
                # Missing --suite is a usage error, so exit with status 1 just
                # like the missing --name case below.
                _exit_with_failure(
                    "\nPlease use --suite argument to specify the name of the expectation suite.\n"
                    "Call `great_expectations suite list` command to list the expectation suites in your project.\n"
                )

            suite = toolkit.load_expectation_suite(context, suite, usage_event)

            if name is None:
                _exit_with_failure(
                    "\nPlease use --name argument to specify the name of the validation operator.\n"
                    "Call `great_expectations validation-operator list` command to list the operators in your project.\n"
                )
            elif name not in context.list_validation_operator_names():
                _exit_with_failure(
                    f"\nCould not find a validation operator {name}.\n"
                    "Call `great_expectations validation-operator list` command to list the operators in your project.\n"
                )

            cli_message(
                "\nLet us help you specify the batch of data your want the validation operator to validate."
            )

            try:
                data_source = toolkit.select_datasource(context)
            except ValueError as ve:
                _exit_with_failure(f"<red>{ve}</red>")

            if not data_source:
                _exit_with_failure("<red>No datasources found in the context.</red>")

            _, _, _, batch_kwargs = get_batch_kwargs(
                context,
                datasource_name=data_source.name,
                batch_kwargs_generator_name=None,
                data_asset_name=None,
                additional_batch_kwargs=None,
            )

            validation_config = {
                "validation_operator_name": name,
                "batches": [
                    {
                        "batch_kwargs": batch_kwargs,
                        "expectation_suite_names": [suite.expectation_suite_name],
                    }
                ],
            }

        try:
            validation_operator_name = validation_config["validation_operator_name"]
            # One batch per (batch_kwargs, expectation suite) pair in the config.
            batches_to_validate = [
                context.get_batch(entry["batch_kwargs"], expectation_suite_name)
                for entry in validation_config["batches"]
                for expectation_suite_name in entry["expectation_suite_names"]
            ]

            if run_name is None:
                run_name = datetime.datetime.now(datetime.timezone.utc).strftime(
                    "%Y%m%dT%H%M%S.%fZ"
                )

            run_id = RunIdentifier(run_name=run_name)

            operator_kwargs = {
                "assets_to_validate": batches_to_validate,
                "run_id": run_id,
            }
            # In config-file mode `suite` is never loaded: it is None or the raw
            # --suite string, neither of which carries evaluation parameters.
            evaluation_parameters = getattr(suite, "evaluation_parameters", None)
            if evaluation_parameters is not None:
                operator_kwargs["evaluation_parameters"] = evaluation_parameters

            results = context.run_validation_operator(
                validation_operator_name, **operator_kwargs
            )
        except (ge_exceptions.DataContextError, OSError, SQLAlchemyError) as e:
            _exit_with_failure(f"<red>{e}</red>")

        validation_passed = results["success"]
        cli_message(
            "Validation succeeded!" if validation_passed else "Validation failed!"
        )
        # The command itself ran to completion either way, so the usage event is
        # a success; only the exit status reflects the validation outcome.
        _send_usage(success=True)
        sys.exit(0 if validation_passed else 1)
    except Exception:
        # sys.exit() raises SystemExit (not an Exception), so only unexpected
        # errors are reported here before being re-raised.
        _send_usage(success=False)
        raise
Beispiel #29
0
def exit_with_failure_message_and_stats(
    context: DataContext, usage_event: str, message: str
) -> None:
    """Print `message`, record `usage_event` as failed, then exit with status 1.

    Args:
        context: data context the usage message is sent for.
        usage_event: name of the usage-statistics event to report.
        message: text shown to the user before exiting.
    """
    cli_message(message)
    send_usage_message(data_context=context, event=usage_event, success=False)
    sys.exit(1)
Beispiel #30
0
def tell_user_suite_exists(suite_name: str) -> None:
    """Tell the user that `suite_name` is already taken and how to edit that suite."""
    already_exists = (
        f"<red>An expectation suite named `{suite_name}` already exists.</red>"
    )
    edit_hint = f"  - If you intend to edit the suite please use `great_expectations suite edit {suite_name}`."
    cli_message(f"{already_exists}\n{edit_hint}")