Example no. 1
def checkpoint_new(checkpoint, suite, directory, datasource):
    """Create a new checkpoint for easy deployments. (Experimental)"""
    suite_name = suite
    usage_event = "cli.checkpoint.new"
    context = toolkit.load_data_context_with_error_handling(directory)
    _verify_checkpoint_does_not_exist(context, checkpoint, usage_event)
    suite: ExpectationSuite = toolkit.load_expectation_suite(
        context, suite_name, usage_event)
    datasource = toolkit.select_datasource(context, datasource_name=datasource)
    if datasource is None:
        send_usage_message(context, usage_event, success=False)
        sys.exit(1)
    _, _, _, batch_kwargs = toolkit.get_batch_kwargs(context, datasource.name)

    template = _load_checkpoint_yml_template()
    # This picky update helps template comments stay in place
    template["batches"][0]["batch_kwargs"] = dict(batch_kwargs)
    template["batches"][0]["expectation_suite_names"] = [
        suite.expectation_suite_name
    ]

    checkpoint_file = _write_checkpoint_to_disk(context, template, checkpoint)
    cli_message(
        f"""<green>A checkpoint named `{checkpoint}` was added to your project!</green>
  - To edit this checkpoint edit the checkpoint file: {checkpoint_file}
  - To run this checkpoint run `great_expectations checkpoint run {checkpoint}`"""
    )
    send_usage_message(context, usage_event, success=True)
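A note on what `checkpoint new` writes: the file produced by _write_checkpoint_to_disk is later consumed by `checkpoint run` (Example no. 3 below), which reads its `validation_operator_name` and `batches` keys. A minimal sketch of that structure, expressed as the Python dict behind the YAML; the batch kwargs and names are illustrative placeholders, not values the command guarantees:

# Sketch of the checkpoint structure written to
# great_expectations/checkpoints/<checkpoint>.yml; the keys match what
# `checkpoint run` reads, the values are illustrative.
checkpoint_template = {
    "validation_operator_name": "action_list_operator",
    "batches": [
        {
            "batch_kwargs": {"path": "data/my_data.csv", "datasource": "my_datasource"},
            "expectation_suite_names": ["my_suite"],
        }
    ],
}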
Example no. 2
def _tap_new(suite, tap_filename, directory, usage_event, datasource=None):
    context = toolkit.load_data_context_with_error_handling(directory)
    try:
        _validate_tap_filename(tap_filename)
        context_directory = context.root_directory
        datasource = _get_datasource(context, datasource)
        suite = toolkit.load_expectation_suite(context, suite, usage_event)
        _, _, _, batch_kwargs = get_batch_kwargs(context, datasource.name)

        tap_filename = _write_tap_file_to_disk(
            batch_kwargs, context_directory, suite, tap_filename
        )
        cli_message(
            f"""\
<green>A new tap has been generated!</green>
To run this tap, run: <green>python {tap_filename}</green>
You can edit this script or place this code snippet in your pipeline."""
        )
        send_usage_message(
            data_context=context,
            event=usage_event,
            success=True
        )
    except Exception as e:
        send_usage_message(
            data_context=context,
            event=usage_event,
            success=False
        )
        raise e
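The success message above says the tap can be run with `python <tap_filename>` or pasted into a pipeline, so the file written by _write_tap_file_to_disk is a standalone validation script. A plausible sketch of its shape, assuming the v2 batch_kwargs API; the real template may differ, and the suite name, batch kwargs, and operator name are placeholders:

# Hedged sketch of a generated tap script (the actual template written by
# _write_tap_file_to_disk may differ).
import sys

import great_expectations as ge

context = ge.data_context.DataContext()
batch_kwargs = {"path": "data/my_data.csv", "datasource": "my_datasource"}  # placeholder
batch = context.get_batch(batch_kwargs, "my_suite")  # placeholder suite name
results = context.run_validation_operator(
    "action_list_operator", assets_to_validate=[batch]
)

if not results["success"]:
    print("Validation failed!")
    sys.exit(1)

print("Validation succeeded!")
sys.exit(0)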
Example no. 3
def checkpoint_run(checkpoint, directory):
    """Run a checkpoint. (Experimental)"""
    context = toolkit.load_data_context_with_error_handling(directory)
    usage_event = "cli.checkpoint.run"

    checkpoint_config = toolkit.load_checkpoint(context, checkpoint,
                                                usage_event)
    checkpoint_file = f"great_expectations/checkpoints/{checkpoint}.yml"

    # TODO loading batches will move into DataContext eventually
    batches_to_validate = []
    for batch in checkpoint_config["batches"]:
        _validate_at_least_one_suite_is_listed(context, batch, checkpoint_file)
        batch_kwargs = batch["batch_kwargs"]
        for suite_name in batch["expectation_suite_names"]:
            suite = toolkit.load_expectation_suite(context, suite_name,
                                                   usage_event)
            try:
                batch = toolkit.load_batch(context, suite, batch_kwargs)
            except (FileNotFoundError, SQLAlchemyError, OSError,
                    DataContextError) as e:
                toolkit.exit_with_failure_message_and_stats(
                    context,
                    usage_event,
                    f"""<red>There was a problem loading a batch:
  - Batch: {batch_kwargs}
  - {e}
  - Please verify these batch kwargs in the checkpoint file: `{checkpoint_file}`</red>""",
                )
            batches_to_validate.append(batch)
    try:
        results = context.run_validation_operator(
            checkpoint_config["validation_operator_name"],
            assets_to_validate=batches_to_validate,
            # TODO prepare for new RunID - checkpoint name and timestamp
            # run_id=RunID(checkpoint)
        )
    except DataContextError as e:
        toolkit.exit_with_failure_message_and_stats(context, usage_event,
                                                    f"<red>{e}</red>")

    if not results["success"]:
        cli_message("Validation failed!")
        # success=True here refers to the CLI command completing, not to the
        # validation outcome; the outcome is reported via the exit code below.
        send_usage_message(context, event=usage_event, success=True)
        print_validation_operator_results_details(results)
        sys.exit(1)

    cli_message("Validation succeeded!")
    send_usage_message(context, event=usage_event, success=True)
    print_validation_operator_results_details(results)
    sys.exit(0)
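Since `checkpoint run` exits 0 when validation succeeds and non-zero when it fails or a batch cannot be loaded, a scheduler or pipeline can branch on the return code alone. A minimal sketch (the checkpoint name is a placeholder):

# Drive `checkpoint run` from a pipeline and branch on its exit code
# (0 = validation succeeded, non-zero = failed or errored).
import subprocess

result = subprocess.run(["great_expectations", "checkpoint", "run", "my_checkpoint"])
if result.returncode != 0:
    raise RuntimeError("Checkpoint validation failed; halting the pipeline.")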
Example no. 4
def checkpoint_new(checkpoint, suite, directory, datasource, legacy):
    """Create a new checkpoint for easy deployments. (Experimental)"""
    if legacy:
        suite_name = suite
        usage_event = "cli.checkpoint.new"
        context = toolkit.load_data_context_with_error_handling(directory)
        ge_config_version = context.get_config().config_version
        if ge_config_version >= 3:
            cli_message(
                f"""<red>The `checkpoint new` CLI command is not yet implemented for GE config versions >= 3.</red>"""
            )
            send_usage_message(context, usage_event, success=False)
            sys.exit(1)

        _verify_checkpoint_does_not_exist(context, checkpoint, usage_event)
        suite: ExpectationSuite = toolkit.load_expectation_suite(
            context, suite_name, usage_event)
        datasource = toolkit.select_datasource(context,
                                               datasource_name=datasource)
        if datasource is None:
            send_usage_message(context, usage_event, success=False)
            sys.exit(1)
        _, _, _, batch_kwargs = toolkit.get_batch_kwargs(
            context, datasource.name)

        _ = context.add_checkpoint(
            name=checkpoint,
            **{
                "class_name": "LegacyCheckpoint",
                "validation_operator_name": "action_list_operator",
                "batches": [
                    {
                        "batch_kwargs": dict(batch_kwargs),
                        "expectation_suite_names": [suite.expectation_suite_name],
                    }
                ],
            },
        )

        cli_message(
            f"""<green>A checkpoint named `{checkpoint}` was added to your project!</green>
      - To run this checkpoint run `great_expectations checkpoint run {checkpoint}`"""
        )
        send_usage_message(context, usage_event, success=True)
    # TODO: <Rob>Rob</Rob> Add flow for new style checkpoints
    else:
        pass
Example no. 5
def validation_operator_run(name, run_name, validation_config_file, suite,
                            directory):
    # Note: though the long lines here aren't Pythonic, they look best when Click does the line wraps.
    """
    Run a validation operator against some data.

    There are two modes to run this command:

    1. Interactive (good for development):

        Specify the name of the validation operator using the --name argument
        and the name of the expectation suite using the --suite argument.

        The CLI will help you specify the batch of data that you want to
        validate interactively.


    2. Non-interactive (good for production):

        Use the `--validation_config_file` argument to specify the path of the validation configuration JSON file. This file can be used to instruct a validation operator to validate multiple batches of data and use multiple expectation suites to validate each batch.

        Learn how to create a validation config file here:
        https://great-expectations.readthedocs.io/en/latest/command_line.html#great-expectations-validation-operator-run-validation-config-file-validation-config-file-path

        This command exits with 0 if the validation operator ran and the "success" attribute in its return object is True. Otherwise, the command exits with 1.

    To learn more about validation operators, go here:
    https://great-expectations.readthedocs.io/en/latest/features/validation.html#validation-operators
    """

    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("Failed to process <red>{}</red>".format(err.message))
        sys.exit(1)

    try:
        if validation_config_file is not None:
            try:
                with open(validation_config_file) as f:
                    validation_config = json.load(f)
            except (OSError, json_parse_exception) as e:
                cli_message(
                    f"Failed to process the --validation_config_file argument: <red>{e}</red>"
                )
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            validation_config_error_message = _validate_valdiation_config(
                validation_config)
            if validation_config_error_message is not None:
                cli_message(
                    "<red>The validation config in {:s} is misconfigured: {:s}</red>"
                    .format(validation_config_file,
                            validation_config_error_message))
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

        else:
            if suite is None:
                cli_message("""
Please use --suite argument to specify the name of the expectation suite.
Call `great_expectation suite list` command to list the expectation suites in your project.
""")
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            suite = toolkit.load_expectation_suite(
                context, suite, "cli.validation_operator.run")

            if name is None:
                cli_message("""
Please use --name argument to specify the name of the validation operator.
Call `great_expectation validation-operator list` command to list the operators in your project.
""")
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)
            else:
                if name not in context.list_validation_operator_names():
                    cli_message(f"""
Could not find a validation operator {name}.
Call `great_expectation validation-operator list` command to list the operators in your project.
""")
                    send_usage_message(
                        data_context=context,
                        event="cli.validation_operator.run",
                        success=False,
                    )
                    sys.exit(1)

            batch_kwargs = None

            cli_message("""
Let us help you specify the batch of data your want the validation operator to validate."""
                        )

            try:
                data_source = toolkit.select_datasource(context)
            except ValueError as ve:
                cli_message("<red>{}</red>".format(ve))
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            (
                datasource_name,
                batch_kwargs_generator,
                data_asset,
                batch_kwargs,
            ) = get_batch_kwargs(
                context,
                datasource_name=data_source.name,
                batch_kwargs_generator_name=None,
                data_asset_name=None,
                additional_batch_kwargs=None,
            )

            validation_config = {
                "validation_operator_name": name,
                "batches": [
                    {
                        "batch_kwargs": batch_kwargs,
                        "expectation_suite_names": [suite.expectation_suite_name],
                    }
                ],
            }

        try:
            validation_operator_name = validation_config[
                "validation_operator_name"]
            batches_to_validate = []
            for entry in validation_config["batches"]:
                for expectation_suite_name in entry["expectation_suite_names"]:
                    batch = context.get_batch(entry["batch_kwargs"],
                                              expectation_suite_name)
                    batches_to_validate.append(batch)

            if run_name is None:
                run_name = datetime.datetime.now(
                    datetime.timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")

            run_id = RunIdentifier(run_name=run_name)

            if suite is None or suite.evaluation_parameters is None:
                results = context.run_validation_operator(
                    validation_operator_name,
                    assets_to_validate=batches_to_validate,
                    run_id=run_id,
                )
            else:
                results = context.run_validation_operator(
                    validation_operator_name,
                    assets_to_validate=batches_to_validate,
                    run_id=run_id,
                    evaluation_parameters=suite.evaluation_parameters,
                )
        except (ge_exceptions.DataContextError, OSError, SQLAlchemyError) as e:
            cli_message("<red>{}</red>".format(e))
            send_usage_message(data_context=context,
                               event="cli.validation_operator.run",
                               success=False)
            sys.exit(1)

        if not results["success"]:
            cli_message("Validation failed!")
            send_usage_message(data_context=context,
                               event="cli.validation_operator.run",
                               success=True)
            sys.exit(1)
        else:
            cli_message("Validation succeeded!")
            send_usage_message(data_context=context,
                               event="cli.validation_operator.run",
                               success=True)
            sys.exit(0)
    except Exception as e:
        send_usage_message(data_context=context,
                           event="cli.validation_operator.run",
                           success=False)
        raise e
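The non-interactive mode described in the docstring expects a JSON file with the same structure the interactive branch builds in its `validation_config` dict: a `validation_operator_name` plus a list of `batches`, each pairing `batch_kwargs` with one or more suite names. An illustrative way to produce such a file (all names and kwargs are placeholders):

# Write an illustrative validation config for --validation_config_file;
# the structure mirrors the dict built in the interactive branch above.
import json

validation_config = {
    "validation_operator_name": "action_list_operator",
    "batches": [
        {
            "batch_kwargs": {"path": "data/my_data.csv", "datasource": "my_datasource"},
            "expectation_suite_names": ["my_suite"],
        }
    ],
}

with open("validation_config.json", "w") as f:
    json.dump(validation_config, f, indent=2)

The file would then be passed as `great_expectations validation-operator run --validation_config_file validation_config.json`.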
Example no. 6
def _suite_edit(suite, datasource, directory, jupyter, batch_kwargs, usage_event):
    batch_kwargs_json = batch_kwargs
    batch_kwargs = None
    context = toolkit.load_data_context_with_error_handling(directory)

    try:
        suite = toolkit.load_expectation_suite(context, suite, usage_event)
        citations = suite.get_citations(require_batch_kwargs=True)

        if batch_kwargs_json:
            try:
                batch_kwargs = json.loads(batch_kwargs_json)
                if datasource:
                    batch_kwargs["datasource"] = datasource
                _batch = toolkit.load_batch(context, suite, batch_kwargs)
            except json_parse_exception as je:
                cli_message(
                    "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(
                        je
                    )
                )
                send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)
            except ge_exceptions.DataContextError:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
                send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)
            except ValueError as ve:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(
                        ve
                    )
                )
                send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)
        elif citations:
            citation = citations[-1]
            batch_kwargs = citation.get("batch_kwargs")

        if not batch_kwargs:
            cli_message(
                """
A batch of data is required to edit the suite - let's help you specify it."""
            )

            additional_batch_kwargs = None
            try:
                data_source = toolkit.select_datasource(context, datasource_name=datasource)
            except ValueError as ve:
                cli_message("<red>{}</red>".format(ve))
                send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                send_usage_message(
                    data_context=context, event=usage_event, success=False
                )
                sys.exit(1)

            if batch_kwargs is None:
                (
                    datasource_name,
                    batch_kwargs_generator,
                    data_asset,
                    batch_kwargs,
                ) = get_batch_kwargs(context, datasource_name=data_source.name,
                                     additional_batch_kwargs=additional_batch_kwargs)

        notebook_name = "edit_{}.ipynb".format(suite.expectation_suite_name)
        notebook_path = _get_notebook_path(context, notebook_name)
        SuiteEditNotebookRenderer().render_to_disk(suite, notebook_path, batch_kwargs)

        if not jupyter:
            cli_message(
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )

        payload = edit_expectation_suite_usage_statistics(
            data_context=context, expectation_suite_name=suite.expectation_suite_name
        )

        send_usage_message(
            data_context=context, event=usage_event, event_payload=payload, success=True
        )

        if jupyter:
            toolkit.launch_jupyter_notebook(notebook_path)

    except Exception as e:
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise e
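For reference, the --batch_kwargs value handled above arrives as a JSON string; after parsing, a --datasource option (if given) is injected into the dict before the batch is loaded. A short sketch of that handling (the keys shown are placeholders for whatever your datasource expects):

# What the --batch_kwargs parsing above amounts to; keys are illustrative.
import json

batch_kwargs_json = '{"path": "data/my_data.csv"}'  # as passed on the command line
batch_kwargs = json.loads(batch_kwargs_json)
batch_kwargs["datasource"] = "my_datasource"  # applied only when --datasource is given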
Example no. 7
def _suite_edit_workflow(
    context: DataContext,
    expectation_suite_name: str,
    profile: bool,
    usage_event: str,
    interactive: bool,
    no_jupyter: bool,
    create_if_not_exist: Optional[bool] = False,
    datasource_name: Optional[str] = None,
    batch_request: Optional[
        Union[str, Dict[str, Union[str, int, Dict[str, Any]]]]
    ] = None,
    additional_batch_request_args: Optional[
        Dict[str, Union[str, int, Dict[str, Any]]]
    ] = None,
    suppress_usage_message: Optional[bool] = False,
    assume_yes: Optional[bool] = False,
):
    # The suppress_usage_message flag is set to True when _suite_edit_workflow is
    # called by _suite_new_workflow(); otherwise it defaults to False.
    if suppress_usage_message:
        usage_event = None

    suite: ExpectationSuite = toolkit.load_expectation_suite(
        data_context=context,
        expectation_suite_name=expectation_suite_name,
        usage_event=usage_event,
        create_if_not_exist=create_if_not_exist,
    )

    try:
        if interactive or profile:
            batch_request_from_citation_is_up_to_date: bool = True

            batch_request_from_citation: Optional[
                Union[str, Dict[str, Union[str, Dict[str, Any]]]]
            ] = toolkit.get_batch_request_from_citations(expectation_suite=suite)

            if batch_request is not None and isinstance(batch_request, str):
                batch_request = toolkit.get_batch_request_from_json_file(
                    batch_request_json_file_path=batch_request,
                    data_context=context,
                    usage_event=usage_event,
                    suppress_usage_message=suppress_usage_message,
                )
                if batch_request != batch_request_from_citation:
                    batch_request_from_citation_is_up_to_date = False

            if not (
                batch_request
                and isinstance(batch_request, dict)
                and BatchRequest(**batch_request)
            ):
                if (
                    batch_request_from_citation
                    and isinstance(batch_request_from_citation, dict)
                    and BatchRequest(**batch_request_from_citation)
                ):
                    batch_request = copy.deepcopy(batch_request_from_citation)
                else:
                    batch_request = toolkit.get_batch_request_using_datasource_name(
                        data_context=context,
                        datasource_name=datasource_name,
                        usage_event=usage_event,
                        suppress_usage_message=False,
                        additional_batch_request_args=additional_batch_request_args,
                    )
                    if batch_request != batch_request_from_citation:
                        batch_request_from_citation_is_up_to_date = False

            if not batch_request_from_citation_is_up_to_date:
                toolkit.add_citation_with_batch_request(
                    data_context=context,
                    expectation_suite=suite,
                    batch_request=batch_request,
                )

        notebook_name: str = "edit_{}.ipynb".format(expectation_suite_name)
        notebook_path: str = _get_notebook_path(context, notebook_name)

        if profile:
            if not assume_yes:
                toolkit.prompt_profile_to_create_a_suite(
                    data_context=context,
                    expectation_suite_name=expectation_suite_name)

            renderer: SuiteProfileNotebookRenderer = SuiteProfileNotebookRenderer(
                context=context,
                expectation_suite_name=expectation_suite_name,
                batch_request=batch_request,
            )
            renderer.render_to_disk(notebook_file_path=notebook_path)
        else:
            SuiteEditNotebookRenderer.from_data_context(data_context=context).render_to_disk(
                suite=suite,
                notebook_file_path=notebook_path,
                batch_request=batch_request,
            )

        if no_jupyter:
            cli_message(
                string=f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )
        else:
            cli_message(
                string="""<green>Opening a notebook for you now to edit your expectation suite!
If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"""
            )

        payload: dict = edit_expectation_suite_usage_statistics(
            data_context=context,
            expectation_suite_name=suite.expectation_suite_name)

        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context,
                event=usage_event,
                event_payload=payload,
                success=True,
            )

        if not no_jupyter:
            toolkit.launch_jupyter_notebook(notebook_path=notebook_path)

    except (
        ge_exceptions.DataContextError,
        ge_exceptions.ProfilerError,
        ValueError,
        OSError,
        SQLAlchemyError,
    ) as e:
        cli_message(string="<red>{}</red>".format(e))
        if not suppress_usage_message:
            toolkit.send_usage_message(data_context=context,
                                       event=usage_event,
                                       success=False)
        sys.exit(1)

    except Exception as e:
        if not suppress_usage_message:
            toolkit.send_usage_message(data_context=context,
                                       event=usage_event,
                                       success=False)
        raise e
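When `batch_request` is passed in as a string, it is treated as the path to a JSON file (see toolkit.get_batch_request_from_json_file above) and validated by constructing BatchRequest(**batch_request). A sketch of producing such a file, assuming the v3 BatchRequest fields datasource_name, data_connector_name, and data_asset_name; all values are placeholders:

# Illustrative batch_request JSON for the file-based path above; field names
# assume the v3 BatchRequest API and the values are placeholders.
import json

batch_request = {
    "datasource_name": "my_datasource",
    "data_connector_name": "my_data_connector",
    "data_asset_name": "my_data_asset",
}

with open("batch_request.json", "w") as f:
    json.dump(batch_request, f, indent=2)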