def test_get_batch_kwargs_for_specific_dataasset(empty_data_context,
                                                 filesystem_csv):
    """Batch kwargs are resolved for an explicitly named data asset.

    Registers a Pandas datasource whose ``subdir_reader`` generator points at
    the directory supplied by the ``filesystem_csv`` fixture, then requests
    batch kwargs for asset ``"f1"`` without naming a datasource or generator.
    """
    context = DataContext(empty_data_context.root_directory)

    subdir_reader_config = {
        "class_name": "SubdirReaderBatchKwargsGenerator",
        "base_directory": str(filesystem_csv),
    }
    context.add_datasource(
        "wow_a_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
        batch_kwargs_generators={"subdir_reader": subdir_reader_config},
    )

    # Datasource and generator are deliberately left unspecified; only the
    # asset name is given.
    batch = get_batch_kwargs(
        context,
        datasource_name=None,
        batch_kwargs_generator_name=None,
        data_asset_name="f1",
        additional_batch_kwargs={},
    )

    assert batch == {
        "data_asset_name": "f1",
        "datasource": "wow_a_datasource",
        "path": os.path.join(filesystem_csv, "f1.csv"),
    }
Esempio n. 2
0
def _suite_scaffold(suite: str, directory: str, jupyter: bool) -> None:
    """Create a new expectation suite and render its scaffold notebook.

    The process exits with status 1, after reporting a failed usage event,
    when a suite named ``suite`` already exists or when no datasource is
    selected. Otherwise the suite is created, ``scaffold_<suite>.ipynb`` is
    rendered to disk, a successful usage event is reported, and the notebook
    is either launched (``jupyter`` truthy) or the command to open it is
    printed.
    """
    usage_event = "cli.suite.scaffold"
    context = toolkit.load_data_context_with_error_handling(directory)
    notebook_path = _get_notebook_path(context, f"scaffold_{suite}.ipynb")

    def _report_usage(succeeded: bool) -> None:
        # Every outcome of this command reports the same event; only the
        # success flag differs.
        send_usage_message(
            data_context=context,
            event=usage_event,
            api_version="v2",
            success=succeeded,
        )

    # Refuse to scaffold over an existing suite.
    if suite in context.list_expectation_suite_names():
        toolkit.tell_user_suite_exists(suite)
        if os.path.isfile(notebook_path):
            cli_message(
                f"  - If you wish to adjust your scaffolding, you can open this notebook with jupyter: `{notebook_path}` <red>(Please note that if you run that notebook, you will overwrite your existing suite.)</red>"
            )
        _report_usage(False)
        sys.exit(1)

    chosen_datasource = toolkit.select_datasource(context)
    if chosen_datasource is None:
        _report_usage(False)
        sys.exit(1)

    new_suite = context.create_expectation_suite(suite)
    # Only the batch kwargs (4th element) of the returned tuple are needed.
    _, _, _, batch_kwargs = get_batch_kwargs(
        context, datasource_name=chosen_datasource.name
    )
    SuiteScaffoldNotebookRenderer(context, new_suite, batch_kwargs).render_to_disk(
        notebook_path
    )

    _report_usage(True)

    if not jupyter:
        cli_message(
            f"To continue scaffolding this suite, run `jupyter notebook {notebook_path}`"
        )
        return
    toolkit.launch_jupyter_notebook(notebook_path)
Esempio n. 3
0
def create_expectation_suite(
    context,
    datasource_name=None,
    batch_kwargs_generator_name=None,
    generator_asset=None,
    batch_kwargs=None,
    expectation_suite_name=None,
    additional_batch_kwargs=None,
    empty_suite=False,
    show_intro_message=False,
    flag_build_docs=True,
    open_docs=False,
    profiler_configuration="demo",
    data_asset_name=None,
):
    """
    Create a new expectation suite, either empty or populated by profiling.

    WARNING: the flow and name of this method and its interaction with
    _profile_to_create_a_suite require a serious revisiting.

    ``generator_asset`` is a deprecated alias: when truthy it overrides
    ``data_asset_name`` and a DeprecationWarning is emitted.

    The process exits with status 1 when no datasource can be selected or
    when ``expectation_suite_name`` is already taken. When no suite name is
    given the user is prompted until an unused one is entered.

    :return: a tuple: (success, suite name, profiling_results); the last
        element is None when ``empty_suite`` is truthy.
    """
    if generator_asset:
        warnings.warn(
            "The 'generator_asset' argument will be deprecated and renamed to 'data_asset_name'. "
            "Please update code accordingly.",
            DeprecationWarning,
        )
        data_asset_name = generator_asset

    if not empty_suite and show_intro_message:
        cli_message(
            "\n<cyan>========== Create sample Expectations ==========</cyan>\n\n"
        )

    selected_datasource = select_datasource(context, datasource_name=datasource_name)
    if selected_datasource is None:
        # select_datasource has already told the user what went wrong.
        sys.exit(1)
    datasource_name = selected_datasource.name

    if expectation_suite_name in context.list_expectation_suite_names():
        tell_user_suite_exists(expectation_suite_name)
        sys.exit(1)

    # Any missing piece of the batch description triggers the batch-kwargs
    # lookup, which returns the complete description.
    batch_description = (batch_kwargs_generator_name, data_asset_name, batch_kwargs)
    if any(part is None for part in batch_description):
        resolved = get_batch_kwargs(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=batch_kwargs_generator_name,
            data_asset_name=data_asset_name,
            additional_batch_kwargs=additional_batch_kwargs,
        )
        (
            datasource_name,
            batch_kwargs_generator_name,
            data_asset_name,
            batch_kwargs,
        ) = resolved
        # The additional kwargs were "consumed" by the call above, so they
        # must not be passed on a second time.
        additional_batch_kwargs = {}

    if expectation_suite_name is None:
        suggested_name = _get_default_expectation_suite_name(
            batch_kwargs, data_asset_name
        )
        # Keep asking until the user picks a name that is not in use.
        while True:
            expectation_suite_name = click.prompt(
                "\nName the new Expectation Suite",
                default=suggested_name,
            )
            if expectation_suite_name not in context.list_expectation_suite_names():
                break
            tell_user_suite_exists(expectation_suite_name)

    if empty_suite:
        create_empty_suite(context, expectation_suite_name, batch_kwargs)
        return True, expectation_suite_name, None

    profiling_results = _profile_to_create_a_suite(
        additional_batch_kwargs,
        batch_kwargs,
        batch_kwargs_generator_name,
        context,
        datasource_name,
        expectation_suite_name,
        data_asset_name,
        profiler_configuration,
    )

    if flag_build_docs:
        build_docs(context, view=False)
        # Docs are only opened when they were (re)built in this call.
        if open_docs:
            attempt_to_open_validation_results_in_data_docs(context, profiling_results)

    return True, expectation_suite_name, profiling_results
Esempio n. 4
0
def validation_operator_run(name, run_name, validation_config_file, suite, directory):
    # Note though the long lines here aren't pythonic, they look best if Click does the line wraps.
    """
    Run a validation operator against some data.

    There are two modes to run this command:

    1. Interactive (good for development):

        Specify the name of the validation operator using the --name argument
        and the name of the expectation suite using the --suite argument.

        The cli will help you specify the batch of data that you want to
        validate interactively.


    2. Non-interactive (good for production):

        Use the `--validation_config_file` argument to specify the path of the validation configuration JSON file. This file can be used to instruct a validation operator to validate multiple batches of data and use multiple expectation suites to validate each batch.

        Learn how to create a validation config file here:
        https://great-expectations.readthedocs.io/en/latest/command_line.html#great-expectations-validation-operator-run-validation-config-file-validation-config-file-path

        This command exits with 0 if the validation operator ran and the "success" attribute in its return object is True. Otherwise, the command exits with 1.

    To learn more about validation operators, go here:
    https://great-expectations.readthedocs.io/en/latest/features/validation.html#validation-operators
    """
    # Maintainer notes (kept out of the docstring, which is left untouched
    # because it reads as user-facing help text):
    #   * Every path through this function ends in sys.exit(); 0 only when the
    #     operator ran and reported success, 1 otherwise.
    #   * sys.exit() raises SystemExit, which is not an Exception subclass, so
    #     the outer ``except Exception`` below never intercepts a deliberate
    #     exit and never double-reports a usage event for it.
    usage_event = "cli.validation_operator.run"

    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        # No context means no usage message can be sent; just exit.
        cli_message(f"Failed to process <red>{err.message}</red>")
        sys.exit(1)

    def _exit_with_failure():
        """Report a failed usage event and terminate with exit status 1."""
        toolkit.send_usage_message(
            data_context=context,
            event=usage_event,
            success=False,
        )
        sys.exit(1)

    # The loaded ExpectationSuite object. It is only populated in interactive
    # mode; in config-file mode the ``suite`` argument is at most a raw
    # string and must not be treated as a suite object.
    loaded_suite = None

    try:
        if validation_config_file is not None:
            # ---- Non-interactive mode: read and validate the config file.
            try:
                with open(validation_config_file) as f:
                    validation_config = json.load(f)
            except (OSError, json_parse_exception) as e:
                cli_message(
                    f"Failed to process the --validation_config_file argument: <red>{e}</red>"
                )
                _exit_with_failure()

            validation_config_error_message = _validate_valdiation_config(
                validation_config
            )
            if validation_config_error_message is not None:
                cli_message(
                    "<red>The validation config in {:s} is misconfigured: {:s}</red>".format(
                        validation_config_file, validation_config_error_message
                    )
                )
                _exit_with_failure()

        else:
            # ---- Interactive mode: build the config from CLI arguments.
            if suite is None:
                cli_message(
                    """
Please use --suite argument to specify the name of the expectation suite.
Call `great_expectation suite list` command to list the expectation suites in your project.
"""
                )
                # A missing required argument is a failure: exit non-zero,
                # matching the reported usage event and the --name check.
                _exit_with_failure()

            loaded_suite = toolkit.load_expectation_suite(context, suite, usage_event)

            if name is None:
                cli_message(
                    """
Please use --name argument to specify the name of the validation operator.
Call `great_expectation validation-operator list` command to list the operators in your project.
"""
                )
                _exit_with_failure()
            elif name not in context.list_validation_operator_names():
                cli_message(
                    f"""
Could not find a validation operator {name}.
Call `great_expectation validation-operator list` command to list the operators in your project.
"""
                )
                _exit_with_failure()

            cli_message(
                """
Let us help you specify the batch of data your want the validation operator to validate."""
            )

            try:
                data_source = toolkit.select_datasource(context)
            except ValueError as ve:
                cli_message(f"<red>{ve}</red>")
                _exit_with_failure()

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                _exit_with_failure()

            # Only the batch kwargs (4th element) of the result are used.
            _, _, _, batch_kwargs = get_batch_kwargs(
                context,
                datasource_name=data_source.name,
                batch_kwargs_generator_name=None,
                data_asset_name=None,
                additional_batch_kwargs=None,
            )

            validation_config = {
                "validation_operator_name": name,
                "batches": [
                    {
                        "batch_kwargs": batch_kwargs,
                        "expectation_suite_names": [
                            loaded_suite.expectation_suite_name
                        ],
                    }
                ],
            }

        # ---- Common path: load every (batch, suite) pair and run the operator.
        try:
            validation_operator_name = validation_config["validation_operator_name"]
            batches_to_validate = [
                context.get_batch(entry["batch_kwargs"], expectation_suite_name)
                for entry in validation_config["batches"]
                for expectation_suite_name in entry["expectation_suite_names"]
            ]

            if run_name is None:
                run_name = datetime.datetime.now(datetime.timezone.utc).strftime(
                    "%Y%m%dT%H%M%S.%fZ"
                )

            run_id = RunIdentifier(run_name=run_name)

            operator_kwargs = {
                "assets_to_validate": batches_to_validate,
                "run_id": run_id,
            }
            # Evaluation parameters are forwarded only when a suite object was
            # actually loaded and it defines them.
            if (
                loaded_suite is not None
                and loaded_suite.evaluation_parameters is not None
            ):
                operator_kwargs[
                    "evaluation_parameters"
                ] = loaded_suite.evaluation_parameters

            results = context.run_validation_operator(
                validation_operator_name, **operator_kwargs
            )
        except (ge_exceptions.DataContextError, OSError, SQLAlchemyError) as e:
            cli_message(f"<red>{e}</red>")
            _exit_with_failure()

        # The usage event is reported as successful in both branches below:
        # the command itself ran to completion; only the exit status reflects
        # whether the data passed validation.
        if not results["success"]:
            cli_message("Validation failed!")
            toolkit.send_usage_message(
                data_context=context, event=usage_event, success=True
            )
            sys.exit(1)
        else:
            cli_message("Validation succeeded!")
            toolkit.send_usage_message(
                data_context=context, event=usage_event, success=True
            )
            sys.exit(0)
    except Exception:
        # Unexpected error: record the failure, then let it propagate with
        # its original traceback.
        toolkit.send_usage_message(
            data_context=context, event=usage_event, success=False
        )
        raise
Esempio n. 5
0
def _suite_edit(
    suite,
    datasource,
    directory,
    jupyter,
    batch_kwargs,
    usage_event,
    suppress_usage_message=False,
):
    """Render the "edit suite" notebook for an existing expectation suite.

    The batch used by the notebook is resolved in this order:

    1. ``batch_kwargs`` (a JSON string), with ``datasource`` injected when
       given; the batch is loaded once to prove the kwargs are usable.
    2. The ``batch_kwargs`` of the suite's most recent citation.
    3. An interactive prompt for a datasource and batch.

    :param suite: name of the suite to edit; rebound to the loaded suite.
    :param datasource: optional datasource name.
    :param directory: passed to the data-context loader.
    :param jupyter: when truthy, launch the notebook; otherwise print the
        command to open it.
    :param batch_kwargs: optional JSON string describing the batch.
    :param usage_event: event name used for all usage messages sent here.
    :param suppress_usage_message: when True, the usage messages for the
        handled exit paths and for the success path are not sent. Intended
        for the case where _suite_edit is called by _suite_new(), which is
        expected to pass True; the default is False.

    Exits with status 1 on invalid JSON, on batch kwargs that cannot load a
    batch, or when no datasource is available. Any other exception is
    reported as a failed usage event and re-raised.
    """
    batch_kwargs_json = batch_kwargs
    batch_kwargs = None
    context = toolkit.load_data_context_with_error_handling(directory)

    def _exit_with_failure():
        """Report a failed usage event (unless suppressed) and exit 1."""
        if not suppress_usage_message:
            send_usage_message(
                data_context=context,
                event=usage_event,
                api_version="v2",
                success=False,
            )
        sys.exit(1)

    try:
        suite = toolkit.load_expectation_suite(context, suite, usage_event)
        citations = suite.get_citations(require_batch_kwargs=True)

        if batch_kwargs_json:
            try:
                batch_kwargs = json.loads(batch_kwargs_json)
                if datasource:
                    batch_kwargs["datasource"] = datasource
                # Loaded only to validate the kwargs; the batch is not kept.
                toolkit.load_batch(context, suite, batch_kwargs)
            except json_parse_exception as je:
                cli_message(
                    "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(
                        je
                    )
                )
                # Invalid JSON is a failure, like the sibling handlers below.
                _exit_with_failure()
            except ge_exceptions.DataContextError:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
                _exit_with_failure()
            except ValueError as ve:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(
                        ve
                    )
                )
                _exit_with_failure()
        elif citations:
            # Fall back to the batch recorded by the most recent citation.
            batch_kwargs = citations[-1].get("batch_kwargs")

        if not batch_kwargs:
            cli_message(
                """
A batch of data is required to edit the suite - let's help you to specify it."""
            )

            try:
                data_source = toolkit.select_datasource(
                    context, datasource_name=datasource
                )
            except ValueError as ve:
                cli_message(f"<red>{ve}</red>")
                _exit_with_failure()

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                _exit_with_failure()

            # Having told the user a batch is required, always ask for one
            # here, including when the value so far is empty rather than None.
            _, _, _, batch_kwargs = get_batch_kwargs(
                context,
                datasource_name=data_source.name,
                batch_kwargs_generator_name=None,
                data_asset_name=None,
                additional_batch_kwargs=None,
            )

        notebook_name = f"edit_{suite.expectation_suite_name}.ipynb"
        notebook_path = _get_notebook_path(context, notebook_name)
        SuiteEditNotebookRenderer.from_data_context(context).render_to_disk(
            suite, notebook_path, batch_kwargs
        )

        if not jupyter:
            cli_message(
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )

        payload = edit_expectation_suite_usage_statistics(
            data_context=context, expectation_suite_name=suite.expectation_suite_name
        )

        if not suppress_usage_message:
            send_usage_message(
                data_context=context,
                event=usage_event,
                event_payload=payload,
                api_version="v2",
                success=True,
            )

        # Launch last so the usage message is sent before handing control to
        # the notebook server.
        if jupyter:
            toolkit.launch_jupyter_notebook(notebook_path)

    except Exception:
        # sys.exit() raises SystemExit, which is not caught here, so this
        # branch only sees unexpected errors.
        # NOTE(review): this message is sent even when suppress_usage_message
        # is True - confirm against the _suite_new() caller whether that is
        # intended.
        send_usage_message(
            data_context=context,
            event=usage_event,
            api_version="v2",
            success=False,
        )
        raise