Python select_datasource Examples, great_expectations.cli.datasource.select_datasource Python Examples

Example #1

0

Show file

File: suite.py Project: bernardofrassy/great_expectations

def _suite_edit(suite, datasource, directory, jupyter, batch_kwargs, usage_event):
    """Render a notebook for editing an existing expectation suite.

    The batch kwargs written into the notebook come from, in order of
    preference: the ``batch_kwargs`` JSON string, the last citation on the
    suite that carries batch kwargs, or an interactive selection.

    :param suite: identifier handed to ``load_expectation_suite``
    :param datasource: optional datasource name; when given it is written into
        the parsed batch kwargs under ``"datasource"`` and passed to
        ``select_datasource``
    :param directory: handed to ``load_data_context_with_error_handling``
    :param jupyter: when truthy the notebook is launched, otherwise the command
        to open it is printed
    :param batch_kwargs: optional JSON string describing the batch
    :param usage_event: event name reported through ``send_usage_message``
    """
    raw_batch_kwargs = batch_kwargs
    batch_kwargs = None
    context = load_data_context_with_error_handling(directory)

    def _abort(message):
        # All user-facing failures share one sequence: explain, report, exit 1.
        cli_message(message)
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)

    try:
        suite = load_expectation_suite(context, suite)
        citations = suite.get_citations(require_batch_kwargs=True)

        if raw_batch_kwargs:
            try:
                batch_kwargs = json.loads(raw_batch_kwargs)
                if datasource:
                    batch_kwargs["datasource"] = datasource
                # Loading the batch is only a check that the kwargs are usable.
                toolkit.load_batch(context, suite, batch_kwargs)
            except json_parse_exception as je:
                _abort(
                    "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(je)
                )
            except ge_exceptions.DataContextError:
                _abort(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
            except ValueError as ve:
                _abort(
                    "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(ve)
                )
        elif citations:
            # Fall back to the batch kwargs recorded by the last citation.
            batch_kwargs = citations[-1].get("batch_kwargs")

        if not batch_kwargs:
            cli_message(
                "\nA batch of data is required to edit the suite - let's help you to specify it."
            )

            try:
                data_source = select_datasource(context, datasource_name=datasource)
            except ValueError as ve:
                _abort("<red>{}</red>".format(ve))

            if not data_source:
                _abort("<red>No datasources found in the context.</red>")

            # Only a missing value triggers the interactive selection; a falsy
            # but non-None value (e.g. an empty dict from a citation) is kept.
            if batch_kwargs is None:
                _, _, _, batch_kwargs = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    additional_batch_kwargs=None,
                )

        suite_name = suite.expectation_suite_name
        notebook_path = _get_notebook_path(context, "edit_{}.ipynb".format(suite_name))
        SuiteEditNotebookRenderer().render_to_disk(suite, notebook_path, batch_kwargs)

        if not jupyter:
            cli_message(
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )

        payload = edit_expectation_suite_usage_statistics(
            data_context=context, expectation_suite_name=suite.expectation_suite_name
        )
        # Report success before handing control to the notebook launcher.
        send_usage_message(
            data_context=context, event=usage_event, event_payload=payload, success=True
        )

        if jupyter:
            toolkit.launch_jupyter_notebook(notebook_path)

    except Exception as e:
        # Unexpected failures are reported as unsuccessful and then propagated.
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise e

Example #2

0

Show file

File: tap.py Project: tsanikgr/great_expectations

def _get_datasource(context, datasource):
    """Return the datasource picked by ``select_datasource``.

    Prints an error and exits with status 1 when the selection is falsy.
    """
    selected = select_datasource(context, datasource_name=datasource)
    if selected:
        return selected

    cli_message("<red>No datasources found in the context.</red>")
    sys.exit(1)

Example #3

0

Show file

File: toolkit.py Project: mryanclark/great_expectations

def create_expectation_suite(
    context,
    datasource_name=None,
    batch_kwargs_generator_name=None,
    generator_asset=None,
    batch_kwargs=None,
    expectation_suite_name=None,
    additional_batch_kwargs=None,
    empty_suite=False,
    show_intro_message=False,
    open_docs=False,
    profiler_configuration="demo",
):
    """
    Create a new expectation suite, asking the user for whatever is missing.

    Exits the process with status 1 when no datasource can be selected or when
    the requested suite name is already taken.

    :return: a tuple: (success, suite name)
    """
    if show_intro_message:
        if not empty_suite:
            cli_message(
                "\n<cyan>========== Create sample Expectations ==========</cyan>\n\n"
            )

    data_source = select_datasource(context, datasource_name=datasource_name)
    if data_source is None:
        # select_datasource has already shown the error; nothing left but to exit.
        sys.exit(1)
    datasource_name = data_source.name

    if expectation_suite_name in context.list_expectation_suite_names():
        tell_user_suite_exists(expectation_suite_name)
        sys.exit(1)

    batch_is_underspecified = any(
        value is None
        for value in (batch_kwargs_generator_name, generator_asset, batch_kwargs)
    )
    if batch_is_underspecified:
        resolved = get_batch_kwargs(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=batch_kwargs_generator_name,
            generator_asset=generator_asset,
            additional_batch_kwargs=additional_batch_kwargs,
        )
        (
            datasource_name,
            batch_kwargs_generator_name,
            generator_asset,
            batch_kwargs,
        ) = resolved
        # The additional_batch_kwargs were "consumed" by the call above.
        additional_batch_kwargs = {}

    if expectation_suite_name is None:
        suggested_name = _get_default_expectation_suite_name(
            batch_kwargs, generator_asset
        )
        # Keep prompting until the user picks a name that is not in use yet.
        while True:
            expectation_suite_name = click.prompt(
                "\nName the new expectation suite", default=suggested_name
            )
            if expectation_suite_name not in context.list_expectation_suite_names():
                break
            tell_user_suite_exists(expectation_suite_name)

    if empty_suite:
        create_empty_suite(context, expectation_suite_name, batch_kwargs)
        return True, expectation_suite_name

    profiling_results = _profile_to_create_a_suite(
        additional_batch_kwargs,
        batch_kwargs,
        batch_kwargs_generator_name,
        context,
        datasource_name,
        expectation_suite_name,
        generator_asset,
        profiler_configuration,
    )

    build_docs(context, view=False)
    if open_docs:
        _attempt_to_open_validation_results_in_data_docs(context, profiling_results)

    return True, expectation_suite_name

Example #4

0

Show file

File: suite.py Project: rlshuhart/great_expectations

def suite_edit(suite, datasource, directory, jupyter, batch_kwargs):
    """
    Generate a Jupyter notebook for editing an existing expectation suite.

    The SUITE argument is required. This is the name you gave to the suite
    when you created it.

    A batch of data is required to edit the suite, which is used as a sample.

    The edit command will help you specify a batch interactively. Or you can
    specify them manually by providing --batch-kwargs in valid JSON format.

    Read more about specifying batches of data in the documentation: https://docs.greatexpectations.io/
    """
    # NOTE(review): the docstring above is presumably surfaced as CLI help
    # text, so it is left verbatim and implementation notes live in comments.
    #
    # Flow: build the data context, load the suite, obtain batch kwargs (from
    # the --batch-kwargs JSON or interactively), render the edit notebook, and
    # optionally launch Jupyter on it.
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return
    except ge_exceptions.ZeroDotSevenConfigVersionError as err:
        # The constructor raised, so `context` is not bound here; reading
        # `context.root_directory` would fail with UnboundLocalError. Use the
        # directory we were given, or run the same discovery the constructor
        # performs when no directory is passed.
        root_directory = directory
        if root_directory is None:
            root_directory = DataContext.find_context_root_dir()
        _offer_to_install_new_template(err, root_directory)
        return

    suite = _load_suite(context, suite)

    if batch_kwargs:
        try:
            batch_kwargs = json.loads(batch_kwargs)
            if datasource:
                batch_kwargs["datasource"] = datasource
            _batch = context.get_batch(batch_kwargs, suite.expectation_suite_name)
        except json_parse_exception as je:
            cli_message("<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(je))
            sys.exit(1)
        except ge_exceptions.DataContextError:
            cli_message("<red>Please check that your batch_kwargs are able to load a batch.</red>")
            sys.exit(1)
        except ValueError as ve:
            cli_message("<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(ve))
            sys.exit(1)
        else:
            # Explicit check instead of `assert`: asserts vanish under
            # `python -O` and an AssertionError would skip the friendly
            # error handling used for every other bad-batch case.
            if not isinstance(_batch, DataAsset):
                cli_message("<red>Please check that your batch_kwargs are able to load a batch.</red>")
                sys.exit(1)
    else:
        cli_message("""
A batch of data is required to edit the suite - let's help you to specify it."""
        )

        try:
            data_source = select_datasource(context, datasource_name=datasource)
        except ValueError as ve:
            cli_message("<red>{}</red>".format(ve))
            sys.exit(1)

        if not data_source:
            cli_message("<red>No datasources found in the context.</red>")
            sys.exit(1)

        # Only a missing value triggers the interactive selection; any other
        # falsy value that reached this branch is passed through unchanged.
        if batch_kwargs is None:
            _, _, _, batch_kwargs = get_batch_kwargs(
                context,
                datasource_name=data_source.name,
                generator_name=None,
                generator_asset=None,
                additional_batch_kwargs=None
            )

    notebook_name = "{}.ipynb".format(suite.expectation_suite_name)

    notebook_path = os.path.join(context.root_directory, context.GE_EDIT_NOTEBOOK_DIR, notebook_name)
    NotebookRenderer().render_to_disk(suite, batch_kwargs, notebook_path)

    cli_message(
        "To continue editing this suite, run <green>jupyter notebook {}</green>".format(
            notebook_path
        )
    )

    if jupyter:
        subprocess.call(["jupyter", "notebook", notebook_path])

Example #5

0

Show file

File: suite.py Project: cicdw/great_expectations

def _suite_edit(suite, datasource, directory, jupyter, batch_kwargs):
    """Render a notebook for editing an existing expectation suite.

    The batch kwargs written into the notebook come from, in order of
    preference: the ``batch_kwargs`` JSON string, the last citation on the
    suite that carries batch kwargs, or an interactive selection.

    :param suite: identifier handed to ``_load_suite``
    :param datasource: optional datasource name; when given it is written into
        the parsed batch kwargs under ``"datasource"`` and passed to
        ``select_datasource``
    :param directory: passed to the ``DataContext`` constructor
    :param jupyter: when truthy Jupyter is started on the notebook, otherwise
        the command to open it is printed
    :param batch_kwargs: optional JSON string describing the batch
    """
    batch_kwargs_json = batch_kwargs
    batch_kwargs = None
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return

    suite = _load_suite(context, suite)
    citations = suite.get_citations(sort=True, require_batch_kwargs=True)

    if batch_kwargs_json:
        try:
            batch_kwargs = json.loads(batch_kwargs_json)
            if datasource:
                batch_kwargs["datasource"] = datasource
            _batch = context.get_batch(batch_kwargs,
                                       suite.expectation_suite_name)
        except json_parse_exception as je:
            cli_message(
                "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>"
                .format(je))
            sys.exit(1)
        except ge_exceptions.DataContextError:
            cli_message(
                "<red>Please check that your batch_kwargs are able to load a batch.</red>"
            )
            sys.exit(1)
        except ValueError as ve:
            cli_message(
                "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>"
                .format(ve))
            sys.exit(1)
        else:
            # Explicit check instead of `assert`: asserts vanish under
            # `python -O` and an AssertionError would skip the friendly
            # error handling used for every other bad-batch case.
            if not isinstance(_batch, DataAsset):
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
                sys.exit(1)
    elif citations:
        # Fall back to the batch kwargs recorded by the last citation.
        batch_kwargs = citations[-1].get("batch_kwargs")

    if not batch_kwargs:
        cli_message("""
A batch of data is required to edit the suite - let's help you to specify it."""
                    )

        try:
            data_source = select_datasource(context,
                                            datasource_name=datasource)
        except ValueError as ve:
            cli_message("<red>{}</red>".format(ve))
            sys.exit(1)

        if not data_source:
            cli_message("<red>No datasources found in the context.</red>")
            sys.exit(1)

        # Only a missing value triggers the interactive selection; a falsy
        # but non-None value (e.g. an empty dict from a citation) is kept.
        if batch_kwargs is None:
            _, _, _, batch_kwargs = get_batch_kwargs(
                context,
                datasource_name=data_source.name,
                generator_name=None,
                generator_asset=None,
                additional_batch_kwargs=None,
            )

    notebook_name = "{}.ipynb".format(suite.expectation_suite_name)

    notebook_path = os.path.join(context.root_directory,
                                 context.GE_EDIT_NOTEBOOK_DIR, notebook_name)
    NotebookRenderer().render_to_disk(suite, notebook_path, batch_kwargs)

    if jupyter:
        subprocess.call(["jupyter", "notebook", notebook_path])
    else:
        cli_message("To continue editing this suite, run <green>jupyter "
                    f"notebook {notebook_path}</green>")

Example #6

0

Show file

File: validation_operator.py Project: kepiej/great_expectations

def validation_operator_run(name, run_id, validation_config_file, suite, directory):
    # Note though the long lines here aren't pythonic, they look best if Click does the line wraps.
    """
    Run a validation operator against some data.

    There are two modes to run this command:

    1. Interactive (good for development):

        Specify the name of the validation operator using the --name argument
        and the name of the expectation suite using the --suite argument.

        The cli will help you specify the batch of data that you want to
        validate interactively.


    2. Non-interactive (good for production):

        Use the `--validation_config_file` argument to specify the path of the validation configuration JSON file. This file can be used to instruct a validation operator to validate multiple batches of data and use multiple expectation suites to validate each batch.

        Learn how to create a validation config file here:
        https://great-expectations.readthedocs.io/en/latest/command_line.html#great-expectations-validation-operator-run-validation-config-file-validation-config-file-path

        This command exits with 0 if the validation operator ran and the "success" attribute in its return object is True. Otherwise, the command exits with 1.

    To learn more about validation operators, go here:
    https://great-expectations.readthedocs.io/en/latest/features/validation.html#validation-operators
    """
    # Implementation outline:
    #   1. Build the data context (exit 1 if no config is found).
    #   2. Obtain a validation config: load and check the JSON file, or
    #      assemble one interactively from --name, --suite and a chosen batch.
    #   3. Load one batch per (batch_kwargs, suite name) pair and run the
    #      operator over all of them.
    #   4. Exit 0 or 1 according to the "success" entry of the results.

    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("Failed to process <red>{}</red>".format(err.message))
        sys.exit(1)

    if validation_config_file is not None:
        try:
            with open(validation_config_file) as f:
                validation_config = json.load(f)
        except (
            IOError,
            json_parse_exception
        ) as e:
            cli_message(f"Failed to process the --validation_config_file argument: <red>{e}</red>")
            sys.exit(1)

        validation_config_error_message = _validate_valdiation_config(validation_config)
        if validation_config_error_message is not None:
            cli_message("<red>The validation config in {0:s} is misconfigured: {1:s}</red>".format(validation_config_file, validation_config_error_message))
            sys.exit(1)

    else:
        if suite is None:
            cli_message(
"""
Please use --suite argument to specify the name of the expectation suite.
Call `great_expectation suite list` command to list the expectation suites in your project.
"""
            )
            # NOTE(review): exiting with 0 here contradicts the help text
            # ("Otherwise, the command exits with 1"); left unchanged because
            # scripts may depend on the current status - confirm intent.
            sys.exit(0)

        suite = load_expectation_suite(context, suite)

        if name is None:
            cli_message(
"""
Please use --name argument to specify the name of the validation operator.
Call `great_expectation validation-operator list` command to list the operators in your project.
"""
            )
            sys.exit(1)
        elif name not in context.list_validation_operator_names():
            cli_message(
                f"""
Could not find a validation operator {name}.
Call `great_expectation validation-operator list` command to list the operators in your project.
"""
            )
            sys.exit(1)

        cli_message(
            """
Let's help you specify the batch of data your want the validation operator to validate."""
        )

        try:
            data_source = select_datasource(context)
        except ValueError as ve:
            cli_message("<red>{}</red>".format(ve))
            sys.exit(1)

        if not data_source:
            cli_message("<red>No datasources found in the context.</red>")
            sys.exit(1)

        # Interactive mode never has batch kwargs up front, so always ask.
        _, _, _, batch_kwargs = get_batch_kwargs(
            context,
            datasource_name=data_source.name,
            generator_name=None,
            generator_asset=None,
            additional_batch_kwargs=None,
        )

        # Same shape as the JSON file accepted in non-interactive mode.
        validation_config = {
            "validation_operator_name": name,
            "batches": [
                {
                "batch_kwargs": batch_kwargs,
                "expectation_suite_names": [suite.expectation_suite_name]
                }
            ]
        }

    try:
        validation_operator_name = validation_config["validation_operator_name"]
        batches_to_validate = []
        for entry in validation_config["batches"]:
            for expectation_suite_name in entry["expectation_suite_names"]:
                batch = context.get_batch(entry["batch_kwargs"], expectation_suite_name)
                batches_to_validate.append(batch)

        if run_id is None:
            run_id = datetime.utcnow().strftime("%Y%m%dT%H%M%S.%fZ")

        # Validate every collected batch. Passing `[batch]` would submit only
        # the last one loaded and raise NameError when the config lists none.
        results = context.run_validation_operator(
            validation_operator_name,
            assets_to_validate=batches_to_validate,
            run_id=run_id)

    except (
        ge_exceptions.DataContextError,
        IOError,
        SQLAlchemyError,
    ) as e:
        cli_message("<red>{}</red>".format(e))
        sys.exit(1)

    if not results["success"]:
        cli_message("Validation Failed!")
        sys.exit(1)
    else:
        cli_message("Validation Succeeded!")
        sys.exit(0)