Python DataContext.run_validation_operator 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: great_expectations

클래스/타입: DataContext

메소드/함수: run_validation_operator

hotexamples.com에서의 예제들: 5

Python DataContext.run_validation_operator - 5개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 great_expectations.DataContext.run_validation_operator에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

DataContext(30)

list_datasources(30)

get_expectation_suite(19)

create_expectation_suite(18)

get_docs_sites_urls(11)

_save_project_config(10)

validate_config(9)

list_expectation_suites(9)

list_expectation_suite_names(8)

create(8)

list_checkpoints(7)

add_datasource(7)

get_ge_config_version(7)

save_expectation_suite(6)

get_batch(6)

run_validation_operator(5)

get_validation_result(4)

does_config_exist_on_disk(4)

get_checkpoint(3)

build_data_docs(3)

is_project_initialized(2)

get_config(2)

get_validator(2)

find_context_root_dir(2)

set_ge_config_version(2)

list_validation_operator_names(2)

save_config_variable(1)

open_data_docs(1)

set_config(1)

profile_data_asset(1)

get_site_names(1)

list_validation_operators(1)

list_stores(1)

_project_config(1)

get_datasource(1)

get_available_data_asset_names(1)

config_variables_yml_exist(1)

all_uncommitted_directories_exist(1)

add_validation_operator(1)

write_project_template_to_disk(1)

예제 #1

파일 보기

파일: validation_operator.py 프로젝트: yjlee215/great_expectations

def validation_operator_run(name, run_name, validation_config_file, suite,
                            directory):
    # Note though the long lines here aren't pythonic, they look best if Click does the line wraps.
    """
    Run a validation operator against some data.

    There are two modes to run this command:

    1. Interactive (good for development):

        Specify the name of the validation operator using the --name argument
        and the name of the expectation suite using the --suite argument.

        The cli will help you specify the batch of data that you want to
        validate interactively.


    2. Non-interactive (good for production):

        Use the `--validation_config_file` argument to specify the path of the validation configuration JSON file. This file can be used to instruct a validation operator to validate multiple batches of data and use multiple expectation suites to validate each batch.

        Learn how to create a validation config file here:
        https://great-expectations.readthedocs.io/en/latest/command_line.html#great-expectations-validation-operator-run-validation-config-file-validation-config-file-path

        This command exits with 0 if the validation operator ran and the "success" attribute in its return object is True. Otherwise, the command exits with 1.

    To learn more about validation operators, go here:
    https://great-expectations.readthedocs.io/en/latest/features/validation.html#validation-operators
    """

    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("Failed to process <red>{}</red>".format(err.message))
        sys.exit(1)

    try:
        if validation_config_file is not None:
            try:
                with open(validation_config_file) as f:
                    validation_config = json.load(f)
            except (OSError, json_parse_exception) as e:
                cli_message(
                    f"Failed to process the --validation_config_file argument: <red>{e}</red>"
                )
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            validation_config_error_message = _validate_valdiation_config(
                validation_config)
            if validation_config_error_message is not None:
                cli_message(
                    "<red>The validation config in {:s} is misconfigured: {:s}</red>"
                    .format(validation_config_file,
                            validation_config_error_message))
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

        else:
            if suite is None:
                cli_message("""
Please use --suite argument to specify the name of the expectation suite.
Call `great_expectation suite list` command to list the expectation suites in your project.
""")
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(0)

            suite = toolkit.load_expectation_suite(
                context, suite, "cli.validation_operator.run")

            if name is None:
                cli_message("""
Please use --name argument to specify the name of the validation operator.
Call `great_expectation validation-operator list` command to list the operators in your project.
""")
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)
            else:
                if name not in context.list_validation_operator_names():
                    cli_message(f"""
Could not find a validation operator {name}.
Call `great_expectation validation-operator list` command to list the operators in your project.
""")
                    send_usage_message(
                        data_context=context,
                        event="cli.validation_operator.run",
                        success=False,
                    )
                    sys.exit(1)

            batch_kwargs = None

            cli_message("""
Let us help you specify the batch of data your want the validation operator to validate."""
                        )

            try:
                data_source = toolkit.select_datasource(context)
            except ValueError as ve:
                cli_message("<red>{}</red>".format(ve))
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            if batch_kwargs is None:
                (
                    datasource_name,
                    batch_kwargs_generator,
                    data_asset,
                    batch_kwargs,
                ) = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    batch_kwargs_generator_name=None,
                    data_asset_name=None,
                    additional_batch_kwargs=None,
                )

            validation_config = {
                "validation_operator_name":
                name,
                "batches": [{
                    "batch_kwargs":
                    batch_kwargs,
                    "expectation_suite_names": [suite.expectation_suite_name],
                }],
            }

        try:
            validation_operator_name = validation_config[
                "validation_operator_name"]
            batches_to_validate = []
            for entry in validation_config["batches"]:
                for expectation_suite_name in entry["expectation_suite_names"]:
                    batch = context.get_batch(entry["batch_kwargs"],
                                              expectation_suite_name)
                    batches_to_validate.append(batch)

            if run_name is None:
                run_name = datetime.datetime.now(
                    datetime.timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")

            run_id = RunIdentifier(run_name=run_name)

            if suite is None:
                results = context.run_validation_operator(
                    validation_operator_name,
                    assets_to_validate=batches_to_validate,
                    run_id=run_id,
                )
            else:
                if suite.evaluation_parameters is None:
                    results = context.run_validation_operator(
                        validation_operator_name,
                        assets_to_validate=batches_to_validate,
                        run_id=run_id,
                    )
                else:
                    results = context.run_validation_operator(
                        validation_operator_name,
                        assets_to_validate=batches_to_validate,
                        run_id=run_id,
                        evaluation_parameters=suite.evaluation_parameters,
                    )
        except (ge_exceptions.DataContextError, OSError, SQLAlchemyError) as e:
            cli_message("<red>{}</red>".format(e))
            send_usage_message(data_context=context,
                               event="cli.validation_operator.run",
                               success=False)
            sys.exit(1)

        if not results["success"]:
            cli_message("Validation failed!")
            send_usage_message(data_context=context,
                               event="cli.validation_operator.run",
                               success=True)
            sys.exit(1)
        else:
            cli_message("Validation succeeded!")
            send_usage_message(data_context=context,
                               event="cli.validation_operator.run",
                               success=True)
            sys.exit(0)
    except Exception as e:
        send_usage_message(data_context=context,
                           event="cli.validation_operator.run",
                           success=False)
        raise e

예제 #2

파일 보기

파일: tap_template.py 프로젝트: zhuohuwu0603/great_expectations

saves validation results to your results store and then updates Data Docs.

This makes viewing validation results easy for you and your team.

Usage:
- Run this file: `python {0}`.
- This can be run manually or via a scheduler such as cron.
- If your pipeline runner supports python snippets you can paste this into your
pipeline.
"""
import sys

from great_expectations import DataContext

# checkpoint configuration
context = DataContext("{1}")
suite = context.get_expectation_suite("{2}")
# You can modify your BatchKwargs to select different data
batch_kwargs = {3}

# checkpoint validation process
batch = context.get_batch(batch_kwargs, suite)
results = context.run_validation_operator("action_list_operator", [batch])

if not results["success"]:
    print("Validation Failed!")
    sys.exit(1)

print("Validation Succeeded!")
sys.exit(0)

예제 #3

파일 보기

from great_expectations import DataContext

# checkpoint configuration
context = DataContext("{1}")
checkpoint = context.get_checkpoint("{0}")

# load batches of data
batches_to_validate = []
for batch in checkpoint["batches"]:
    batch_kwargs = batch["batch_kwargs"]
    for suite_name in batch["expectation_suite_names"]:
        suite = context.get_expectation_suite(suite_name)
        batch = context.get_batch(batch_kwargs, suite)
        batches_to_validate.append(batch)

# run the validation operator
results = context.run_validation_operator(
    checkpoint["validation_operator_name"],
    assets_to_validate=batches_to_validate,
    # TODO prepare for new RunID - checkpoint name and timestamp
    # run_id=RunID(checkpoint)
)

# take action based on results
if not results["success"]:
    print("Validation Failed!")
    sys.exit(1)

print("Validation Succeeded!")
sys.exit(0)

예제 #4

파일 보기

    def run(
        self,
        checkpoint_name: str = None,
        ge_checkpoint: Checkpoint = None,
        checkpoint_kwargs: dict = None,
        context: ge.DataContext = None,
        assets_to_validate: list = None,
        batch_kwargs: dict = None,
        expectation_suite_name: str = None,
        context_root_dir: str = None,
        runtime_environment: Optional[dict] = None,
        run_name: str = None,
        run_info_at_end: bool = True,
        disable_markdown_artifact: bool = False,
        validation_operator: str = "action_list_operator",
        evaluation_parameters: Optional[dict] = None,
    ):
        """
        Task run method.

        Args:
            - checkpoint_name (str, optional): the name of a pre-configured checkpoint; should match the
                filename of the checkpoint without the extension. Either checkpoint_name or
                checkpoint_config is required when using the Great Expectations v3 API.
            - ge_checkpoint (Checkpoint, optional): an in-memory GE `Checkpoint` object used to perform
                validation. If not provided then `checkpoint_name` will be used to load the specified
                checkpoint.
            - checkpoint_kwargs (Dict, optional): A dictionary whose keys match the parameters of
                `CheckpointConfig` which can be used to update and populate the task's Checkpoint at
                runtime.
            - context (DataContext, optional): an in-memory GE `DataContext` object. e.g.
                `ge.data_context.DataContext()` If not provided then `context_root_dir` will be used to
                look for one.
            - assets_to_validate (list, optional): A list of assets to validate when running the
                validation operator. Only used in the Great Expectations v2 API
            - batch_kwargs (dict, optional): a dictionary of batch kwargs to be used when validating
                assets. Only used in the Great Expectations v2 API
            - expectation_suite_name (str, optional): the name of an expectation suite to be used when
                validating assets. Only used in the Great Expectations v2 API
            - context_root_dir (str, optional): the absolute or relative path to the directory holding
                your `great_expectations.yml`
            - runtime_environment (dict, optional): a dictionary of great expectation config key-value
                pairs to overwrite your config in `great_expectations.yml`
            - run_name (str, optional): the name of this  Great Expectation validation run; defaults to
                the task slug
            - run_info_at_end (bool, optional): add run info to the end of the artifact generated by this
                task. Defaults to `True`.
            - disable_markdown_artifact (bool, optional): toggle the posting of a markdown artifact from
                this tasks. Defaults to `False`.
            - evaluation_parameters (Optional[dict], optional): the evaluation parameters to use when
                running validation. For more information, see
                [example](https://docs.prefect.io/api/latest/tasks/great_expectations.html#rungreatexpectationsvalidation)
                and
                [docs](https://docs.greatexpectations.io/en/latest/reference/core_concepts/evaluation_parameters.html).
            - validation_operator (str, optional): configure the actions to be executed after running
                validation. Defaults to `action_list_operator`.

        Raises:
            - 'signals.FAIL' if the validation was not a success

        Returns:
            - result
                ('great_expectations.validation_operators.types.validation_operator_result.ValidationOperatorResult'):
                The Great Expectations metadata returned from the validation if the v2 (batch_kwargs) API
                is used.

                ('great_expectations.checkpoint.checkpoint.CheckpointResult'):
                The Great Expectations metadata returned from running the provided checkpoint if a
                checkpoint name is provided.

        """

        if version.parse(ge.__version__) < version.parse("0.13.8"):
            self.logger.warning(
                f"You are using great_expectations version {ge.__version__} which may cause"
                "errors in this task. Please upgrade great_expections to 0.13.8 or later."
            )

        runtime_environment = runtime_environment or dict()
        checkpoint_kwargs = checkpoint_kwargs or dict()

        # Load context if not provided directly
        if not context:
            context = ge.DataContext(
                context_root_dir=context_root_dir,
                runtime_environment=runtime_environment,
            )

        # Check that the parameters are mutually exclusive
        if (sum(
                bool(x) for x in [
                    (expectation_suite_name and batch_kwargs),
                    assets_to_validate,
                    checkpoint_name,
                    ge_checkpoint,
                ]) != 1):
            raise ValueError(
                "Exactly one of expectation_suite_name + batch_kwargs, assets_to_validate, "
                "checkpoint_name, or ge_checkpoint is required to run validation."
            )

        results = None
        # If there is a checkpoint or checkpoint name provided, run the checkpoint.
        # Checkpoints are the preferred deployment of validation configuration.
        if ge_checkpoint or checkpoint_name:
            ge_checkpoint = ge_checkpoint or context.get_checkpoint(
                checkpoint_name)
            results = ge_checkpoint.run(
                evaluation_parameters=evaluation_parameters,
                run_id={
                    "run_name": run_name or prefect.context.get("task_slug")
                },
                **checkpoint_kwargs,
            )
        else:
            # If assets are not provided directly through `assets_to_validate` then they need be loaded
            #   get batch from `batch_kwargs` and `expectation_suite_name`
            if not assets_to_validate:
                assets_to_validate = [
                    context.get_batch(batch_kwargs, expectation_suite_name)
                ]

            # Run validation operator
            results = context.run_validation_operator(
                validation_operator,
                assets_to_validate=assets_to_validate,
                run_id={
                    "run_name": run_name or prefect.context.get("task_slug")
                },
                evaluation_parameters=evaluation_parameters,
            )

        # Generate artifact markdown
        if not disable_markdown_artifact:
            validation_results_page_renderer = (
                ge.render.renderer.ValidationResultsPageRenderer(
                    run_info_at_end=run_info_at_end))
            rendered_content_list = validation_results_page_renderer.render_validation_operator_result(
                # This also works with a CheckpointResult because of duck typing.
                # The passed in object needs a list_validation_results method that
                # returns a list of ExpectationSuiteValidationResult.
                validation_operator_result=results)
            markdown_artifact = " ".join(
                ge.render.view.DefaultMarkdownPageView().render(
                    rendered_content_list))

            create_markdown_artifact(markdown_artifact)

        if results.success is False:
            raise signals.FAIL(result=results)

        return results

예제 #5

파일 보기

파일: checkpoint_script_template.py 프로젝트: thejasraju/great_expectations

import sys

from great_expectations import DataContext

# checkpoint configuration
context = DataContext("{1}")
checkpoint = context.get_checkpoint("{0}")

# load batches of data
batches_to_validate = []
for batch in checkpoint.batches:
    batch_kwargs = batch["batch_kwargs"]
    for suite_name in batch["expectation_suite_names"]:
        suite = context.get_expectation_suite(suite_name)
        batch = context.get_batch(batch_kwargs, suite)
        batches_to_validate.append(batch)

# run the validation operator
results = context.run_validation_operator(
    checkpoint.validation_operator_name,
    assets_to_validate=batches_to_validate,
)

# take action based on results
if not results["success"]:
    print("Validation failed!")
    sys.exit(1)

print("Validation succeeded!")
sys.exit(0)