Example #1
0
def select_datasource(context: DataContext, datasource_name: str = None) -> Datasource:
    """Interactively resolve a datasource from the context.

    When ``datasource_name`` is not supplied, the configured datasources are
    offered to the user: none found prints a warning, exactly one is chosen
    automatically, and several trigger a numbered prompt.

    :param context: the DataContext whose datasources are candidates
    :param datasource_name: pre-selected name; skips the interactive prompt
    :return: the resolved datasource, or None when nothing could be selected
    """
    # TODO consolidate all the myriad CLI tests into this
    if datasource_name is None:
        available = sorted(context.list_datasources(), key=lambda d: d["name"])
        if not available:
            cli_message(
                "<red>No datasources found in the context. To add a datasource, run `great_expectations datasource new`</red>"
            )
        elif len(available) == 1:
            datasource_name = available[0]["name"]
        else:
            menu = "\n".join(
                "    {}. {}".format(index, entry["name"])
                for index, entry in enumerate(available, 1)
            )
            valid_answers = [str(index) for index in range(1, len(available) + 1)]
            answer = click.prompt(
                "Select a datasource" + "\n" + menu + "\n",
                type=click.Choice(valid_answers),
                show_choices=False,
            )
            datasource_name = available[int(answer) - 1]["name"]

    if datasource_name is None:
        return None
    return context.get_datasource(datasource_name)
def delete_checkpoint(
    context: DataContext,
    checkpoint_name: str,
    usage_event: str,
    assume_yes: bool,
):
    """Delete a Checkpoint or raise helpful errors.

    :param context: DataContext that owns the Checkpoint
    :param checkpoint_name: name of the Checkpoint to delete
    :param usage_event: usage-statistics event emitted on the abort/exit path
    :param assume_yes: when True, skip the interactive confirmation prompt
    """
    # Fail fast (with helpful messaging and usage stats) if the Checkpoint
    # does not exist or is invalid.
    validate_checkpoint(
        context=context,
        checkpoint_name=checkpoint_name,
        usage_event=usage_event,
    )
    # BUGFIX: removed a stray '"' that trailed "(this action is irreversible)?"
    # in the user-facing confirmation prompt.
    confirm_prompt: str = f"""\nAre you sure you want to delete the Checkpoint "{checkpoint_name}" (this action is irreversible)?
"""
    continuation_message: str = (
        f'The Checkpoint "{checkpoint_name}" was not deleted.  Exiting now.'
    )
    if not assume_yes:
        # Exits the process (emitting usage stats) if the user declines.
        confirm_proceed_or_exit(
            confirm_prompt=confirm_prompt,
            continuation_message=continuation_message,
            data_context=context,
            usage_stats_event=usage_event,
        )
    context.delete_checkpoint(name=checkpoint_name)
Example #3
0
def load_data_context_with_error_handling(
    directory: str, from_cli_upgrade_command: bool = False
) -> DataContext:
    """Return a DataContext with good error handling and exit codes.

    :param directory: context root directory (may be falsy; discovered below)
    :param from_cli_upgrade_command: when True, attempt a one-version config
        upgrade if the project is behind CURRENT_GE_CONFIG_VERSION
    """
    try:
        context: DataContext = DataContext(context_root_dir=directory)
        ge_config_version: int = context.get_config().config_version
        if (
            from_cli_upgrade_command
            and int(ge_config_version) < CURRENT_GE_CONFIG_VERSION
        ):
            # Fall back to the context's own root when no directory was given.
            directory = directory or context.root_directory
            (
                increment_version,
                exception_occurred,
            ) = upgrade_project_one_version_increment(
                context_root_dir=directory,
                ge_config_version=ge_config_version,
                continuation_message=EXIT_UPGRADE_CONTINUATION_MESSAGE,
                from_cli_upgrade_command=from_cli_upgrade_command,
            )
            # Reload only when the upgrade actually bumped the version.
            if not exception_occurred and increment_version:
                context = DataContext(context_root_dir=directory)
        return context
    except ge_exceptions.UnsupportedConfigVersionError as err:
        # The stored config version may still be upgradable: look up a helper
        # for it and run the upgrade instead of failing outright.
        directory = directory or DataContext.find_context_root_dir()
        ge_config_version = DataContext.get_ge_config_version(
            context_root_dir=directory
        )
        upgrade_helper_class = (
            GE_UPGRADE_HELPER_VERSION_MAP.get(int(ge_config_version))
            if ge_config_version
            else None
        )
        if upgrade_helper_class and ge_config_version < CURRENT_GE_CONFIG_VERSION:
            upgrade_project(
                context_root_dir=directory,
                ge_config_version=ge_config_version,
                from_cli_upgrade_command=from_cli_upgrade_command,
            )
        else:
            cli_message(f"<red>{err.message}</red>")
            sys.exit(1)
    except (
        ge_exceptions.ConfigNotFoundError,
        ge_exceptions.InvalidConfigError,
    ) as err:
        cli_message(f"<red>{err.message}</red>")
        sys.exit(1)
    except ge_exceptions.PluginModuleNotFoundError as err:
        # NOTE(review): `err.cli.v012_colored_message` differs from the
        # `err.cli_colored_message` attribute used by the sibling version of
        # this function later in this file — confirm the attribute path.
        cli_message(err.cli.v012_colored_message)
        sys.exit(1)
    except ge_exceptions.PluginClassNotFoundError as err:
        cli_message(err.cli.v012_colored_message)
        sys.exit(1)
    except ge_exceptions.InvalidConfigurationYamlError as err:
        cli_message(f"<red>{str(err)}</red>")
        sys.exit(1)
def add_citation_with_batch_request(
    data_context: DataContext,
    expectation_suite: ExpectationSuite,
    batch_request: Optional[Dict[str, Union[str, int, Dict[str, Any]]]] = None,
) -> None:
    """Attach the creating batch request to the suite as a citation.

    Does nothing unless a suite is provided and ``batch_request`` is a
    non-empty dict that instantiates a truthy ``BatchRequest``; the saved
    suite is then persisted back through the context.
    """
    if expectation_suite is None or not batch_request:
        return
    if not isinstance(batch_request, dict):
        return
    # Instantiating BatchRequest here mirrors the original validation step;
    # an invalid request dict raises before any citation is added.
    if not BatchRequest(**batch_request):
        return
    expectation_suite.add_citation(
        comment="Created suite added via CLI",
        batch_request=batch_request,
    )
    data_context.save_expectation_suite(expectation_suite=expectation_suite)
def get_or_create_expectation_suite(
    expectation_suite_name: str,
    data_context: DataContext,
    data_asset_name: Optional[str] = None,
    usage_event: Optional[str] = None,
    suppress_usage_message: Optional[bool] = False,
    batch_request: Optional[
        Union[str, Dict[str, Union[str, int, Dict[str, Any]]]]
    ] = None,
    create_if_not_exist: Optional[bool] = True,
) -> ExpectationSuite:
    """Resolve an ExpectationSuite by name, prompting for a new name if needed.

    With no name supplied, the user is prompted (with a generated default)
    until they enter a name not already present in the context; existing
    names trigger a notification each round.  With a name supplied that
    already exists, the user is notified once before the suite is loaded.
    """
    if expectation_suite_name is None:
        default_name: str = get_default_expectation_suite_name(
            data_asset_name=data_asset_name,
            batch_request=batch_request,
        )
        chosen_name: Optional[str] = None
        while chosen_name is None:
            candidate = click.prompt(
                "\nName the new Expectation Suite",
                default=default_name,
            )
            if candidate not in data_context.list_expectation_suite_names():
                chosen_name = candidate
            else:
                tell_user_suite_exists(
                    data_context=data_context,
                    expectation_suite_name=candidate,
                    usage_event=usage_event,
                    suppress_usage_message=suppress_usage_message,
                )
        expectation_suite_name = chosen_name
    elif expectation_suite_name in data_context.list_expectation_suite_names():
        tell_user_suite_exists(
            data_context=data_context,
            expectation_suite_name=expectation_suite_name,
            usage_event=usage_event,
            suppress_usage_message=suppress_usage_message,
        )

    return load_expectation_suite(
        data_context=data_context,
        expectation_suite_name=expectation_suite_name,
        usage_event=usage_event,
        suppress_usage_message=suppress_usage_message,
        create_if_not_exist=create_if_not_exist,
    )
def upgrade_project_strictly_multiple_versions_increment(
        directory: str,
        ge_config_version: float,
        from_cli_upgrade_command: bool = False) -> Optional[DataContext]:
    """Upgrade a project whose config version lags the current one.

    Returns a freshly loaded DataContext after a successful upgrade, or None
    when no upgrade helper exists for this version or the project is already
    at (or past) the current config version.
    """
    helper_cls = (
        GE_UPGRADE_HELPER_VERSION_MAP.get(int(ge_config_version))
        if ge_config_version
        else None
    )
    # Guard clause: nothing to do without a helper or when already current.
    if not helper_cls or int(ge_config_version) >= CURRENT_GE_CONFIG_VERSION:
        return None

    upgrade_project(
        context_root_dir=directory,
        ge_config_version=ge_config_version,
        from_cli_upgrade_command=from_cli_upgrade_command,
    )
    refreshed = DataContext(context_root_dir=directory)
    # noinspection PyBroadException
    try:
        send_usage_message(
            data_context=refreshed,
            event="cli.project.upgrade.end",
            success=True,
        )
    except Exception:
        # Don't fail for usage stats
        pass
    return refreshed
def run_checkpoint(
    context: DataContext,
    checkpoint_name: str,
    usage_event: str,
) -> CheckpointResult:
    """Execute a named Checkpoint, converting failures into CLI-friendly exits."""
    failure_message: str = "Exception occurred while running Checkpoint."
    # Verify the Checkpoint exists before attempting to run it.
    validate_checkpoint(
        context=context,
        checkpoint_name=checkpoint_name,
        usage_event=usage_event,
        failure_message=failure_message,
    )
    try:
        return context.run_checkpoint(checkpoint_name=checkpoint_name)
    except ge_exceptions.CheckpointError as err:
        cli_message(string=failure_message)
        exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event,
            message=f"<red>{err}.</red>",
        )
Example #8
0
def load_checkpoint(
    context: DataContext,
    checkpoint_name: str,
    usage_event: str,
) -> Union[Checkpoint, LegacyCheckpoint]:
    """Fetch a checkpoint by name, exiting with guidance when it cannot load."""
    try:
        return context.get_checkpoint(name=checkpoint_name)
    except (
        ge_exceptions.CheckpointNotFoundError,
        ge_exceptions.InvalidCheckpointConfigError,
    ):
        # Missing or malformed checkpoint: point the user at the relevant CLI
        # commands instead of surfacing a traceback.
        exit_with_failure_message_and_stats(
            context,
            usage_event,
            f"""\
<red>Could not find checkpoint `{checkpoint_name}`.</red> Try running:
  - `<green>great_expectations checkpoint list</green>` to verify your checkpoint exists
  - `<green>great_expectations checkpoint new</green>` to configure a new checkpoint""",
        )
    except ge_exceptions.CheckpointError as checkpoint_err:
        exit_with_failure_message_and_stats(
            context, usage_event, f"<red>{checkpoint_err}</red>"
        )
Example #9
0
def select_datasource(
    context: DataContext, datasource_name: Optional[str] = None
) -> Optional[BaseDatasource]:
    """Select a datasource interactively.

    When ``datasource_name`` is not supplied, the configured datasources are
    listed: none prints a warning, one is auto-selected, several trigger a
    numbered prompt.

    :param context: the DataContext whose datasources are candidates
    :param datasource_name: pre-selected name; skips the interactive prompt
        (annotation fixed: the default of None made this implicitly Optional)
    :return: the resolved datasource, or None when nothing could be selected
        (annotation widened to Optional to match the actual None return)
    """
    # TODO consolidate all the myriad CLI tests into this
    data_source: Optional[BaseDatasource] = None

    if datasource_name is None:
        # sorted() already returns a new list, so the former list() wrapper
        # around it was redundant and has been removed.
        data_sources: List[BaseDatasource] = cast(
            List[BaseDatasource],
            sorted(context.datasources.values(), key=lambda x: x.name),
        )
        if len(data_sources) == 0:
            cli_message(
                string=
                "<red>No datasources found in the context. To add a datasource, run `great_expectations datasource new`</red>"
            )
        elif len(data_sources) == 1:
            datasource_name = data_sources[0].name
        else:
            choices: str = "\n".join(
                f"    {i}. {data_source.name}"
                for i, data_source in enumerate(data_sources, 1)
            )
            option_selection: str = click.prompt(
                "Select a datasource" + "\n" + choices + "\n",
                type=click.Choice(
                    [str(i) for i in range(1, len(data_sources) + 1)]
                ),
                show_choices=False,
            )
            datasource_name = data_sources[int(option_selection) - 1].name

    if datasource_name is not None:
        data_source = context.get_datasource(datasource_name=datasource_name)

    return data_source
Example #10
0
def create_empty_suite(context: DataContext, expectation_suite_name: str,
                       batch_kwargs) -> None:
    """Create a new, empty Expectation Suite, cite its origin, and persist it."""
    # Resolve the URL where the new suite will be stored, for the message below.
    suite_key = ExpectationSuiteIdentifier(
        expectation_suite_name=expectation_suite_name
    ).to_tuple()
    store_backend = context.stores[context.expectations_store_name].store_backend
    suite_url = store_backend.get_url_for_key(suite_key)
    cli_message(
        f"""
Great Expectations will create a new Expectation Suite '{expectation_suite_name}' and store it here:

  {suite_url}
"""
    )
    suite = context.create_expectation_suite(expectation_suite_name)
    suite.add_citation(comment="New suite added via CLI",
                       batch_kwargs=batch_kwargs)
    context.save_expectation_suite(suite, expectation_suite_name)
Example #11
0
def load_batch(
    context: DataContext,
    suite: Union[str, ExpectationSuite],
    batch_kwargs: Union[dict, BatchKwargs],
) -> Union[Batch, DataAsset]:
    """Load a batch for the given suite, failing loudly when it cannot be built."""
    loaded: Union[Batch, DataAsset] = context.get_batch(batch_kwargs, suite)
    assert isinstance(
        loaded, (DataAsset, Batch)
    ), "Batch failed to load. Please check your batch_kwargs"
    return loaded
def load_expectation_suite(
    data_context: DataContext,
    expectation_suite_name: str,
    usage_event: str,
    suppress_usage_message: Optional[bool] = False,
    create_if_not_exist: Optional[bool] = True,
) -> Optional[ExpectationSuite]:
    """
    Load an expectation suite from a given context.

    Handles a suite name with or without `.json`
    :param data_context: context to fetch the suite from
    :param expectation_suite_name: suite name, optionally ending in ".json"
    :param usage_event: usage-statistics event recorded on the failure path
    :param suppress_usage_message: forwarded to the failure/exit handler
    :param create_if_not_exist: create an empty suite instead of exiting
    """
    # Accept "my_suite.json" and "my_suite" interchangeably.
    if expectation_suite_name.endswith(".json"):
        expectation_suite_name = expectation_suite_name[:-5]

    try:
        return data_context.get_expectation_suite(
            expectation_suite_name=expectation_suite_name
        )
    except ge_exceptions.DataContextError:
        if create_if_not_exist:
            return data_context.create_expectation_suite(
                expectation_suite_name=expectation_suite_name
            )
        # The exit helper is expected to terminate the process; the trailing
        # return keeps the signature honest regardless.
        exit_with_failure_message_and_stats(
            data_context=data_context,
            usage_event=usage_event,
            suppress_usage_message=suppress_usage_message,
            message=f"<red>Could not find a suite named `{expectation_suite_name}`.</red> Please check "
            "the name by running `great_expectations suite list` and try again.",
        )
        return None
def get_validator(
    context: DataContext,
    batch_request: Union[dict, BatchRequest],
    suite: Union[str, ExpectationSuite],
) -> Validator:
    """Build a Validator for ``batch_request``, accepting a suite name or object."""
    assert isinstance(
        suite, (str, ExpectationSuite)
    ), "Invalid suite type (must be ExpectationSuite) or a string."

    # Promote dict-style batch requests to a real BatchRequest first.
    if isinstance(batch_request, dict):
        batch_request = BatchRequest(**batch_request)

    if isinstance(suite, str):
        return context.get_validator(
            batch_request=batch_request, expectation_suite_name=suite
        )
    return context.get_validator(
        batch_request=batch_request, expectation_suite=suite
    )
def empty_data_context_with_config_variables(monkeypatch, empty_data_context):
    """Build a DataContext over ``empty_data_context`` seeded with config variables.

    Sets the environment variables the variable-substitution fixture expects,
    copies in a great_expectations.yml that references variables plus the
    config_variables.yml file, then reloads the context from disk.
    """
    monkeypatch.setenv("FOO", "BAR")
    monkeypatch.setenv("REPLACE_ME_ESCAPED_ENV", "ive_been_$--replaced")
    root_dir = empty_data_context.root_directory
    shutil.copy(
        file_relative_path(
            __file__,
            "../test_fixtures/great_expectations_basic_with_variables.yml",
        ),
        os.path.join(root_dir, "great_expectations.yml"),
    )
    shutil.copy(
        file_relative_path(
            __file__,
            "../test_fixtures/config_variables.yml",
        ),
        os.path.join(root_dir, "uncommitted"),
    )
    return DataContext(context_root_dir=root_dir)
Example #15
0
def load_expectation_suite(
    # TODO consolidate all the myriad CLI tests into this
    context: DataContext,
    suite_name: str,
    usage_event: str,
) -> ExpectationSuite:
    """
    Load an expectation suite from a given context.

    Handles a suite name with or without `.json`
    :param context: DataContext to fetch the suite from
    :param suite_name: suite name, optionally suffixed with ".json"
    :param usage_event: usage-statistics event recorded if the load fails
    """
    # Accept "my_suite.json" and "my_suite" interchangeably.
    if suite_name.endswith(".json"):
        suite_name = suite_name[:-5]
    try:
        suite = context.get_expectation_suite(suite_name)
        return suite
    except ge_exceptions.DataContextError:
        # The exception object was never used, so the `as e` binding is gone.
        exit_with_failure_message_and_stats(
            context,
            usage_event,
            f"<red>Could not find a suite named `{suite_name}`.</red> Please check "
            "the name by running `great_expectations suite list` and try again.",
        )
Example #16
0
def test_evaluation_parameter_store_methods(
    data_context_parameterized_expectation_suite: DataContext, ):
    """Stored validation results are retrievable as evaluation parameters.

    Stores validation results for two suites and checks that
    ``get_bind_params`` exposes their observed values under the expected
    ``urn:great_expectations:validations:...`` keys, accumulating across
    successive ``store_evaluation_parameters`` calls.
    """
    run_id = RunIdentifier(run_name="20191125T000000.000000Z")
    # First result set: a table row-count expectation for patient data.
    source_patient_data_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "source_patient_data.default",
            "run_id": run_id,
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_table_row_count_to_equal",
                    kwargs={
                        "value": 1024,
                    },
                ),
                success=True,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False,
                },
                result={
                    "observed_value": 1024,
                    "element_count": 1024,
                    "missing_percent": 0.0,
                    "missing_count": 0,
                },
            )
        ],
        success=True,
    )

    data_context_parameterized_expectation_suite.store_evaluation_parameters(
        source_patient_data_results)

    # The single stored observed_value should be exposed under its URN.
    bound_parameters = data_context_parameterized_expectation_suite.evaluation_parameter_store.get_bind_params(
        run_id)
    assert bound_parameters == {
        "urn:great_expectations:validations:source_patient_data.default:expect_table_row_count_to_equal.result"
        ".observed_value":
        1024
    }
    # Second result set: a column-level expectation for diabetes data.
    source_diabetes_data_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "source_diabetes_data.default",
            "run_id": run_id,
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type=
                    "expect_column_unique_value_count_to_be_between",
                    kwargs={
                        "column": "patient_nbr",
                        "min": 2048,
                        "max": 2048
                    },
                ),
                success=True,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False,
                },
                result={
                    "observed_value": 2048,
                    "element_count": 5000,
                    "missing_percent": 0.0,
                    "missing_count": 0,
                },
            )
        ],
        success=True,
    )

    data_context_parameterized_expectation_suite.store_evaluation_parameters(
        source_diabetes_data_results)
    bound_parameters = data_context_parameterized_expectation_suite.evaluation_parameter_store.get_bind_params(
        run_id)
    # Both suites' observed values are now bound for the same run_id; the
    # column-level URN carries a ":column=patient_nbr" qualifier.
    assert bound_parameters == {
        "urn:great_expectations:validations:source_patient_data.default:expect_table_row_count_to_equal.result"
        ".observed_value":
        1024,
        "urn:great_expectations:validations:source_diabetes_data.default"
        ":expect_column_unique_value_count_to_be_between.result.observed_value:column=patient_nbr":
        2048,
    }
Example #17
0
import numpy as np

from great_expectations.core.batch import BatchRequest
from great_expectations.data_context.data_context import DataContext
from great_expectations.datasource.data_connector.batch_filter import (
    BatchFilter,
    build_batch_filter,
)
from great_expectations.validator.validation_graph import MetricConfiguration

# Load the project and the suite these multi-batch examples validate against.
context = DataContext()
suite = context.get_expectation_suite("yellow_trip_data_validations")

# This BatchRequest will retrieve all twelve batches from 2019
multi_batch_request = BatchRequest(
    datasource_name="taxi_pandas",
    data_connector_name="monthly",
    data_asset_name="my_reports",
    data_connector_query={"batch_filter_parameters": {"year": "2019"}},
)

# Instantiate the Validator
validator_multi_batch = context.get_validator(
    batch_request=multi_batch_request, expectation_suite=suite
)

# The active batch should be December, as this should be the last one loaded. Confirming here.
assert validator_multi_batch.active_batch_definition.batch_identifiers["month"] == "12"

# Get the list of all batches contained by the Validator for use in the BatchFilter
total_batch_definition_list: list = [
def upgrade_project(
    context_root_dir, ge_config_version, from_cli_upgrade_command=False
):
    """Interactively upgrade the project config toward CURRENT_GE_CONFIG_VERSION.

    Warns the user, asks for confirmation, applies one version increment at a
    time, prints a completion or next-steps report, emits best-effort usage
    stats, and always terminates the process via ``sys.exit(0)``.
    """
    if from_cli_upgrade_command:
        message = (
            f"<red>\nYour project appears to have an out-of-date config version ({ge_config_version}) - "
            f"the version "
            f"number must be at least {CURRENT_GE_CONFIG_VERSION}.</red>"
        )
    else:
        message = (
            f"<red>\nYour project appears to have an out-of-date config version ({ge_config_version}) - "
            f"the version "
            f"number must be at least {CURRENT_GE_CONFIG_VERSION}.\nIn order to proceed, "
            f"your project must be upgraded.</red>"
        )

    cli_message(string=message)
    upgrade_prompt = (
        "\nWould you like to run the Upgrade Helper to bring your project up-to-date?"
    )
    # This loading of DataContext is optional and just to track if someone exits here
    try:
        data_context = DataContext(context_root_dir)
    except Exception:
        # Do not raise error for usage stats
        data_context = None
    # Exits the process if the user declines the upgrade.
    confirm_proceed_or_exit(
        confirm_prompt=upgrade_prompt,
        continuation_message=EXIT_UPGRADE_CONTINUATION_MESSAGE,
        data_context=data_context,
        usage_stats_event="cli.project.upgrade.end",
    )
    cli_message(string=SECTION_SEPARATOR)

    # use loop in case multiple upgrades need to take place
    while ge_config_version < CURRENT_GE_CONFIG_VERSION:
        increment_version, exception_occurred = upgrade_project_one_version_increment(
            context_root_dir=context_root_dir,
            ge_config_version=ge_config_version,
            continuation_message=EXIT_UPGRADE_CONTINUATION_MESSAGE,
            from_cli_upgrade_command=from_cli_upgrade_command,
        )
        # Stop early if an increment failed or was not applied.
        if exception_occurred or not increment_version:
            break
        ge_config_version += 1

    cli_message(string=SECTION_SEPARATOR)
    upgrade_success_message = "<green>Upgrade complete. Exiting...</green>\n"
    upgrade_incomplete_message = f"""\
<red>The Upgrade Helper was unable to perform a complete project upgrade. Next steps:</red>

    - Please perform any manual steps outlined in the Upgrade Overview and/or Upgrade Report above
    - When complete, increment the config_version key in your <cyan>great_expectations.yml</cyan> to <cyan>{
    ge_config_version + 1}</cyan>\n
To learn more about the upgrade process, visit \
<cyan>https://docs.greatexpectations.io/en/latest/how_to_guides/migrating_versions.html</cyan>
"""

    # If the loop did not reach the current version, the upgrade is partial.
    if ge_config_version < CURRENT_GE_CONFIG_VERSION:
        cli_message(string=upgrade_incomplete_message)
        # noinspection PyBroadException
        try:
            context: DataContext = DataContext(context_root_dir=context_root_dir)
            send_usage_message(
                data_context=context, event="cli.project.upgrade.end", success=False
            )
        except Exception:
            # Do not raise error for usage stats
            pass
    else:
        cli_message(upgrade_success_message)
        try:
            context: DataContext = DataContext(context_root_dir)
            send_usage_message(
                data_context=context, event="cli.project.upgrade.end", success=True
            )
        except Exception:
            # Do not raise error for usage stats
            pass
    sys.exit(0)
def load_data_context_with_error_handling(
        directory: str,
        from_cli_upgrade_command: bool = False) -> Optional[DataContext]:
    """Return a DataContext with good error handling and exit codes.

    :param directory: context root directory; discovered when falsy
    :param from_cli_upgrade_command: when True, run the upgrade path matching
        how far the project's config version is behind (or reject versions
        from the future)
    """
    context: Optional[DataContext]
    ge_config_version: float
    try:
        directory = directory or DataContext.find_context_root_dir()
        context = DataContext(context_root_dir=directory)
        ge_config_version = context.get_config().config_version

        if from_cli_upgrade_command:
            if ge_config_version < CURRENT_GE_CONFIG_VERSION:
                # Behind by one or more versions: incremental upgrade path.
                context = upgrade_project_one_or_multiple_versions_increment(
                    directory=directory,
                    context=context,
                    ge_config_version=ge_config_version,
                    from_cli_upgrade_command=from_cli_upgrade_command,
                )
            elif ge_config_version > CURRENT_GE_CONFIG_VERSION:
                # A config version newer than this release is unsupported.
                raise ge_exceptions.UnsupportedConfigVersionError(
                    f"""Invalid config version ({ge_config_version}).\n    The maximum valid version is \
{CURRENT_GE_CONFIG_VERSION}.
""")
            else:
                # Version is current; the zero-increment path still runs (it
                # can surface required manual steps without a version bump).
                context = upgrade_project_zero_versions_increment(
                    directory=directory,
                    context=context,
                    ge_config_version=ge_config_version,
                    from_cli_upgrade_command=from_cli_upgrade_command,
                )

        return context
    except ge_exceptions.UnsupportedConfigVersionError as err:
        # Try a strictly-multi-version upgrade before surfacing the error.
        directory = directory or DataContext.find_context_root_dir()
        ge_config_version = DataContext.get_ge_config_version(
            context_root_dir=directory)
        context = upgrade_project_strictly_multiple_versions_increment(
            directory=directory,
            ge_config_version=ge_config_version,
            from_cli_upgrade_command=from_cli_upgrade_command,
        )
        if context:
            return context
        else:
            cli_message(string=f"<red>{err.message}</red>")
            sys.exit(1)
    except (
            ge_exceptions.ConfigNotFoundError,
            ge_exceptions.InvalidConfigError,
    ) as err:
        cli_message(string=f"<red>{err.message}</red>")
        sys.exit(1)
    except ge_exceptions.PluginModuleNotFoundError as err:
        cli_message(string=err.cli_colored_message)
        sys.exit(1)
    except ge_exceptions.PluginClassNotFoundError as err:
        cli_message(string=err.cli_colored_message)
        sys.exit(1)
    except ge_exceptions.InvalidConfigurationYamlError as err:
        cli_message(string=f"<red>{str(err)}</red>")
        sys.exit(1)
Example #20
0
from typing import List

import numpy as np

from great_expectations.core.batch import BatchRequest
from great_expectations.data_context.data_context import DataContext
from great_expectations.datasource.data_connector.batch_filter import (
    BatchFilter,
    build_batch_filter,
)
from great_expectations.validator.validation_graph import MetricConfiguration
from great_expectations.validator.validator import Validator

# Load the project and the suite these batch requests validate against.
context = DataContext()
suite = context.get_expectation_suite("yellow_trip_data_validations")

# Create three BatchRequests for Jan, Feb, and March 2019 data and instantiate a Validator with all three BatchRequests
jan_batch_request: BatchRequest = BatchRequest(
    datasource_name="taxi_pandas",
    data_connector_name="monthly",
    data_asset_name="my_reports",
    data_connector_query={
        "batch_filter_parameters": {
            "month": "01",
            "year": "2019"
        }
    },
)

feb_batch_request: BatchRequest = BatchRequest(
    datasource_name="taxi_pandas",
def upgrade_project_zero_versions_increment(
    directory: str,
    context: DataContext,
    ge_config_version: float,
    from_cli_upgrade_command: bool = False,
) -> Optional[DataContext]:
    """Run the upgrade helper for a project already at the current config version.

    The helper is invoked with ``update_version=False``, so no version bump
    is expected; only non-version changes and any required manual steps are
    surfaced.  Returns a reloaded DataContext on the normal (no-increment)
    path, or None when an exception occurred or a version increment was
    unexpectedly requested.
    """
    upgrade_helper_class = (GE_UPGRADE_HELPER_VERSION_MAP.get(
        int(ge_config_version)) if ge_config_version else None)
    if upgrade_helper_class:
        upgrade_helper = upgrade_helper_class(context_root_dir=directory,
                                              update_version=False)
    else:
        # No helper registered for this version: nothing sensible can be done.
        error_message: str = (
            f"The upgrade utility for version {ge_config_version} could not be found."
        )
        cli_message(string=f"<red>{error_message}</red>")
        sys.exit(1)

    manual_steps_required = upgrade_helper.manual_steps_required()

    if manual_steps_required:
        # Usage stats are best-effort; never fail the upgrade over them.
        # noinspection PyBroadException
        try:
            send_usage_message(
                data_context=context,
                event="cli.project.upgrade.begin",
                success=True,
            )
        except Exception:
            # Don't fail for usage stats
            pass

    (
        increment_version,
        exception_occurred,
    ) = upgrade_project_up_to_one_version_increment(
        context_root_dir=directory,
        ge_config_version=ge_config_version,
        continuation_message=EXIT_UPGRADE_CONTINUATION_MESSAGE,
        update_version=False,
        from_cli_upgrade_command=from_cli_upgrade_command,
    )
    # Either outcome here is abnormal for the zero-increment path, so the
    # caller receives None instead of a context.
    if exception_occurred or increment_version:
        context = None
    else:
        if manual_steps_required:
            upgrade_message = "Your project requires manual upgrade steps in order to be up-to-date.\n"
            cli_message(f"<yellow>{upgrade_message}</yellow>")
        else:
            upgrade_message = (
                "Your project is up-to-date - no further upgrade is necessary.\n"
            )
            cli_message(f"<green>{upgrade_message}</green>")

        # Reload so the returned context reflects any on-disk changes.
        context = DataContext(context_root_dir=directory)

        # noinspection PyBroadException
        try:
            send_usage_message(
                data_context=context,
                event="cli.project.upgrade.end",
                success=True,
            )
        except Exception:
            # Don't fail for usage stats
            pass

    return context
from great_expectations.core.batch import BatchRequest
from great_expectations.data_context.data_context import DataContext
from great_expectations.validator.validation_graph import MetricConfiguration

# Load the project and the suite to validate against.
context = DataContext()
suite = context.get_expectation_suite("yellow_trip_data_validations")

# Get February BatchRequest and Validator
# (index -2: second-to-last batch produced by the "monthly" data connector)
batch_request_february = BatchRequest(
    datasource_name="taxi_pandas",
    data_connector_name="monthly",
    data_asset_name="my_reports",
    data_connector_query={"index": -2},
)
validator_february = context.get_validator(
    batch_request=batch_request_february, expectation_suite=suite)

# Get the table row count for February
february_table_row_count = validator_february.get_metric(
    MetricConfiguration("table.row_count", metric_domain_kwargs={}))

# Get March BatchRequest and Validator
# (index -1: last batch produced by the "monthly" data connector)
batch_request_march = BatchRequest(
    datasource_name="taxi_pandas",
    data_connector_name="monthly",
    data_asset_name="my_reports",
    data_connector_query={"index": -1},
)
validator_march = context.get_validator(batch_request=batch_request_march,
                                        expectation_suite=suite)
Example #23
0
def upgrade_project_one_version_increment(
    context_root_dir: str,
    ge_config_version: float,
    continuation_message: str,
    from_cli_upgrade_command: bool = False,
) -> [bool, bool]:  # Returns increment_version, exception_occurred
    """Upgrade the project config by exactly one version increment.

    Returns ``(increment_version, exception_occurred)``.  NOTE(review): the
    ``[bool, bool]`` return annotation is a list literal, not a valid type;
    it should read ``Tuple[bool, bool]`` — left unchanged here.

    The config version is temporarily set to CURRENT_GE_CONFIG_VERSION so a
    functional DataContext can be built, and is restored (or set to the
    target version) on every exit path below.
    """
    upgrade_helper_class = GE_UPGRADE_HELPER_VERSION_MAP.get(int(ge_config_version))
    if not upgrade_helper_class:
        # No helper for this version: nothing upgraded, no error.
        return False, False
    target_ge_config_version = int(ge_config_version) + 1
    # set version temporarily to CURRENT_GE_CONFIG_VERSION to get functional DataContext
    DataContext.set_ge_config_version(
        config_version=CURRENT_GE_CONFIG_VERSION,
        context_root_dir=context_root_dir,
    )
    upgrade_helper = upgrade_helper_class(context_root_dir=context_root_dir)
    upgrade_overview, confirmation_required = upgrade_helper.get_upgrade_overview()

    if confirmation_required or from_cli_upgrade_command:
        upgrade_confirmed = confirm_proceed_or_exit(
            confirm_prompt=upgrade_overview,
            continuation_message=continuation_message,
            exit_on_no=False,
        )
    else:
        upgrade_confirmed = True

    if upgrade_confirmed:
        cli_message("\nUpgrading project...")
        cli_message(SECTION_SEPARATOR)
        # run upgrade and get report of what was done, if version number should be incremented
        (
            upgrade_report,
            increment_version,
            exception_occurred,
        ) = upgrade_helper.upgrade_project()
        # display report to user
        cli_message(upgrade_report)
        if exception_occurred:
            # restore version number to current number
            DataContext.set_ge_config_version(
                ge_config_version, context_root_dir, validate_config_version=False
            )
            # display report to user
            return False, True
        # set config version to target version
        if increment_version:
            DataContext.set_ge_config_version(
                target_ge_config_version,
                context_root_dir,
                validate_config_version=False,
            )
            return True, False
        # restore version number to current number
        DataContext.set_ge_config_version(
            ge_config_version, context_root_dir, validate_config_version=False
        )
        return False, False

    # User declined: restore version number to current number and exit.
    DataContext.set_ge_config_version(
        ge_config_version, context_root_dir, validate_config_version=False
    )
    cli_message(continuation_message)
    sys.exit(0)
from typing import List

import numpy as np

from great_expectations.core.batch import Batch, BatchRequest
from great_expectations.data_context.data_context import DataContext
from great_expectations.datasource.data_connector.batch_filter import (
    BatchFilter,
    build_batch_filter,
)
from great_expectations.validator.metric_configuration import MetricConfiguration
from great_expectations.validator.validator import Validator

# Load the project and the suite to validate against.
context = DataContext()
suite = context.get_expectation_suite("yellow_tripdata_validations")

# Create a BatchRequest and instantiate a Validator with only the January 2019 data
jan_batch_request: BatchRequest = BatchRequest(
    datasource_name="taxi_pandas",
    data_connector_name="monthly",
    data_asset_name="my_reports",
    data_connector_query={
        "batch_filter_parameters": {
            "month": "01",
            "year": "2019"
        }
    },
)

validator: Validator = context.get_validator(batch_request=jan_batch_request,
                                             expectation_suite=suite)