def _build_checkpoint_config(self) -> CheckpointConfig:
    """Assemble the CheckpointConfig described by this configurator.

    The base configuration uses the default action list, extended by
    ``_add_update_data_docs_action`` when ``site_names`` is set and by
    ``_add_slack_action`` when ``slack_webhook`` is set.  If
    ``other_kwargs`` is non-empty, a second CheckpointConfig built from it
    is handed to ``checkpoint_config.update`` as ``other_config``.

    Returns:
        The resulting CheckpointConfig.
    """
    action_list = self._default_action_list()
    if self.site_names:
        action_list = self._add_update_data_docs_action(action_list)
    if self.slack_webhook:
        action_list = self._add_slack_action(action_list)
    checkpoint_config = CheckpointConfig(
        **{
            "config_version": 1.0,
            "name": self.name,
            "class_name": "Checkpoint",
            "action_list": action_list,
        }
    )
    if self.other_kwargs:
        # Work on a copy and read (rather than pop) "config_version": popping
        # from self.other_kwargs would drop a user-supplied version, so a
        # second call would silently fall back to 1.0.
        other_kwargs = dict(self.other_kwargs)
        # A missing or falsy version falls back to 1.0.
        other_kwargs["config_version"] = other_kwargs.get("config_version") or 1.0
        checkpoint_config.update(other_config=CheckpointConfig(**other_kwargs))
    logger.debug(
        f"SimpleCheckpointConfigurator built this CheckpointConfig:"
        f" {json.dumps(checkpoint_config.to_json_dict(), indent=4)}"
    )
    return checkpoint_config
Beispiel #2
0
    def instantiate_from_config_with_runtime_args(
        checkpoint_config: CheckpointConfig,
        data_context: "DataContext",  # noqa: F821
        **runtime_kwargs,
    ) -> "Checkpoint":
        """Instantiate a Checkpoint from a config overlaid with runtime kwargs.

        Args:
            checkpoint_config: Configuration serialized via ``to_json_dict()``.
            data_context: Passed to the instantiated class through the
                runtime environment.
            **runtime_kwargs: Overrides; entries whose value is ``None`` are
                ignored.

        Returns:
            The object produced by ``instantiate_class_from_config``.
        """
        merged: dict = checkpoint_config.to_json_dict()

        # Only explicitly provided (non-None) runtime values override the config.
        merged.update(
            {name: arg for name, arg in runtime_kwargs.items() if arg is not None}
        )

        cleaned = filter_properties_dict(properties=merged, clean_falsy=True)

        return instantiate_class_from_config(
            config=cleaned,
            runtime_environment={"data_context": data_context},
            config_defaults={"module_name": "great_expectations.checkpoint"},
        )
Beispiel #3
0
def get_substituted_validation_dict(
        substituted_runtime_config: CheckpointConfig,
        validation_dict: dict) -> dict:
    """Combine one validation entry with the checkpoint-level runtime config.

    Each field is computed from ``validation_dict`` together with the
    corresponding attribute of ``substituted_runtime_config``; the result is
    passed through ``validate_validation_dict`` before being returned.

    Args:
        substituted_runtime_config: Checkpoint-level configuration.
        validation_dict: A single validation entry.

    Returns:
        The combined validation dictionary.
    """
    batch_request = get_runtime_batch_request(
        substituted_runtime_config=substituted_runtime_config,
        validation_batch_request=validation_dict.get("batch_request"),
    )
    # The validation-level suite name wins; fall back to the checkpoint-level one.
    suite_name = (
        validation_dict.get("expectation_suite_name")
        or substituted_runtime_config.expectation_suite_name
    )
    action_list = CheckpointConfig.get_updated_action_list(
        base_action_list=substituted_runtime_config.action_list,
        other_action_list=validation_dict.get("action_list", {}),
    )
    evaluation_parameters = nested_update(
        substituted_runtime_config.evaluation_parameters,
        validation_dict.get("evaluation_parameters", {}),
    )
    runtime_configuration = nested_update(
        substituted_runtime_config.runtime_configuration,
        validation_dict.get("runtime_configuration", {}),
    )

    merged = {
        "batch_request": batch_request,
        "expectation_suite_name": suite_name,
        "action_list": action_list,
        "evaluation_parameters": evaluation_parameters,
        "runtime_configuration": runtime_configuration,
    }

    # "name" is optional and only carried over when present and not None.
    validation_name = validation_dict.get("name")
    if validation_name is not None:
        merged["name"] = validation_name

    validate_validation_dict(merged)
    return merged
def in_memory_checkpoint_config():
    """Return a CheckpointConfig named "taxi.pass.from_config" with one validation."""
    actions = [
        {
            "name": "store_validation_result",
            "action": {"class_name": "StoreValidationResultAction"},
        },
        {
            "name": "store_evaluation_params",
            "action": {"class_name": "StoreEvaluationParametersAction"},
        },
        {
            "name": "update_data_docs",
            "action": {"class_name": "UpdateDataDocsAction", "site_names": []},
        },
    ]
    taxi_validation = {
        "batch_request": {
            "datasource_name": "my_datasource",
            "data_connector_name": "default_inferred_data_connector_name",
            "data_asset_name": "yellow_tripdata_sample_2019-01.csv",
            "data_connector_query": {"index": -1},
        },
    }
    return CheckpointConfig(
        name="taxi.pass.from_config",
        config_version=1.0,
        template_name=None,
        module_name="great_expectations.checkpoint",
        class_name="Checkpoint",
        run_name_template="%Y%m%d-%H%M%S-my-run-name-template",
        expectation_suite_name="taxi.demo",
        batch_request=None,
        action_list=actions,
        evaluation_parameters={},
        runtime_configuration={},
        validations=[taxi_validation],
        profilers=[],
        ge_cloud_id=None,
        expectation_suite_ge_cloud_id=None,
    )
def test_resolve_config_using_acceptable_arguments(checkpoint):
    """Resolve runtime kwargs against the checkpoint fixture and check the result."""
    df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    def runtime_batch_request_fields() -> dict:
        # Fresh dictionaries on every call so that no object is shared between
        # the input kwargs and the expected values.
        return {
            "runtime_parameters": {"batch_data": df},
            "batch_identifiers": {"default_identifier_name": "my_simple_df"},
        }

    kwargs = {
        "batch_request": runtime_batch_request_fields(),
        "result_format": {"result_format": "SUMMARY"},
    }

    # Matching how this is called in usage_statistics.py (parameter style)
    resolved: dict = CheckpointConfig.resolve_config_using_acceptable_arguments(
        checkpoint, **kwargs
    )

    # Assertions about important bits of the substituted_runtime_config

    # The top-level batch request is compared in JSON-serializable form.
    expected_top_level = convert_to_json_serializable(
        data=runtime_batch_request_fields()
    )
    actual_top_level = convert_to_json_serializable(data=resolved["batch_request"])
    assert actual_top_level == expected_top_level

    first_validation = resolved["validations"][0]
    validation_level_batch_request = first_validation["batch_request"]

    expected_validation_batch_request = RuntimeBatchRequest(
        datasource_name="example_datasource",
        data_connector_name="default_runtime_data_connector_name",
        data_asset_name="my_data_asset",
        batch_identifiers={"default_identifier_name": "my_simple_df"},
        runtime_parameters={"batch_data": df},
    )
    assert validation_level_batch_request == expected_validation_batch_request
    assert first_validation["expectation_suite_name"] == "test_suite"
Beispiel #6
0
    def get_substituted_config(
        self,
        config: Optional[Union[CheckpointConfig, dict]] = None,
        runtime_kwargs: Optional[dict] = None,
    ) -> CheckpointConfig:
        """Return the checkpoint configuration with templates, runtime kwargs
        and config variables applied.

        Args:
            config: Configuration to resolve; defaults to ``self.config``.  A
                plain dict is converted to a CheckpointConfig.
            runtime_kwargs: Optional overrides.  A ``template_name`` entry
                takes precedence over ``config.template_name``.

        Returns:
            The result of ``self._substitute_config_variables`` applied to the
            resolved configuration.

        Raises:
            ge_exceptions.CheckpointError: If the template's ``config_version``
                differs from that of ``config``.
        """
        runtime_kwargs = runtime_kwargs or {}
        if config is None:
            config = self.config
        if isinstance(config, dict):
            config = CheckpointConfig(**config)

        substituted_config: Union[CheckpointConfig, dict]
        if (
            self._substituted_config is not None
            and not runtime_kwargs.get("template_name")
            and not config.template_name
        ):
            # Fast path: no template involved and a cached result exists.
            # Work on a copy so runtime overrides never reach the cache.
            substituted_config = deepcopy(self._substituted_config)
            if any(runtime_kwargs.values()):
                substituted_config.update(runtime_kwargs=runtime_kwargs)
        else:
            template_name = runtime_kwargs.get("template_name") or config.template_name

            if not template_name:
                substituted_config = copy.deepcopy(config)
                if any(runtime_kwargs.values()):
                    substituted_config.update(runtime_kwargs=runtime_kwargs)

                # NOTE(review): the cached object includes the runtime_kwargs
                # applied just above -- confirm this is intended.
                self._substituted_config = substituted_config
            else:
                checkpoint = self.data_context.get_checkpoint(name=template_name)
                template_config = checkpoint.config

                if template_config.config_version != config.config_version:
                    raise ge_exceptions.CheckpointError(
                        f"Invalid template '{template_name}' (ver. {template_config.config_version}) for Checkpoint "
                        f"'{config}' (ver. {config.config_version}). Checkpoints can only use templates with the same config_version."
                    )

                # Templates may themselves reference templates; resolve the
                # chain recursively before merging this config on top.
                if template_config.template_name is not None:
                    substituted_config = self.get_substituted_config(
                        config=template_config
                    )
                else:
                    # NOTE(review): the update() below mutates the template
                    # checkpoint's own config object -- presumably harmless if
                    # get_checkpoint returns a fresh instance; confirm.
                    substituted_config = template_config

                # merge template with config
                substituted_config.update(
                    other_config=config, runtime_kwargs=runtime_kwargs
                )

                # don't replace _substituted_config if already exists
                if self._substituted_config is None:
                    self._substituted_config = substituted_config
        return self._substitute_config_variables(config=substituted_config)
    def __init__(
        self,
        name: str,
        data_context: "DataContext",  # noqa: F821
        config_version: Optional[Union[int, float]] = None,
        template_name: Optional[str] = None,
        run_name_template: Optional[str] = None,
        expectation_suite_name: Optional[str] = None,
        batch_request: Optional[Union[BatchRequest, RuntimeBatchRequest,
                                      dict]] = None,
        action_list: Optional[List[dict]] = None,
        evaluation_parameters: Optional[dict] = None,
        runtime_configuration: Optional[dict] = None,
        validations: Optional[List[dict]] = None,
        profilers: Optional[List[dict]] = None,
        validation_operator_name: Optional[str] = None,
        batches: Optional[List[dict]] = None,
        ge_cloud_id: Optional[UUID] = None,
        expectation_suite_ge_cloud_id: Optional[UUID] = None,
    ):
        """Build a CheckpointConfig from the arguments and hand it to the base class.

        Raises:
            ValueError: If ``batch_request`` or any entry of ``validations``
                is reported to contain batch data.
        """
        # Only primitive types are allowed as constructor arguments; data frames are supplied to "run()" as arguments.
        batch_data_error = (
            "Error: batch_data found in batch_request -- only primitive types are allowed as Checkpoint "
            "constructor arguments.\n"
        )

        if batch_request_contains_batch_data(batch_request=batch_request):
            raise ValueError(batch_data_error)

        if batch_request_in_validations_contains_batch_data(
                validations=validations):
            raise ValueError(batch_data_error)

        config_fields = {
            "name": name,
            "config_version": config_version,
            "template_name": template_name,
            "run_name_template": run_name_template,
            "expectation_suite_name": expectation_suite_name,
            "batch_request": batch_request,
            "action_list": action_list,
            "evaluation_parameters": evaluation_parameters,
            "runtime_configuration": runtime_configuration,
            "validations": validations,
            "profilers": profilers,
            "validation_operator_name": validation_operator_name,
            "batches": batches,
            "ge_cloud_id": ge_cloud_id,
            "expectation_suite_ge_cloud_id": expectation_suite_ge_cloud_id,
        }
        super().__init__(
            checkpoint_config=CheckpointConfig(**config_fields),
            data_context=data_context,
        )
Beispiel #8
0
    def __init__(
        self,
        name: str,
        data_context,
        config_version: Optional[Union[int, float]] = None,
        template_name: Optional[str] = None,
        module_name: Optional[str] = None,
        class_name: Optional[str] = None,
        run_name_template: Optional[str] = None,
        expectation_suite_name: Optional[str] = None,
        batch_request: Optional[Union[BatchRequest, dict]] = None,
        action_list: Optional[List[dict]] = None,
        evaluation_parameters: Optional[dict] = None,
        runtime_configuration: Optional[dict] = None,
        validations: Optional[List[dict]] = None,
        profilers: Optional[List[dict]] = None,
        validation_operator_name: Optional[str] = None,
        batches: Optional[List[dict]] = None,
        ge_cloud_id: Optional[UUID] = None,
    ):
        """Store the name and data context and build the CheckpointConfig.

        Raises:
            TypeError: If the type name of ``data_context`` does not contain
                "DataContext".
        """
        self._name = name

        # Note the gross typechecking to avoid a circular import
        context_type_name = str(type(data_context))
        if "DataContext" not in context_type_name:
            raise TypeError("A Checkpoint requires a valid DataContext")
        self._data_context = data_context

        self._config = CheckpointConfig(
            name=name,
            config_version=config_version,
            template_name=template_name,
            module_name=module_name,
            class_name=class_name,
            run_name_template=run_name_template,
            expectation_suite_name=expectation_suite_name,
            batch_request=batch_request,
            action_list=action_list,
            evaluation_parameters=evaluation_parameters,
            runtime_configuration=runtime_configuration,
            validations=validations,
            profilers=profilers,
            ge_cloud_id=ge_cloud_id,
            # Next two fields are for LegacyCheckpoint configuration
            validation_operator_name=validation_operator_name,
            batches=batches,
        )
        # No substituted configuration has been computed yet.
        self._substituted_config = None
Beispiel #9
0
    def _build_checkpoint_config(self) -> CheckpointConfig:
        """Assemble the CheckpointConfig described by this configurator.

        The action list starts from ``_default_action_list()`` and is extended
        by ``_add_update_data_docs_action`` / ``_add_slack_action`` when
        ``site_names`` / ``slack_webhook`` are set.  ``other_kwargs`` supplies
        the remaining fields; ``config_version``, ``name``, ``class_name``,
        ``action_list`` and ``ge_cloud_id`` are then overridden, and the result
        is round-tripped through ``checkpointConfigSchema``.

        Returns:
            A CheckpointConfig built from the schema-dumped keyword arguments.
        """
        action_list = self._default_action_list()
        if self.site_names:
            action_list = self._add_update_data_docs_action(action_list)
        if self.slack_webhook:
            action_list = self._add_slack_action(action_list)

        # Work on a shallow copy: the edits below (pop/assign/update) must not
        # leak into self.other_kwargs, and this also covers other_kwargs=None.
        config_kwargs: dict = dict(self.other_kwargs or {})

        # DataFrames shouldn't be saved to CheckpointStore
        batch_request = config_kwargs.get("batch_request")
        if batch_request_contains_batch_data(batch_request=batch_request):
            config_kwargs.pop("batch_request", None)
        else:
            config_kwargs["batch_request"] = get_batch_request_as_dict(
                batch_request=batch_request
            )

        # DataFrames shouldn't be saved to CheckpointStore
        validations = config_kwargs.get("validations")
        if batch_request_in_validations_contains_batch_data(validations=validations):
            config_kwargs.pop("validations", [])
        else:
            config_kwargs["validations"] = get_validations_with_batch_request_as_dict(
                validations=validations
            )

        specific_config_kwargs_overrides: dict = {
            "config_version": 1.0,
            "name": self.name,
            "class_name": "Checkpoint",
            "action_list": action_list,
            # Popped from the local copy rather than from self.other_kwargs,
            # which may be None.
            "ge_cloud_id": config_kwargs.pop("ge_cloud_id", None),
        }
        config_kwargs.update(specific_config_kwargs_overrides)

        # Roundtrip through schema validation to remove any illegal fields and/or restore any missing fields.
        checkpoint_config: dict = checkpointConfigSchema.load(
            CommentedMap(**config_kwargs)
        )
        config_kwargs = checkpointConfigSchema.dump(checkpoint_config)

        logger.debug(
            f"SimpleCheckpointConfigurator built this CheckpointConfig:"
            f"{checkpoint_config}"
        )
        return CheckpointConfig(**config_kwargs)
Beispiel #10
0
    def serialization_self_check(self, pretty_print: bool) -> None:
        """Round-trip a throwaway CheckpointConfig through this store.

        A randomly named configuration is written with ``set``, read back with
        ``get`` and deleted with ``remove_key``.

        Args:
            pretty_print: When true, progress messages are printed.
        """
        random_suffix = "".join(
            random.choice(list("0123456789ABCDEF")) for _ in range(20)
        )
        test_checkpoint_name: str = "test-name-" + random_suffix
        test_checkpoint_configuration = CheckpointConfig(name=test_checkpoint_name)

        # The key type depends on whether the store is running in GE Cloud mode.
        if self.ge_cloud_mode:
            test_key = self.key_class(
                resource_type=GeCloudRESTResource.CONTRACT,
                ge_cloud_id=str(uuid.uuid4()),
            )
        else:
            test_key = self.key_class(configuration_key=test_checkpoint_name)

        # Step 1: write the test configuration.
        if pretty_print:
            print(
                f"Attempting to add a new test key {test_key} to Checkpoint store..."
            )
        self.set(key=test_key, value=test_checkpoint_configuration)

        # Step 2: read it back (the value itself is not inspected).
        if pretty_print:
            print(
                f"\tTest key {test_key} successfully added to Checkpoint store.\n"
            )
            print(
                f"Attempting to retrieve the test value associated with key {test_key} from Checkpoint store..."
            )
        self.get(key=test_key)

        # Step 3: remove the test key again.
        if pretty_print:
            print("\tTest value successfully retrieved from Checkpoint store.")
            print()
            print(
                f"Cleaning up test key {test_key} and value from Checkpoint store..."
            )
        self.remove_key(key=test_key)

        if pretty_print:
            print(
                "\tTest key and value successfully removed from Checkpoint store."
            )
            print()
    def _substitute_config_variables(
            self, config: CheckpointConfig) -> CheckpointConfig:
        """Return a new CheckpointConfig with config variables substituted.

        The substitution table is layered, later sources winning: the data
        context's config variables (themselves substituted against the process
        environment), then the process environment, then the data context's
        runtime environment.
        """
        resolved_config_variables = substitute_all_config_variables(
            self.data_context.config_variables,
            dict(os.environ),
            self.data_context.DOLLAR_SIGN_ESCAPE_STRING,
        )

        # Layer the sources; each later mapping overrides earlier keys.
        substitutions = {**resolved_config_variables}
        substitutions.update(dict(os.environ))
        substitutions.update(**self.data_context.runtime_environment)

        substituted = substitute_all_config_variables(
            config,
            substitutions,
            self.data_context.DOLLAR_SIGN_ESCAPE_STRING,
        )
        return CheckpointConfig(**substituted)
def get_checkpoint_run_usage_statistics(
    checkpoint: "Checkpoint",  # noqa: F821
    *args,
    **kwargs,
) -> dict:
    """Build the anonymized usage-statistics payload for a checkpoint run.

    Args:
        checkpoint: The checkpoint being run.
        *args: Unused.
        **kwargs: Runtime keyword arguments, resolved against the checkpoint
            before anonymization.

    Returns:
        The anonymized payload, or an empty dict when the checkpoint has no
        usage statistics handler or the payload could not be built.
    """
    usage_statistics_handler: Optional[
        UsageStatisticsHandler
    ] = checkpoint._usage_statistics_handler

    try:
        data_context_id: Optional[str] = checkpoint.data_context.data_context_id
    except AttributeError:
        data_context_id = None

    # Make sure an anonymizer is cached for this data context id.
    if _anonymizers.get(data_context_id, None) is None:
        _anonymizers[data_context_id] = Anonymizer(data_context_id)

    if not usage_statistics_handler:
        return {}

    payload: dict = {}
    # noinspection PyBroadException
    try:
        # The handler's own anonymizer is the one used for the payload.
        anonymizer = usage_statistics_handler.anonymizer  # noqa: F821

        resolved_runtime_kwargs: dict = (
            CheckpointConfig.resolve_config_using_acceptable_arguments(
                checkpoint, **kwargs
            )
        )

        payload = anonymizer.anonymize(checkpoint, **resolved_runtime_kwargs)
    except Exception as exc:
        # Best effort: usage statistics must never break a checkpoint run.
        logger.debug(
            f"{UsageStatsExceptionPrefix.EMIT_EXCEPTION.value}: {exc} type: {type(exc)}, get_checkpoint_run_usage_statistics: Unable to create anonymized_checkpoint_run payload field"
        )

    return payload
    def serialization_self_check(self, pretty_print: bool) -> None:
        """Round-trip a throwaway CheckpointConfig through this store.

        A randomly named configuration is written with ``set``, read back with
        ``get`` and deleted with ``remove_key``.

        Args:
            pretty_print: When true, progress messages are printed.
        """
        test_checkpoint_name: str = "test-name-" + "".join(
            [random.choice(list("0123456789ABCDEF")) for _ in range(20)])
        test_checkpoint_configuration: CheckpointConfig = CheckpointConfig(
            **{"name": test_checkpoint_name})
        test_key: ConfigurationIdentifier = self._key_class(
            configuration_key=test_checkpoint_name)

        if pretty_print:
            print(
                f"Attempting to add a new test key {test_key} to Checkpoint store..."
            )
        self.set(key=test_key, value=test_checkpoint_configuration)
        if pretty_print:
            print(
                f"\tTest key {test_key} successfully added to Checkpoint store."
            )
            print()

        if pretty_print:
            print(
                f"Attempting to retrieve the test value associated with key {test_key} from Checkpoint store..."
            )
        # Only the success of the call matters; the returned value is ignored.
        self.get(key=test_key)
        if pretty_print:
            # Spelling corrected ("retreived" -> "retrieved").
            print("\tTest value successfully retrieved from Checkpoint store.")
            print()

        if pretty_print:
            print(
                f"Cleaning up test key {test_key} and value from Checkpoint store..."
            )

        # Only the success of the call matters; the returned value is ignored.
        self.remove_key(key=test_key)
        if pretty_print:
            print(
                "\tTest key and value successfully removed from Checkpoint store."
            )
            print()
def test_checkpoint_store(empty_data_context):
    """Exercise a filesystem-backed CheckpointStore end to end.

    Covers rejection of a non-checkpoint value, storing and reading back two
    CheckpointConfig objects, the resulting directory layout, the
    ``self_check`` report and key removal.
    """
    store_name: str = "checkpoint_store"
    base_directory: str = str(
        Path(empty_data_context.root_directory) / "checkpoints")

    checkpoint_store: CheckpointStore = build_checkpoint_store_using_filesystem(
        store_name=store_name,
        base_directory=base_directory,
        overwrite_existing=True,
    )

    # A freshly built store has no keys.
    assert len(checkpoint_store.list_keys()) == 0

    # Setting a plain string value must be rejected with a TypeError ...
    with pytest.raises(TypeError):
        checkpoint_store.set(key="my_first_checkpoint",
                             value="this is not a checkpoint")

    # ... and must leave the store unchanged.
    assert len(checkpoint_store.list_keys()) == 0

    # First checkpoint: the batch_request lives inside the validation entry.
    checkpoint_name_0: str = "my_checkpoint_0"
    run_name_template_0: str = "%Y-%M-my-run-template-$VAR"
    validations_0: Union[List, Dict] = [{
        "batch_request": {
            "datasource_name": "my_pandas_datasource",
            "data_connector_name": "my_runtime_data_connector",
            "data_asset_name": "my_website_logs",
        },
        "action_list": [
            {
                "name": "store_validation_result",
                "action": {
                    "class_name": "StoreValidationResultAction",
                },
            },
            {
                "name": "store_evaluation_params",
                "action": {
                    "class_name": "StoreEvaluationParametersAction",
                },
            },
            {
                "name": "update_data_docs",
                "action": {
                    "class_name": "UpdateDataDocsAction",
                },
            },
        ],
    }]
    expectation_suite_name_0: str = "my.test.expectation_suite.name"
    evaluation_parameters_0: dict = {
        "environment": "$GE_ENVIRONMENT",
        "tolerance": 1.0e-2,
        "aux_param_0": "$MY_PARAM",
        "aux_param_1": "1 + $MY_PARAM",
    }
    runtime_configuration_0: dict = {
        "result_format": {
            "result_format": "BASIC",
            "partial_unexpected_count": 20,
        },
    }
    my_checkpoint_config_0: CheckpointConfig = CheckpointConfig(
        name=checkpoint_name_0,
        run_name_template=run_name_template_0,
        expectation_suite_name=expectation_suite_name_0,
        evaluation_parameters=evaluation_parameters_0,
        runtime_configuration=runtime_configuration_0,
        validations=validations_0,
    )

    key_0: ConfigurationIdentifier = ConfigurationIdentifier(
        configuration_key=checkpoint_name_0, )
    checkpoint_store.set(key=key_0, value=my_checkpoint_config_0)

    assert len(checkpoint_store.list_keys()) == 1

    # The stored config must round-trip to the same filtered JSON dict.
    assert filter_properties_dict(properties=checkpoint_store.get(
        key=key_0).to_json_dict(), ) == filter_properties_dict(
            properties=my_checkpoint_config_0.to_json_dict(), )

    # One YAML file per checkpoint, next to the store backend id file.
    dir_tree: str = gen_directory_tree_str(startpath=base_directory)
    assert (dir_tree == """checkpoints/
    .ge_store_backend_id
    my_checkpoint_0.yml
""")

    # Second checkpoint: the batch_request is given at the top level instead.
    checkpoint_name_1: str = "my_checkpoint_1"
    run_name_template_1: str = "%Y-%M-my-run-template-$VAR"
    validations_1: Union[List, Dict] = [{
        "action_list": [
            {
                "name": "store_validation_result",
                "action": {
                    "class_name": "StoreValidationResultAction",
                },
            },
            {
                "name": "store_evaluation_params",
                "action": {
                    "class_name": "StoreEvaluationParametersAction",
                },
            },
            {
                "name": "update_data_docs",
                "action": {
                    "class_name": "UpdateDataDocsAction",
                },
            },
        ]
    }]
    expectation_suite_name_1: str = "my.test.expectation_suite.name"
    batch_request_1: dict = {
        "datasource_name": "my_pandas_datasource",
        "data_connector_name": "my_runtime_data_connector",
        "data_asset_name": "my_website_logs",
    }
    evaluation_parameters_1: dict = {
        "environment": "$GE_ENVIRONMENT",
        "tolerance": 1.0e-2,
        "aux_param_0": "$MY_PARAM",
        "aux_param_1": "1 + $MY_PARAM",
    }
    runtime_configuration_1: dict = {
        "result_format": {
            "result_format": "BASIC",
            "partial_unexpected_count": 20,
        },
    }
    my_checkpoint_config_1: CheckpointConfig = CheckpointConfig(
        name=checkpoint_name_1,
        run_name_template=run_name_template_1,
        expectation_suite_name=expectation_suite_name_1,
        batch_request=batch_request_1,
        evaluation_parameters=evaluation_parameters_1,
        runtime_configuration=runtime_configuration_1,
        validations=validations_1,
    )

    key_1: ConfigurationIdentifier = ConfigurationIdentifier(
        configuration_key=checkpoint_name_1, )
    checkpoint_store.set(key=key_1, value=my_checkpoint_config_1)

    assert len(checkpoint_store.list_keys()) == 2

    # Same round-trip check for the second checkpoint.
    assert filter_properties_dict(properties=checkpoint_store.get(
        key=key_1).to_json_dict(), ) == filter_properties_dict(
            properties=my_checkpoint_config_1.to_json_dict(), )

    dir_tree: str = gen_directory_tree_str(startpath=base_directory)
    assert (dir_tree == """checkpoints/
    .ge_store_backend_id
    my_checkpoint_0.yml
    my_checkpoint_1.yml
""")

    # The self-check report lists both keys and echoes the store configuration.
    self_check_report: dict = convert_to_json_serializable(
        data=checkpoint_store.self_check())
    assert self_check_report == {
        "keys": ["my_checkpoint_0", "my_checkpoint_1"],
        "len_keys": 2,
        "config": {
            "store_name": "checkpoint_store",
            "class_name": "CheckpointStore",
            "module_name":
            "great_expectations.data_context.store.checkpoint_store",
            "overwrite_existing": True,
            "store_backend": {
                "base_directory":
                f"{empty_data_context.root_directory}/checkpoints",
                "platform_specific_separator": True,
                "fixed_length_key": False,
                "suppress_store_backend_id": False,
                "module_name":
                "great_expectations.data_context.store.tuple_store_backend",
                "class_name": "TupleFilesystemStoreBackend",
                "filepath_template": "{0}.yml",
            },
        },
    }

    # Removing both keys leaves the store empty again.
    checkpoint_store.remove_key(key=key_0)
    checkpoint_store.remove_key(key=key_1)
    assert len(checkpoint_store.list_keys()) == 0
Beispiel #15
0
def test_checkpoint_config_deepcopy(
    titanic_pandas_data_context_with_v013_datasource_stats_enabled_with_checkpoints_v1_with_templates,
    monkeypatch,
):
    """Check that a deep copy of a substituted checkpoint config equals the original.

    The config is obtained from a templated checkpoint with runtime kwargs
    (including a RuntimeBatchRequest carrying a DataFrame); both sides are
    compared after ``deep_filter_properties_iterable`` with ``clean_falsy=True``.
    """
    # Environment variables referenced by the "$..." placeholders used below.
    monkeypatch.setenv("GE_ENVIRONMENT", "my_ge_environment")
    monkeypatch.setenv("VAR", "test")
    monkeypatch.setenv("MY_PARAM", "1")
    monkeypatch.setenv("OLD_PARAM", "2")

    context: DataContext = titanic_pandas_data_context_with_v013_datasource_stats_enabled_with_checkpoints_v1_with_templates

    test_df: pd.DataFrame = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})

    # Runtime batch request carrying the in-memory DataFrame.
    runtime_batch_request: RuntimeBatchRequest = RuntimeBatchRequest(
        **{
            "datasource_name": "my_datasource",
            "data_connector_name": "my_runtime_data_connector",
            "data_asset_name": "default_data_asset_name",
            "batch_identifiers": {
                "pipeline_stage_name": "core_processing",
                "airflow_run_id": 1234567890,
            },
            "runtime_parameters": {"batch_data": test_df},
        }
    )

    # Checkpoint config that refers to a template by name.
    nested_checkpoint_config: CheckpointConfig = CheckpointConfig(
        name="my_nested_checkpoint",
        config_version=1,
        template_name="my_nested_checkpoint_template_2",
        expectation_suite_name="users.delivery",
        validations=[
            {
                "batch_request": {
                    "datasource_name": "my_datasource",
                    "data_connector_name": "my_special_data_connector",
                    "data_asset_name": "users",
                    "data_connector_query": {"partition_index": -1},
                }
            },
            {
                "batch_request": {
                    "datasource_name": "my_datasource",
                    "data_connector_name": "my_other_data_connector",
                    "data_asset_name": "users",
                    "data_connector_query": {"partition_index": -2},
                }
            },
        ],
    )
    # class_name/module_name are stripped before the dict is used as
    # Checkpoint constructor keyword arguments.
    nested_checkpoint: Checkpoint = Checkpoint(
        data_context=context,
        **filter_properties_dict(
            properties=nested_checkpoint_config.to_json_dict(),
            delete_fields={"class_name", "module_name"},
            clean_falsy=True,
        ),
    )
    # NOTE(review): annotated as dict here; presumably get_substituted_config
    # returns a CheckpointConfig -- confirm against its signature.
    substituted_config_template_and_runtime_kwargs: dict = nested_checkpoint.get_substituted_config(
        runtime_kwargs={
            "batch_request": runtime_batch_request,
            "expectation_suite_name": "runtime_suite_name",
            "template_name": "my_nested_checkpoint_template_3",
            "validations": [
                {
                    "batch_request": {
                        "datasource_name": "my_datasource",
                        "data_connector_name": "my_other_data_connector_2_runtime",
                        "data_asset_name": "users",
                        "data_connector_query": {"partition_index": -3},
                    }
                },
                {
                    "batch_request": {
                        "datasource_name": "my_datasource",
                        "data_connector_name": "my_other_data_connector_3_runtime",
                        "data_asset_name": "users",
                        "data_connector_query": {"partition_index": -4},
                    }
                },
            ],
            "run_name_template": "runtime_run_template",
            "action_list": [
                {
                    "name": "store_validation_result",
                    "action": {
                        "class_name": "StoreValidationResultAction",
                    },
                },
                {
                    "name": "store_evaluation_params",
                    "action": {
                        "class_name": "MyCustomRuntimeStoreEvaluationParametersAction",
                    },
                },
                {
                    "name": "update_data_docs",
                    "action": None,
                },
                {
                    "name": "update_data_docs_deluxe_runtime",
                    "action": {
                        "class_name": "UpdateDataDocsAction",
                    },
                },
            ],
            "evaluation_parameters": {
                "environment": "runtime-$GE_ENVIRONMENT",
                "tolerance": 1.0e-2,
                "aux_param_0": "runtime-$MY_PARAM",
                "aux_param_1": "1 + $MY_PARAM",
                "new_runtime_eval_param": "bloopy!",
            },
            "runtime_configuration": {
                "result_format": "BASIC",
                "partial_unexpected_count": 999,
                "new_runtime_config_key": "bleepy!",
            },
        }
    )

    # The deep copy must compare equal to the original once both are filtered.
    checkpoint_config_copy: dict = copy.deepcopy(
        substituted_config_template_and_runtime_kwargs
    )
    assert deep_filter_properties_iterable(
        properties=checkpoint_config_copy,
        clean_falsy=True,
    ) == deep_filter_properties_iterable(
        properties=substituted_config_template_and_runtime_kwargs,
        clean_falsy=True,
    )
Beispiel #16
0
def test_GeCloudStoreBackend():
    """
    Verify the HTTP requests GeCloudStoreBackend issues for each store operation.

    GeCloudStoreBackend talks to GE Cloud, so requests.post, requests.get, and
    requests.patch are mocked; the test asserts on the URL, payload, and headers
    of the calls made by set, get, list_keys, and remove_key.
    """
    base_url = "https://app.greatexpectations.io/"
    credentials = {
        "access_token": "1234",
        "account_id": "51379b8b-86d3-4fe7-84e9-e1a52f4a414c",
    }
    resource_type = "checkpoint"

    # Values every mocked call is expected to carry.
    checkpoint_id = "0ccac18e-7631-4bdd-8a42-3c35cce574c6"
    checkpoints_url = "https://app.greatexpectations.io/accounts/51379b8b-86d3-4fe7-84e9-e1a52f4a414c/checkpoints"
    single_checkpoint_url = f"{checkpoints_url}/{checkpoint_id}"
    expected_headers = {
        "Content-Type": "application/vnd.api+json",
        "Authorization": "Bearer 1234",
    }

    def make_backend():
        # A fresh backend is built inside each patch context, one per operation.
        return GeCloudStoreBackend(
            ge_cloud_base_url=base_url,
            ge_cloud_credentials=credentials,
            ge_cloud_resource_type=resource_type,
        )

    simple_checkpoint_config = CheckpointConfig(
        name="my_minimal_simple_checkpoint",
        class_name="SimpleCheckpoint",
        config_version=1,
    )
    schema = simple_checkpoint_config.get_schema_class()()
    serialized_checkpoint_config = schema.dump(simple_checkpoint_config)

    # Payload that .set is expected to post for the serialized config above.
    expected_checkpoint_config = OrderedDict(
        [
            ("name", "my_minimal_simple_checkpoint"),
            ("config_version", 1.0),
            ("template_name", None),
            ("module_name", "great_expectations.checkpoint"),
            ("class_name", "SimpleCheckpoint"),
            ("run_name_template", None),
            ("expectation_suite_name", None),
            ("batch_request", None),
            ("action_list", []),
            ("evaluation_parameters", {}),
            ("runtime_configuration", {}),
            ("validations", []),
            ("profilers", []),
            ("ge_cloud_id", None),
        ]
    )

    # .set -- the remaining sections run while requests.post is still patched.
    with patch("requests.post", autospec=True) as mock_post:
        backend = make_backend()
        backend.set(("my_checkpoint_name",), serialized_checkpoint_config)
        mock_post.assert_called_with(
            checkpoints_url,
            json={
                "data": {
                    "type": "checkpoint",
                    "attributes": {
                        "account_id": "51379b8b-86d3-4fe7-84e9-e1a52f4a414c",
                        "checkpoint_config": expected_checkpoint_config,
                    },
                }
            },
            headers=expected_headers,
        )

        # .get
        with patch("requests.get", autospec=True) as mock_get:
            backend = make_backend()
            backend.get((checkpoint_id,))
            mock_get.assert_called_with(
                single_checkpoint_url,
                headers=expected_headers,
            )

        # .list_keys
        with patch("requests.get", autospec=True) as mock_get:
            backend = make_backend()
            backend.list_keys()
            mock_get.assert_called_with(
                checkpoints_url,
                headers=expected_headers,
            )

        # .remove_key -- the mocked PATCH response reports HTTP 200.
        with patch("requests.patch", autospec=True) as mock_patch:
            mock_patch.return_value.status_code = 200

            backend = make_backend()
            backend.remove_key((checkpoint_id,))
            mock_patch.assert_called_with(
                single_checkpoint_url,
                json={
                    "data": {
                        "type": "checkpoint",
                        "id": checkpoint_id,
                        "attributes": {"deleted": True},
                    }
                },
                headers=expected_headers,
            )
Beispiel #17
0
def test_checkpoint_config_repr_after_substitution(checkpoint):
    """
    Check the JSON text of a checkpoint config once runtime kwargs are resolved.

    A runtime batch request carrying an in-memory DataFrame and a result format
    are resolved against the ``checkpoint`` fixture; the result is converted to
    a JSON-serializable dict, filtered in place, ordered by top-level key, and
    compared with the expected ``json.dumps(..., indent=2)`` output.
    """
    frame = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    runtime_kwargs = {
        "batch_request": {
            "runtime_parameters": {"batch_data": frame},
            "batch_identifiers": {"default_identifier_name": "my_simple_df"},
        },
        "result_format": {"result_format": "SUMMARY"},
    }

    # Matching how this is called in usage_statistics.py (parameter style)
    positional_args = (checkpoint,)
    resolved = CheckpointConfig.resolve_config_using_acceptable_arguments(
        *positional_args, **runtime_kwargs
    )

    serializable = convert_to_json_serializable(data=resolved)
    # Filtering mutates ``serializable`` directly; the return value is unused.
    deep_filter_properties_iterable(
        properties=serializable,
        inplace=True,
    )

    # Only the top-level keys are ordered; nested dicts keep their own order.
    ordered = {name: serializable[name] for name in sorted(serializable)}
    checkpoint_config_repr = json.dumps(ordered, indent=2)

    expected_repr = """{
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "store_evaluation_params",
      "action": {
        "class_name": "StoreEvaluationParametersAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction",
        "site_names": []
      }
    }
  ],
  "batch_request": {
    "runtime_parameters": {
      "batch_data": [
        {
          "a": 1,
          "b": 3
        },
        {
          "a": 2,
          "b": 4
        }
      ]
    },
    "batch_identifiers": {
      "default_identifier_name": "my_simple_df"
    }
  },
  "class_name": "Checkpoint",
  "config_version": 1.0,
  "evaluation_parameters": {},
  "module_name": "great_expectations.checkpoint",
  "name": "my_checkpoint",
  "profilers": [],
  "runtime_configuration": {},
  "validations": [
    {
      "batch_request": {
        "datasource_name": "example_datasource",
        "data_connector_name": "default_runtime_data_connector_name",
        "data_asset_name": "my_data_asset",
        "runtime_parameters": {
          "batch_data": "<class \'pandas.core.frame.DataFrame\'>"
        },
        "batch_identifiers": {
          "default_identifier_name": "my_simple_df"
        }
      },
      "expectation_suite_name": "test_suite",
      "action_list": [
        {
          "name": "store_validation_result",
          "action": {
            "class_name": "StoreValidationResultAction"
          }
        },
        {
          "name": "store_evaluation_params",
          "action": {
            "class_name": "StoreEvaluationParametersAction"
          }
        },
        {
          "name": "update_data_docs",
          "action": {
            "class_name": "UpdateDataDocsAction",
            "site_names": []
          }
        }
      ]
    }
  ]
}"""

    assert checkpoint_config_repr == expected_repr