예제 #1
0
def get_substituted_batch_request(
    substituted_runtime_config: dict,
    validation_batch_request: Optional[Union[BatchRequestBase, dict]] = None,
) -> Optional[Union[BatchRequest, RuntimeBatchRequest]]:
    """Combine the top-level and the per-validation batch request into one.

    Args:
        substituted_runtime_config: Checkpoint configuration; its optional
            "batch_request" entry supplies the top-level batch request.
        validation_batch_request: Batch request attached to a single
            validation, if any.

    Returns:
        None when neither source provides a batch request; otherwise the
        result of ``materialize_batch_request`` applied to the merged
        attributes.

    Raises:
        ge_exceptions.CheckpointError: If an attribute carries a non-None
            value in both the validation and the top-level batch request.
    """
    substituted_runtime_batch_request = substituted_runtime_config.get("batch_request")

    if substituted_runtime_batch_request is None and validation_batch_request is None:
        return None

    if substituted_runtime_batch_request is None:
        substituted_runtime_batch_request = {}

    if validation_batch_request is None:
        validation_batch_request = {}

    validation_batch_request = get_batch_request_as_dict(
        batch_request=validation_batch_request
    )
    substituted_runtime_batch_request = get_batch_request_as_dict(
        batch_request=substituted_runtime_batch_request
    )

    for key, value in validation_batch_request.items():
        substituted_value = substituted_runtime_batch_request.get(key)
        if value is not None and substituted_value is not None:
            raise ge_exceptions.CheckpointError(
                f'BatchRequest attribute "{key}" was specified in both validation and top-level CheckpointConfig.'
            )

    # Start from the top-level attributes and layer the validation-level ones
    # on top.  A key may legitimately appear in both dictionaries as long as at
    # most one side is non-None (that is what the check above allows), so the
    # merge must tolerate shared keys: "dict(**a, **b)" raises TypeError on any
    # duplicate key.  A None placeholder never replaces an existing entry.
    effective_batch_request: dict = dict(substituted_runtime_batch_request)
    for key, value in validation_batch_request.items():
        if value is not None or key not in effective_batch_request:
            effective_batch_request[key] = value

    return materialize_batch_request(batch_request=effective_batch_request)
예제 #2
0
def get_validations_with_batch_request_as_dict(
    validations: Optional[list] = None,
) -> Optional[list]:
    """Convert the "batch_request" entry of every validation via ``get_batch_request_as_dict``.

    The validation dictionaries are updated in place and the very same list
    object is handed back.  A missing or empty ``validations`` argument is
    returned untouched.

    Args:
        validations: List of validation dictionaries, or None.

    Returns:
        The ``validations`` argument itself.
    """
    if not validations:
        return validations

    for validation in validations:
        if "batch_request" not in validation:
            continue
        validation["batch_request"] = get_batch_request_as_dict(
            batch_request=validation["batch_request"]
        )

    return validations
예제 #3
0
    def _build_checkpoint_config(self) -> CheckpointConfig:
        """Assemble a CheckpointConfig from this configurator's settings.

        The action list starts from ``_default_action_list()`` and gains a
        data-docs action when ``site_names`` is set and a Slack action when
        ``slack_webhook`` is set.  Remaining settings come from
        ``other_kwargs``; batch requests that carry batch data are dropped.

        Note: when ``self.other_kwargs`` is a non-empty dict it is used
        directly (not copied), so it is modified by this method.

        Returns:
            The CheckpointConfig produced after a load/dump round trip through
            ``checkpointConfigSchema``.
        """
        action_list = self._default_action_list()
        if self.site_names:
            action_list = self._add_update_data_docs_action(action_list)
        if self.slack_webhook:
            action_list = self._add_slack_action(action_list)

        config_kwargs: dict = self.other_kwargs or {}

        # DataFrames shouldn't be saved to CheckpointStore
        batch_request = config_kwargs.get("batch_request")
        if batch_request_contains_batch_data(batch_request=batch_request):
            config_kwargs.pop("batch_request", None)
        else:
            config_kwargs["batch_request"] = get_batch_request_as_dict(
                batch_request=batch_request
            )

        # DataFrames shouldn't be saved to CheckpointStore
        validations = config_kwargs.get("validations")
        if batch_request_in_validations_contains_batch_data(validations=validations):
            config_kwargs.pop("validations", [])
        else:
            config_kwargs["validations"] = get_validations_with_batch_request_as_dict(
                validations=validations
            )

        specific_config_kwargs_overrides: dict = {
            "config_version": 1.0,
            "name": self.name,
            "class_name": "Checkpoint",
            "action_list": action_list,
            # Read from config_kwargs rather than self.other_kwargs: the two are
            # the same object whenever other_kwargs is truthy, and this form
            # does not fail with AttributeError when other_kwargs is None.
            "ge_cloud_id": config_kwargs.pop("ge_cloud_id", None),
        }
        config_kwargs.update(specific_config_kwargs_overrides)

        # Roundtrip through schema validation to remove any illegal fields and/or restore any missing fields.
        checkpoint_config: dict = checkpointConfigSchema.load(
            CommentedMap(**config_kwargs)
        )
        config_kwargs = checkpointConfigSchema.dump(checkpoint_config)

        logger.debug(
            f"SimpleCheckpointConfigurator built this CheckpointConfig:"
            f"{checkpoint_config}"
        )
        return CheckpointConfig(**config_kwargs)
    def _generate_rule_overrides_from_batch_request(
        self, batch_request: Union[dict, BatchRequest, RuntimeBatchRequest]
    ) -> Dict[str, Dict[str, Any]]:
        """Iterates through the profiler's builder attributes and generates a set of
        Rules that contain overrides from the input batch request. This only applies to
        ParameterBuilder and any DomainBuilder with a COLUMN MetricDomainType.

        Note that we are passing ALL batches to the parameter builder. If not used carefully,
        a bias may creep in to the resulting estimates computed by these objects.

        Users of this override should be aware that a batch request should either have no
        notion of "current/active" batch or it is excluded.

        The COLUMN domain builder receives its own mapping with
        "data_connector_query" set to {"index": -1}; the batch request passed in
        (and the one handed to the parameter builders) is not modified.

        Args:
            batch_request: Data used to override builder attributes

        Returns:
            The dictionary representation of the Rules used as runtime arguments to `run()`
        """
        rules: List[Rule] = self.rules
        if not isinstance(batch_request, dict):
            batch_request = get_batch_request_as_dict(batch_request)
            logger.info("Converted batch request to dictionary: %s",
                        batch_request)

        resulting_rules: Dict[str, Dict[str, Any]] = {}

        for rule in rules:
            domain_builder = rule.domain_builder
            if domain_builder.domain_type == MetricDomainTypes.COLUMN:
                # Build a separate mapping for the domain builder.  Writing the
                # query into the shared dictionary would also change what the
                # parameter builders (and the caller) see.
                domain_builder.batch_request = {
                    **batch_request,
                    "data_connector_query": {"index": -1},
                }

            parameter_builders = rule.parameter_builders
            if parameter_builders:
                for parameter_builder in parameter_builders:
                    parameter_builder.batch_request = batch_request

            resulting_rules[rule.name] = rule.to_dict()

        return resulting_rules
예제 #5
0
def substitute_runtime_config(source_config: dict, runtime_kwargs: dict) -> dict:
    """Overlay runtime arguments on a deep copy of a stored configuration.

    Three kinds of keys are handled:
      * replaced outright: template_name, run_name_template,
        expectation_suite_name, expectation_suite_ge_cloud_id;
      * merged: batch_request, evaluation_parameters, runtime_configuration
        (via ``nested_update``) and action_list (via
        ``get_updated_action_list``);
      * appended: validations and profilers, skipping runtime entries that
        already appear in ``source_config``.

    Args:
        source_config: Stored configuration; never modified.
        runtime_kwargs: Runtime overrides; entries that are None are ignored.

    Returns:
        ``source_config`` itself when ``runtime_kwargs`` is empty or holds
        only falsy values, otherwise the updated deep copy.
    """
    if not runtime_kwargs or not any(runtime_kwargs.values()):
        return source_config

    dest_config: dict = copy.deepcopy(source_config)

    # replace
    for replaced_key in (
        "template_name",
        "run_name_template",
        "expectation_suite_name",
        "expectation_suite_ge_cloud_id",
    ):
        runtime_value = runtime_kwargs.get(replaced_key)
        if runtime_value is not None:
            dest_config[replaced_key] = runtime_value

    # update
    runtime_batch_request = runtime_kwargs.get("batch_request")
    if runtime_batch_request is not None:
        dest_config["batch_request"] = nested_update(
            dest_config.get("batch_request") or {},
            get_batch_request_as_dict(batch_request=runtime_batch_request),
            dedup=True,
        )

    runtime_action_list = runtime_kwargs.get("action_list")
    if runtime_action_list is not None:
        dest_config["action_list"] = get_updated_action_list(
            base_action_list=dest_config.get("action_list") or [],
            other_action_list=runtime_action_list,
        )

    for merged_key in ("evaluation_parameters", "runtime_configuration"):
        runtime_value = runtime_kwargs.get(merged_key)
        if runtime_value is not None:
            dest_config[merged_key] = nested_update(
                dest_config.get(merged_key) or {},
                runtime_value,
                dedup=True,
            )

    for appended_key in ("validations", "profilers"):
        runtime_items = runtime_kwargs.get(appended_key)
        if runtime_items is None:
            continue
        combined = dest_config.get(appended_key) or []
        already_stored = source_config.get(appended_key) or []
        combined.extend(item for item in runtime_items if item not in already_stored)
        dest_config[appended_key] = combined

    return dest_config
예제 #6
0
def add_checkpoint(
    data_context: "DataContext",  # noqa: F821
    checkpoint_store: CheckpointStore,
    checkpoint_store_name: str,
    ge_cloud_mode: bool,
    name: str,
    config_version: Optional[Union[int, float]] = None,
    template_name: Optional[str] = None,
    module_name: Optional[str] = None,
    class_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[dict] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    # Next two fields are for LegacyCheckpoint configuration
    validation_operator_name: Optional[str] = None,
    batches: Optional[List[dict]] = None,
    # the following four arguments are used by SimpleCheckpoint
    site_names: Optional[Union[str, List[str]]] = None,
    slack_webhook: Optional[str] = None,
    notify_on: Optional[str] = None,
    notify_with: Optional[Union[str, List[str]]] = None,
    ge_cloud_id: Optional[str] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
) -> Union[Checkpoint, LegacyCheckpoint]:
    """Instantiate a checkpoint from the given settings and store its configuration.

    Args:
        data_context: Passed to the instantiated checkpoint as runtime environment.
        checkpoint_store: Store that receives the checkpoint configuration.
        checkpoint_store_name: Store name, used in error messages only.
        ge_cloud_mode: When true the store key is a GeCloudIdentifier built
            from ``ge_cloud_id``; otherwise a ConfigurationIdentifier built
            from ``name``.
        name: Checkpoint name.
        All remaining arguments are copied into the checkpoint configuration
        under keys of the same name; falsy entries are filtered out before
        instantiation.

    Returns:
        The newly instantiated checkpoint.

    Raises:
        ge_exceptions.InvalidConfigError: If ``batch_request`` or any batch
            request inside ``validations`` contains batch data.
    """
    # Typed objects (BatchRequest and/or RuntimeBatchRequest) in the arguments are normalized up front.
    batch_request = get_batch_request_as_dict(batch_request=batch_request)
    validations = get_validations_with_batch_request_as_dict(validations=validations)

    # DataFrames shouldn't be saved to CheckpointStore
    if batch_request_contains_batch_data(batch_request=batch_request):
        raise ge_exceptions.InvalidConfigError(
            f'batch_data found in batch_request cannot be saved to CheckpointStore "{checkpoint_store_name}"'
        )
    if batch_request_in_validations_contains_batch_data(validations=validations):
        raise ge_exceptions.InvalidConfigError(
            f'batch_data found in validations cannot be saved to CheckpointStore "{checkpoint_store_name}"'
        )

    raw_config = dict(
        name=name,
        config_version=config_version,
        template_name=template_name,
        module_name=module_name,
        class_name=class_name,
        run_name_template=run_name_template,
        expectation_suite_name=expectation_suite_name,
        batch_request=batch_request,
        action_list=action_list,
        evaluation_parameters=evaluation_parameters,
        runtime_configuration=runtime_configuration,
        validations=validations,
        profilers=profilers,
        # LegacyCheckpoint configuration
        validation_operator_name=validation_operator_name,
        batches=batches,
        # SimpleCheckpoint configuration
        site_names=site_names,
        slack_webhook=slack_webhook,
        notify_on=notify_on,
        notify_with=notify_with,
        ge_cloud_id=ge_cloud_id,
        expectation_suite_ge_cloud_id=expectation_suite_ge_cloud_id,
    )

    new_checkpoint = instantiate_class_from_config(
        config=deep_filter_properties_iterable(
            properties=raw_config,
            clean_falsy=True,
        ),
        runtime_environment={"data_context": data_context},
        config_defaults={"module_name": "great_expectations.checkpoint"},
    )

    key = (
        GeCloudIdentifier(resource_type="contract", ge_cloud_id=ge_cloud_id)
        if ge_cloud_mode
        else ConfigurationIdentifier(configuration_key=name)
    )

    checkpoint_ref = checkpoint_store.set(key=key, value=new_checkpoint.get_config())
    if isinstance(checkpoint_ref, GeCloudIdAwareRef):
        # The store reported a cloud id for the saved configuration; record it on the checkpoint.
        new_checkpoint.ge_cloud_id = uuid.UUID(checkpoint_ref.ge_cloud_id)

    return new_checkpoint
예제 #7
0
def run_checkpoint(
    data_context: "DataContext",  # noqa: F821
    checkpoint_store: CheckpointStore,
    checkpoint_name: Optional[str] = None,
    template_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[Union[BatchRequest, RuntimeBatchRequest, dict]] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    run_id: Optional[Union[str, int, float]] = None,
    run_name: Optional[str] = None,
    run_time: Optional[datetime.datetime] = None,
    result_format: Optional[str] = None,
    ge_cloud_id: Optional[str] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
    **kwargs,
) -> CheckpointResult:
    """
    Look up a stored Checkpoint and run it with the supplied runtime arguments. (Experimental)

    Args:
        data_context: DataContext used when the Checkpoint is retrieved
        checkpoint_store: CheckpointStore the Checkpoint is retrieved from
        checkpoint_name: Name of the Checkpoint to retrieve
        template_name: Name of a Checkpoint template
        run_name_template: Template for the run_name
        expectation_suite_name: Expectation suite for the run
        batch_request: Batch request for the run (typed object or dictionary)
        action_list: Actions for the run
        evaluation_parameters: Evaluation parameters for the run
        runtime_configuration: Runtime configuration overrides
        validations: Validations for the run
        profilers: Profilers for the run
        run_id: Identifier of the run
        run_name: Name of the run
        run_time: Date/time of the run
        result_format: Result format directive; when omitted, the stored
            runtime_configuration's "result_format" is used if present,
            otherwise {"result_format": "SUMMARY"}
        ge_cloud_id: Great Expectations Cloud id of the Checkpoint
        expectation_suite_ge_cloud_id: Great Expectations Cloud id of the expectation suite
        **kwargs: Extra keyword arguments forwarded to the Checkpoint's run()

    Returns:
        CheckpointResult
    """
    checkpoint = get_checkpoint(
        data_context=data_context,
        checkpoint_store=checkpoint_store,
        name=checkpoint_name,
        ge_cloud_id=ge_cloud_id,
    )
    checkpoint_config_from_store = checkpoint.get_config()

    # An explicit result_format wins; otherwise fall back to the stored runtime configuration.
    if "runtime_configuration" in checkpoint_config_from_store:
        stored_runtime_configuration = checkpoint_config_from_store.runtime_configuration
        if stored_runtime_configuration and "result_format" in stored_runtime_configuration:
            result_format = result_format or stored_runtime_configuration.get(
                "result_format"
            )

    if result_format is None:
        result_format = {"result_format": "SUMMARY"}

    checkpoint_config_from_call_args: dict = {
        "template_name": template_name,
        "run_name_template": run_name_template,
        "expectation_suite_name": expectation_suite_name,
        "batch_request": get_batch_request_as_dict(batch_request=batch_request),
        "action_list": action_list,
        "evaluation_parameters": evaluation_parameters,
        "runtime_configuration": runtime_configuration,
        "validations": get_validations_with_batch_request_as_dict(
            validations=validations
        ),
        "profilers": profilers,
        "run_id": run_id,
        "run_name": run_name,
        "run_time": run_time,
        "result_format": result_format,
        "expectation_suite_ge_cloud_id": expectation_suite_ge_cloud_id,
    }

    # Seed with the stored entries that have a call-argument counterpart, then
    # let the call arguments overwrite them.
    merged_config: dict = {
        stored_key: stored_value
        for stored_key, stored_value in checkpoint_config_from_store.items()
        if stored_key in checkpoint_config_from_call_args
    }
    merged_config.update(checkpoint_config_from_call_args)

    checkpoint_run_arguments: dict = dict(**merged_config, **kwargs)
    filter_properties_dict(
        properties=checkpoint_run_arguments,
        clean_falsy=True,
        inplace=True,
    )

    return checkpoint.run(**checkpoint_run_arguments)
    def _anonymize_checkpoint_run(self, obj: object, **kwargs) -> dict:
        """
        Build the anonymized payload describing a Checkpoint run.

        Every user-customizable field found in ``kwargs`` is anonymized (strings through
        ``self._anonymize_string``, batch requests and actions through
        ``self._aggregate_anonymizer``); "config_version" is recorded as is and defaults to 1.0.

        Args:
            obj: Not used by this method.
            **kwargs: Checkpoint run arguments (name, config_version, template_name,
                run_name_template, expectation_suite_name, batch_request, action_list,
                validations, run_id, run_name, run_time, expectation_suite_ge_cloud_id, plus any
                of CHECKPOINT_OPTIONAL_TOP_LEVEL_KEYS).

        Returns:
            The payload dictionary after ``deep_filter_properties_iterable(clean_falsy=True)``
            has been applied to it in place.
        """

        def anonymize_actions(actions, payload_field):
            # Best effort: a malformed action entry must not break the whole payload, so any
            # failure is logged at debug level and the field is left as None.
            if not actions:
                return None
            # noinspection PyBroadException
            try:
                return [
                    self._aggregate_anonymizer.anonymize(
                        action_name=action_config_dict["name"],
                        action_config=action_config_dict["action"],
                    )
                    for action_config_dict in actions
                ]
            except Exception:
                logger.debug(
                    "anonymize_checkpoint_run: Unable to create %s payload field",
                    payload_field,
                )
                return None

        def anonymize_batch_request(raw_batch_request):
            # Typed batch requests are converted to dictionaries first so they can be unpacked
            # as keyword arguments; top-level and per-validation requests are treated alike.
            if raw_batch_request is None:
                raw_batch_request = {}
            raw_batch_request = get_batch_request_as_dict(
                batch_request=raw_batch_request)
            return self._aggregate_anonymizer.anonymize(*(), **raw_batch_request)

        anonymized_name: Optional[str] = self._anonymize_string(
            kwargs.get("name"))

        config_version: Optional[Union[Number,
                                       str]] = kwargs.get("config_version")
        if config_version is None:
            config_version = 1.0

        anonymized_template_name: Optional[str] = self._anonymize_string(
            kwargs.get("template_name"))
        anonymized_run_name_template: Optional[str] = self._anonymize_string(
            kwargs.get("run_name_template"))
        anonymized_expectation_suite_name: Optional[
            str] = self._anonymize_string(kwargs.get("expectation_suite_name"))

        anonymized_batch_request = anonymize_batch_request(
            kwargs.get("batch_request"))

        anonymized_action_list: Optional[List[dict]] = anonymize_actions(
            kwargs.get("action_list"), "anonymized_action_list")

        anonymized_validations: List[dict] = []
        for validation_obj in kwargs.get("validations") or []:
            anonymized_validation_batch_request = anonymize_batch_request(
                validation_obj.get("batch_request"))
            anonymized_validation_expectation_suite_name: Optional[
                str] = self._anonymize_string(
                    validation_obj.get("expectation_suite_name"))
            anonymized_validation_action_list: Optional[
                List[dict]] = anonymize_actions(
                    validation_obj.get("action_list"),
                    "anonymized_validation_action_list",
                )

            # Empty/None entries are kept here; the deep filter applied to the whole payload
            # below is what processes them.
            anonymized_validations.append({
                "anonymized_batch_request":
                anonymized_validation_batch_request,
                "anonymized_expectation_suite_name":
                anonymized_validation_expectation_suite_name,
                "anonymized_action_list":
                anonymized_validation_action_list,
            })

        run_id: Optional[Union[str, RunIdentifier]] = kwargs.get("run_id")
        anonymized_run_id: Optional[str] = (
            None if run_id is None else self._anonymize_string(str(run_id)))

        run_name: Optional[str] = kwargs.get("run_name")
        anonymized_run_name: Optional[str] = (
            None if run_name is None else self._anonymize_string(run_name))

        run_time: Optional[Union[str,
                                 datetime.datetime]] = kwargs.get("run_time")
        anonymized_run_time: Optional[str] = (
            None if run_time is None else self._anonymize_string(str(run_time)))

        expectation_suite_ge_cloud_id: Optional[str] = kwargs.get(
            "expectation_suite_ge_cloud_id")
        anonymized_expectation_suite_ge_cloud_id: Optional[str] = (
            None if expectation_suite_ge_cloud_id is None else
            self._anonymize_string(str(expectation_suite_ge_cloud_id)))

        # Optional top-level keys are public, so those with a truthy value are reported under
        # their original names.
        checkpoint_optional_top_level_keys: List[str] = [
            attribute_name
            for attribute_name in sorted(CHECKPOINT_OPTIONAL_TOP_LEVEL_KEYS)
            if kwargs.get(attribute_name)
        ]

        anonymized_checkpoint_run_properties_dict: Dict[str, Any] = {
            "anonymized_name":
            anonymized_name,
            "config_version":
            config_version,
            "anonymized_template_name":
            anonymized_template_name,
            "anonymized_run_name_template":
            anonymized_run_name_template,
            "anonymized_expectation_suite_name":
            anonymized_expectation_suite_name,
            "anonymized_batch_request":
            anonymized_batch_request,
            "anonymized_action_list":
            anonymized_action_list,
            "anonymized_validations":
            anonymized_validations,
            "anonymized_run_id":
            anonymized_run_id,
            "anonymized_run_name":
            anonymized_run_name,
            "anonymized_run_time":
            anonymized_run_time,
            "anonymized_expectation_suite_ge_cloud_id":
            anonymized_expectation_suite_ge_cloud_id,
            "checkpoint_optional_top_level_keys":
            checkpoint_optional_top_level_keys,
        }

        deep_filter_properties_iterable(
            properties=anonymized_checkpoint_run_properties_dict,
            clean_falsy=True,
            inplace=True,
        )

        return anonymized_checkpoint_run_properties_dict
예제 #9
0
    def run(
        self,
        template_name: Optional[str] = None,
        run_name_template: Optional[str] = None,
        expectation_suite_name: Optional[str] = None,
        batch_request: Optional[Union[BatchRequestBase, dict]] = None,
        action_list: Optional[List[dict]] = None,
        evaluation_parameters: Optional[dict] = None,
        runtime_configuration: Optional[dict] = None,
        validations: Optional[List[dict]] = None,
        profilers: Optional[List[dict]] = None,
        run_id: Optional[Union[str, RunIdentifier]] = None,
        run_name: Optional[str] = None,
        run_time: Optional[Union[str, datetime.datetime]] = None,
        result_format: Optional[Union[str, dict]] = None,
        expectation_suite_ge_cloud_id: Optional[str] = None,
    ) -> CheckpointResult:
        """Run this Checkpoint with optional runtime overrides.

        The runtime arguments are combined with the Checkpoint's own
        configuration through ``get_substituted_config``.  Each entry of the
        resulting "validations" list is handed to ``_run_validation``; when
        that list is empty a single ``_run_validation`` call is made instead.

        Args:
            run_id: Mutually exclusive with ``run_name`` and ``run_time``.
            run_name: When omitted and a run_name_template is configured, the
                name is derived from that template and ``run_time``.
            run_time: Defaults to ``datetime.datetime.now()``.
            result_format: Defaults to the "result_format" entry of
                ``runtime_configuration``.
            The remaining arguments are passed to ``get_substituted_config``
            as runtime overrides under keys of the same name.

        Returns:
            CheckpointResult holding the run id, the collected run results and
            ``self.config``.

        Raises:
            AssertionError: If ``run_id`` is combined with ``run_name`` or
                ``run_time``.
            ge_exceptions.CheckpointError: If the substituted configuration
                has neither a batch_request nor validations.
        """
        assert not (
            run_id and (run_name or run_time)
        ), "Please provide either a run_id or run_name and/or run_time."

        run_time = run_time or datetime.datetime.now()
        runtime_configuration = runtime_configuration or {}
        result_format = result_format or runtime_configuration.get(
            "result_format")

        normalized_batch_request = get_batch_request_as_dict(
            batch_request=batch_request)
        normalized_validations = get_validations_with_batch_request_as_dict(
            validations=validations)

        substituted_runtime_config: dict = self.get_substituted_config(
            runtime_kwargs={
                "template_name": template_name,
                "run_name_template": run_name_template,
                "expectation_suite_name": expectation_suite_name,
                "batch_request": normalized_batch_request or {},
                "action_list": action_list or [],
                "evaluation_parameters": evaluation_parameters or {},
                "runtime_configuration": runtime_configuration or {},
                "validations": normalized_validations or [],
                "profilers": profilers or [],
                "expectation_suite_ge_cloud_id": expectation_suite_ge_cloud_id,
            })

        run_name_template = substituted_runtime_config.get("run_name_template")
        batch_request = substituted_runtime_config.get("batch_request")
        validations = substituted_runtime_config.get("validations") or []

        if not validations and not batch_request:
            raise ge_exceptions.CheckpointError(
                f'Checkpoint "{self.name}" must contain either a batch_request or validations.'
            )

        if run_name is None and run_name_template is not None:
            run_name = get_datetime_string_from_strftime_format(
                format_str=run_name_template, datetime_obj=run_time)

        run_id = run_id or RunIdentifier(run_name=run_name, run_time=run_time)

        # AsyncExecutor runs I/O bound validations in parallel threads when concurrency is enabled in the data
        # context configuration -- see the arguments below and the AsyncExecutor docstring for details on when
        # multiple threads are used.
        with AsyncExecutor(self.data_context.concurrency,
                           max_workers=len(validations)) as async_executor:
            # noinspection PyUnresolvedReferences
            pending_results: List[AsyncResult[ValidationOperatorResult]] = []
            shared_kwargs = dict(
                substituted_runtime_config=substituted_runtime_config,
                async_validation_operator_results=pending_results,
                async_executor=async_executor,
                result_format=result_format,
                run_id=run_id,
            )

            if validations:
                for idx, validation_dict in enumerate(validations):
                    self._run_validation(
                        idx=idx,
                        validation_dict=validation_dict,
                        **shared_kwargs,
                    )
            else:
                # No validations configured: a single call without idx/validation_dict.
                self._run_validation(**shared_kwargs)

            run_results: dict = {}
            for pending_result in pending_results:
                run_results.update(pending_result.result().run_results)

        return CheckpointResult(
            run_id=run_id,
            run_results=run_results,
            checkpoint_config=self.config,
        )
예제 #10
0
    def run_with_runtime_args(
        self,
        template_name: Optional[str] = None,
        run_name_template: Optional[str] = None,
        expectation_suite_name: Optional[str] = None,
        batch_request: Optional[Union[BatchRequestBase, dict]] = None,
        action_list: Optional[List[dict]] = None,
        evaluation_parameters: Optional[dict] = None,
        runtime_configuration: Optional[dict] = None,
        validations: Optional[List[dict]] = None,
        profilers: Optional[List[dict]] = None,
        run_id: Optional[Union[str, int, float]] = None,
        run_name: Optional[str] = None,
        run_time: Optional[datetime.datetime] = None,
        result_format: Optional[str] = None,
        expectation_suite_ge_cloud_id: Optional[str] = None,
        **kwargs,
    ) -> CheckpointResult:
        """
        Run this Checkpoint, forwarding the supplied call-time arguments to ``self.run``.

        The configuration returned by ``self.get_config()`` is consulted for a fallback ``result_format`` and for
        the initial contents of the argument dictionary; every call-time argument is then written over the entry
        with the same key before the dictionary is filtered and passed on.

        Args:
            result_format: Used as given when truthy; otherwise the ``result_format`` found in the stored
                ``runtime_configuration`` (if any) is used, and ``{"result_format": "SUMMARY"}`` when the value is
                still ``None`` after that.
            **kwargs: Extra keyword arguments passed through to ``self.run`` unchanged.

        All other named parameters are optional values forwarded to ``self.run`` under their own names;
        ``batch_request`` and the ``batch_request`` entries inside ``validations`` are first normalized with
        ``get_batch_request_as_dict`` / ``get_validations_with_batch_request_as_dict``.

        Returns:
            Whatever ``self.run`` returns for the assembled arguments.
        """
        stored_config: CheckpointConfig = cast(CheckpointConfig, self.get_config())

        # A truthy call-time result_format wins; the stored one is only a fallback.
        if "runtime_configuration" in stored_config:
            stored_runtime_configuration = stored_config.runtime_configuration
            if (
                stored_runtime_configuration
                and "result_format" in stored_runtime_configuration
            ):
                result_format = result_format or stored_runtime_configuration.get(
                    "result_format"
                )

        if result_format is None:
            result_format = {"result_format": "SUMMARY"}

        call_arguments: dict = {
            "template_name": template_name,
            "run_name_template": run_name_template,
            "expectation_suite_name": expectation_suite_name,
            "batch_request": get_batch_request_as_dict(batch_request=batch_request),
            "action_list": action_list,
            "evaluation_parameters": evaluation_parameters,
            "runtime_configuration": runtime_configuration,
            "validations": get_validations_with_batch_request_as_dict(
                validations=validations
            ),
            "profilers": profilers,
            "run_id": run_id,
            "run_name": run_name,
            "run_time": run_time,
            "result_format": result_format,
            "expectation_suite_ge_cloud_id": expectation_suite_ge_cloud_id,
        }

        # Start from the stored entries that share a key with the call-time arguments ...
        run_arguments: dict = {}
        for key, value in stored_config.items():
            if key in call_arguments:
                run_arguments[key] = value
        # ... then let every call-time value (None included) replace the stored one.
        run_arguments.update(call_arguments)

        run_arguments = dict(**run_arguments, **kwargs)
        # NOTE(review): presumably removes falsy entries in place (clean_falsy=True, inplace=True) -- confirm
        # against filter_properties_dict.
        filter_properties_dict(
            properties=run_arguments,
            clean_falsy=True,
            inplace=True,
        )

        return self.run(**run_arguments)
    def resolve_config_using_acceptable_arguments(
        checkpoint: "Checkpoint",  # noqa: F821
        template_name: Optional[str] = None,
        run_name_template: Optional[str] = None,
        expectation_suite_name: Optional[str] = None,
        batch_request: Optional[Union[BatchRequest, RuntimeBatchRequest,
                                      dict]] = None,
        action_list: Optional[List[dict]] = None,
        evaluation_parameters: Optional[dict] = None,
        runtime_configuration: Optional[dict] = None,
        validations: Optional[List[dict]] = None,
        profilers: Optional[List[dict]] = None,
        run_id: Optional[Union[str, RunIdentifier]] = None,
        run_name: Optional[str] = None,
        run_time: Optional[Union[str, datetime.datetime]] = None,
        result_format: Optional[Union[str, dict]] = None,
        expectation_suite_ge_cloud_id: Optional[str] = None,
    ) -> dict:
        """
        Reconcile the Checkpoint configuration (e.g., obtained from the Checkpoint store) with dynamically supplied
        arguments, yielding a Checkpoint specification that is ready for running validation on it.

        This procedure is necessitated by the fact that the Checkpoint configuration is hierarchical in its form,
        which was established to make the specification of different Checkpoint capabilities easy.  In particular,
        entities such as BatchRequest, expectation_suite_name, and action_list can be specified at the top
        Checkpoint level, with suitable overrides provided at lower levels (e.g., in the validations section).
        Reconciling and normalizing the Checkpoint configuration is essential for usage statistics, because the
        exact values of the entities in their formally validated form (e.g., BatchRequest) is the required level
        of detail.

        Each entry of the substituted "validations" list is updated in place with the "batch_request",
        "expectation_suite_name", "expectation_suite_ge_cloud_id", and "action_list" values returned by
        ``get_substituted_validation_dict`` for that entry.

        Returns:
            The dictionary produced by ``checkpoint.get_substituted_config``, with its validations updated as
            described above.

        Raises:
            AssertionError: If ``run_id`` is given together with ``run_name`` and/or ``run_time``.
            CheckpointError: If the substituted configuration has neither validations nor a batch_request.
        """
        assert not (
            run_id and (run_name or run_time)
        ), "Please provide either a run_id or run_name and/or run_time."

        if not run_time:
            run_time = datetime.datetime.now()
        if not runtime_configuration:
            runtime_configuration = {}

        normalized_batch_request = get_batch_request_as_dict(
            batch_request=batch_request)
        normalized_validations = get_validations_with_batch_request_as_dict(
            validations=validations)

        runtime_kwargs: dict = {
            "template_name": template_name,
            "run_name_template": run_name_template,
            "expectation_suite_name": expectation_suite_name,
            "batch_request": normalized_batch_request,
            "action_list": action_list,
            "evaluation_parameters": evaluation_parameters,
            "runtime_configuration": runtime_configuration,
            "validations": normalized_validations,
            "profilers": profilers,
            "expectation_suite_ge_cloud_id": expectation_suite_ge_cloud_id,
        }
        resolved_config: dict = checkpoint.get_substituted_config(
            runtime_kwargs=runtime_kwargs)

        resolved_run_name_template = resolved_config.get("run_name_template")
        resolved_validations = resolved_config.get("validations") or []
        resolved_batch_request = resolved_config.get("batch_request")
        if not (resolved_validations or resolved_batch_request):
            raise ge_exceptions.CheckpointError(
                f'Checkpoint "{checkpoint.name}" must contain either a batch_request or validations.'
            )

        if run_name is None and resolved_run_name_template is not None:
            run_name = get_datetime_string_from_strftime_format(
                format_str=resolved_run_name_template, datetime_obj=run_time)

        # Built only when no run_id was supplied; the value is not read again within this function.
        if not run_id:
            run_id = RunIdentifier(run_name=run_name, run_time=run_time)

        # Keys whose substituted values are written back onto each validation entry, in this order.
        reconciled_keys = (
            "batch_request",
            "expectation_suite_name",
            "expectation_suite_ge_cloud_id",
            "action_list",
        )
        for validation_dict in resolved_validations:
            substituted_validation_dict: dict = get_substituted_validation_dict(
                substituted_runtime_config=resolved_config,
                validation_dict=validation_dict,
            )
            for key in reconciled_keys:
                validation_dict[key] = substituted_validation_dict.get(key)

        return resolved_config
예제 #12
0
    def batch_request(self, value: Optional[Union[BatchRequestBase,
                                                  dict]]) -> None:
        """
        Store ``value`` on ``self._batch_request``.

        ``None`` and ``dict`` values are stored as given; anything else is first passed through
        ``get_batch_request_as_dict``.
        """
        needs_conversion = value is not None and not isinstance(value, dict)
        self._batch_request = (
            get_batch_request_as_dict(batch_request=value)
            if needs_conversion
            else value
        )