def _run_default_validation_operator(
        self,
        assets_to_validate: List,
        run_id: Optional[Union[str, RunIdentifier]] = None,
        evaluation_parameters: Optional[dict] = None,
        run_name: Optional[str] = None,
        run_time: Optional[Union[str, datetime.datetime]] = None,
        result_format: Optional[Union[str, dict]] = None,
    ):
        """Validate ``assets_to_validate`` with a built-in action-list operator.

        An ``ActionListValidationOperator`` is built with three actions
        (store validation result, store evaluation parameters, update data
        docs) and run against the given assets.

        Args:
            assets_to_validate: Non-empty list whose items must each be a
                ``tuple``, ``DataAsset`` or ``Validator``.
            run_id: Passed through to the operator's ``run``.
            evaluation_parameters: Forwarded to the operator's ``run`` only
                when it is not ``None``.
            run_name: Passed through to the operator. When both ``run_id``
                and ``run_name`` are ``None`` a UTC timestamp string is
                generated and used as the run name.
            run_time: Passed through to the operator's ``run``.
            result_format: Result format for the operator; any falsy value
                (including the ``None`` default) is replaced by
                ``{"result_format": "SUMMARY"}``.

        Returns:
            Whatever the operator's ``run`` method returns.

        Raises:
            ge_exceptions.DataContextError: If ``assets_to_validate`` is
                empty or contains an item of an unsupported type.
        """
        # The default is None rather than a dict literal in the signature so
        # that every call gets its own dict instead of one shared, mutable
        # default object.
        result_format = result_format or {"result_format": "SUMMARY"}

        if not assets_to_validate:
            raise ge_exceptions.DataContextError(
                "No batches of data were passed in. These are required")

        for batch in assets_to_validate:
            # NOTE: tuples are accepted here too, although the error message
            # only mentions DataAsset and Validator.
            if not isinstance(batch, (tuple, DataAsset, Validator)):
                raise ge_exceptions.DataContextError(
                    "Batches are required to be of type DataAsset or Validator"
                )

        if run_id is None and run_name is None:
            run_name = datetime.datetime.now(
                datetime.timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
            logger.info("Setting run_name to: {}".format(run_name))

        default_validation_operator = ActionListValidationOperator(
            data_context=self.data_context,
            action_list=[
                {
                    "name": "store_validation_result",
                    "action": {
                        "class_name": "StoreValidationResultAction"
                    },
                },
                {
                    "name": "store_evaluation_params",
                    "action": {
                        "class_name": "StoreEvaluationParametersAction"
                    },
                },
                {
                    "name": "update_data_docs",
                    "action": {
                        "class_name": "UpdateDataDocsAction",
                        "site_names": []
                    },
                },
            ],
            result_format=result_format,
            name="default-action-list-validation-operator",
        )

        run_kwargs = {
            "assets_to_validate": assets_to_validate,
            "run_id": run_id,
            "run_name": run_name,
            "run_time": run_time,
            "result_format": result_format,
        }
        # Leave the keyword out entirely when no parameters were supplied so
        # the operator's own default for it applies.
        if evaluation_parameters is not None:
            run_kwargs["evaluation_parameters"] = evaluation_parameters

        return default_validation_operator.run(**run_kwargs)
    def run(
        self,
        template_name: Optional[str] = None,
        run_name_template: Optional[str] = None,
        expectation_suite_name: Optional[str] = None,
        batch_request: Optional[Union[BatchRequest, dict]] = None,
        action_list: Optional[List[dict]] = None,
        evaluation_parameters: Optional[dict] = None,
        runtime_configuration: Optional[dict] = None,
        validations: Optional[List[dict]] = None,
        profilers: Optional[List[dict]] = None,
        run_id=None,
        run_name=None,
        run_time=None,
        result_format=None,
        **kwargs,
    ) -> CheckpointResult:
        """Run every validation of this checkpoint and collect the results.

        The runtime arguments are merged into the checkpoint configuration
        via ``self.get_substituted_config``. Each resulting validation is
        then executed with its own ``ActionListValidationOperator`` and the
        per-validation ``run_results`` are merged into one dict.

        Args:
            template_name, run_name_template, expectation_suite_name,
            batch_request, action_list, evaluation_parameters,
            runtime_configuration, validations, profilers: Runtime overrides
                handed to ``self.get_substituted_config`` as
                ``runtime_kwargs``.
            run_id: Identifier for the run. Must not be combined with
                ``run_name`` or ``run_time``.
            run_name: Name of the run. When ``None`` and the substituted
                config has a ``run_name_template``, the name is produced by
                formatting ``run_time`` with that template.
            run_time: Time of the run; falls back to ``datetime.now()``.
            result_format: Result format for the operators. Falls back to
                ``runtime_configuration["result_format"]`` and then to
                ``{"result_format": "SUMMARY"}``.
            **kwargs: Accepted but not used by this method.

        Returns:
            A ``CheckpointResult`` built from the run id, the merged run
            results and ``self.config``.

        Raises:
            AssertionError: If ``run_id`` is given together with ``run_name``
                or ``run_time`` (this check is skipped when Python runs with
                ``-O``).
            CheckpointError: If a ``CheckpointError`` is raised while running
                one of the validations; the original error is chained as
                ``__cause__``.
        """
        assert not (run_id and run_name) and not (
            run_id and run_time
        ), "Please provide either a run_id or run_name and/or run_time."

        run_time = run_time or datetime.now()
        runtime_configuration = runtime_configuration or {}
        result_format = result_format or runtime_configuration.get(
            "result_format")
        if result_format is None:
            result_format = {"result_format": "SUMMARY"}

        runtime_kwargs = {
            "template_name": template_name,
            "run_name_template": run_name_template,
            "expectation_suite_name": expectation_suite_name,
            "batch_request": batch_request,
            "action_list": action_list,
            "evaluation_parameters": evaluation_parameters,
            "runtime_configuration": runtime_configuration,
            "validations": validations,
            "profilers": profilers,
        }
        substituted_runtime_config: CheckpointConfig = self.get_substituted_config(
            runtime_kwargs=runtime_kwargs)
        # From here on the substituted values take precedence over the raw
        # arguments.
        run_name_template = substituted_runtime_config.run_name_template
        validations = substituted_runtime_config.validations
        run_results = {}

        if run_name is None and run_name_template is not None:
            run_name = get_datetime_string_from_strftime_format(
                format_str=run_name_template, datetime_obj=run_time)

        run_id = run_id or RunIdentifier(run_name=run_name, run_time=run_time)

        for idx, validation_dict in enumerate(validations):
            try:
                substituted_validation_dict: dict = get_substituted_validation_dict(
                    substituted_runtime_config=substituted_runtime_config,
                    validation_dict=validation_dict,
                )

                validator: Validator = self.data_context.get_validator(
                    batch_request=substituted_validation_dict.get(
                        "batch_request"),
                    expectation_suite_name=substituted_validation_dict.get(
                        "expectation_suite_name"),
                )
                validation_operator = ActionListValidationOperator(
                    data_context=self.data_context,
                    action_list=substituted_validation_dict.get("action_list"),
                    result_format=result_format,
                    name=f"{self.name}-checkpoint-validation[{idx}]",
                )
                val_op_run_result: ValidationOperatorResult = (
                    validation_operator.run(
                        assets_to_validate=[validator],
                        run_id=run_id,
                        evaluation_parameters=substituted_validation_dict.get(
                            "evaluation_parameters"),
                        result_format=result_format,
                    ))
                run_results.update(val_op_run_result.run_results)
            except CheckpointError as e:
                # Chain explicitly so the traceback marks the original error
                # as the direct cause of the wrapped one.
                raise CheckpointError(
                    f"Exception occurred while running validation[{idx}] of checkpoint '{self.name}': {e.message}"
                ) from e
        return CheckpointResult(run_id=run_id,
                                run_results=run_results,
                                checkpoint_config=self.config)
# Example #3
# 0
    def _run_validation(
        self,
        substituted_runtime_config: dict,
        async_validation_operator_results: List[AsyncResult],
        async_executor: AsyncExecutor,
        result_format: Optional[dict],
        run_id: Optional[Union[str, RunIdentifier]],
        idx: Optional[int] = 0,
        validation_dict: Optional[dict] = None,
    ) -> None:
        """Submit one checkpoint validation to ``async_executor``.

        The validation dict is substituted against the runtime config, a
        validator and an ``ActionListValidationOperator`` are built from it,
        and the operator's ``run`` is submitted to ``async_executor``. The
        object returned by ``submit`` is appended to
        ``async_validation_operator_results``; nothing is returned.

        Args:
            substituted_runtime_config: Config passed to
                ``get_substituted_validation_dict``.
            async_validation_operator_results: List that receives the
                submitted result handle (mutated in place).
            async_executor: Executor whose ``submit`` schedules the run.
            result_format: Result format for the operator. Falls back to the
                validation's ``runtime_configuration["result_format"]`` and
                then to ``{"result_format": "SUMMARY"}``.
            run_id: Run identifier forwarded to the operator.
            idx: Position of this validation, used in the operator name and
                in error messages.
            validation_dict: The validation to run; ``None`` is treated as an
                empty dict.

        Raises:
            ge_exceptions.CheckpointError: If a ``CheckpointError``,
                ``ExecutionEngineError`` or ``MetricError`` is raised while
                preparing or submitting the validation; the original error is
                chained as ``__cause__``.
        """
        if validation_dict is None:
            validation_dict = {}

        try:
            substituted_validation_dict: dict = get_substituted_validation_dict(
                substituted_runtime_config=substituted_runtime_config,
                validation_dict=validation_dict,
            )
            batch_request: Union[
                BatchRequest,
                RuntimeBatchRequest] = substituted_validation_dict.get(
                    "batch_request")
            expectation_suite_name: str = substituted_validation_dict.get(
                "expectation_suite_name")
            expectation_suite_ge_cloud_id: str = substituted_validation_dict.get(
                "expectation_suite_ge_cloud_id")
            include_rendered_content: bool = substituted_validation_dict.get(
                "include_rendered_content", False)

            cloud_mode = self.data_context.ge_cloud_mode

            # In cloud mode the suite is addressed by its cloud id, otherwise
            # by name; the other argument is passed as None.
            validator: Validator = self.data_context.get_validator(
                batch_request=batch_request,
                expectation_suite_name=(None if cloud_mode else
                                        expectation_suite_name),
                expectation_suite_ge_cloud_id=(expectation_suite_ge_cloud_id
                                               if cloud_mode else None),
                include_rendered_content=include_rendered_content,
            )

            action_list: list = substituted_validation_dict.get("action_list")
            # `or {}` also covers a key that is present with a None value,
            # which a plain `.get(key, {})` would hand back unchanged.
            runtime_configuration_validation = (
                substituted_validation_dict.get("runtime_configuration") or {})
            catch_exceptions_validation = runtime_configuration_validation.get(
                "catch_exceptions")
            result_format_validation = runtime_configuration_validation.get(
                "result_format")
            result_format = result_format or result_format_validation

            if result_format is None:
                result_format = {"result_format": "SUMMARY"}

            action_list_validation_operator: ActionListValidationOperator = (
                ActionListValidationOperator(
                    data_context=self.data_context,
                    action_list=action_list,
                    result_format=result_format,
                    name=f"{self.name}-checkpoint-validation[{idx}]",
                ))
            checkpoint_identifier = None
            if cloud_mode:
                checkpoint_identifier = GeCloudIdentifier(
                    resource_type=GeCloudRESTResource.CONTRACT,
                    ge_cloud_id=str(self.ge_cloud_id),
                )

            # Only forward catch_exceptions when the validation set it, so
            # the operator's own default applies otherwise.
            operator_run_kwargs = {}
            if catch_exceptions_validation is not None:
                operator_run_kwargs[
                    "catch_exceptions"] = catch_exceptions_validation

            async_validation_operator_results.append(
                async_executor.submit(
                    action_list_validation_operator.run,
                    assets_to_validate=[validator],
                    run_id=run_id,
                    evaluation_parameters=substituted_validation_dict.get(
                        "evaluation_parameters"),
                    result_format=result_format,
                    checkpoint_identifier=checkpoint_identifier,
                    checkpoint_name=self.name,
                    **operator_run_kwargs,
                ))
        except (
                ge_exceptions.CheckpointError,
                ge_exceptions.ExecutionEngineError,
                ge_exceptions.MetricError,
        ) as e:
            # Chain explicitly so the traceback marks the original error as
            # the direct cause of the wrapped one.
            raise ge_exceptions.CheckpointError(
                f"Exception occurred while running validation[{idx}] of Checkpoint '{self.name}': {e.message}."
            ) from e