def save_expectation_suite(
        self,
        expectation_suite: ExpectationSuite,
        expectation_suite_name: Optional[str] = None,
        overwrite_existing: bool = True,
        ge_cloud_id: Optional[str] = None,
        **kwargs: Optional[dict],
    ) -> None:
        """Save the provided expectation suite into the DataContext.

        Args:
            expectation_suite: The suite to save.
            expectation_suite_name: The name of this expectation suite. If no name is provided, the name \
                is read from the suite itself.
            ge_cloud_id: The GE Cloud ID to use when saving the expectation suite.
            overwrite_existing: Whether to overwrite the suite if one already exists with this GE Cloud ID.
        Returns:
            None
        """
        key: GeCloudIdentifier = GeCloudIdentifier(
            resource_type=GeCloudRESTResource.EXPECTATION_SUITE,
            ge_cloud_id=ge_cloud_id
            if ge_cloud_id is not None else str(expectation_suite.ge_cloud_id),
        )
        if self.expectations_store.has_key(key) and not overwrite_existing:
            raise ge_exceptions.DataContextError(
                f"expectation_suite with GE Cloud ID {key.ge_cloud_id} already exists. "
                f"If you would like to overwrite this expectation_suite, set overwrite_existing=True."
            )
        self._evaluation_parameter_dependencies_compiled = False
        self.expectations_store.set(key, expectation_suite, **kwargs)
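
A minimal usage sketch for the method above (illustrative, not from the original source; it assumes `context` is an already-configured cloud-mode DataContext, `suite` is an existing ExpectationSuite, and the ID is a placeholder):

# Persist `suite` under an explicit GE Cloud ID, overwriting any existing copy.
context.save_expectation_suite(
    expectation_suite=suite,
    ge_cloud_id="00000000-0000-0000-0000-000000000000",
    overwrite_existing=True,
)
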
    def create_expectation_suite(
        self,
        expectation_suite_name: str,
        overwrite_existing: bool = False,
        ge_cloud_id: Optional[str] = None,
        **kwargs: Optional[dict],
    ) -> ExpectationSuite:
        """Build a new expectation suite and save it into the data_context expectation store.

        Args:
            expectation_suite_name: The name of the expectation_suite to create
            overwrite_existing: Whether to overwrite an expectation suite that already exists under the
                given name (or GE Cloud ID)
            ge_cloud_id: The GE Cloud ID to associate with the new expectation suite

        Returns:
            A new (empty) expectation suite.
        """
        if not isinstance(overwrite_existing, bool):
            raise ValueError("Parameter overwrite_existing must be of type bool.")

        expectation_suite: ExpectationSuite = ExpectationSuite(
            expectation_suite_name=expectation_suite_name, data_context=self)
        key = GeCloudIdentifier(
            resource_type=GeCloudRESTResource.EXPECTATION_SUITE,
            ge_cloud_id=ge_cloud_id,
        )
        if self.expectations_store.has_key(key) and not overwrite_existing:
            raise ge_exceptions.DataContextError(
                f"expectation_suite with GE Cloud ID {ge_cloud_id} already exists. "
                f"If you would like to overwrite this expectation_suite, set overwrite_existing=True."
            )
        self.expectations_store.set(key, expectation_suite, **kwargs)
        return expectation_suite
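
Illustrative usage sketch (assumes a cloud-mode DataContext named `context`; the suite name is arbitrary):

# Build an empty suite and write it to the cloud-backed expectations store.
new_suite = context.create_expectation_suite(
    expectation_suite_name="my_new_suite",
    overwrite_existing=True,
)
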
    def delete_profiler(
        profiler_store: ProfilerStore,
        name: Optional[str] = None,
        ge_cloud_id: Optional[str] = None,
    ) -> None:
        assert bool(name) ^ bool(
            ge_cloud_id
        ), "Must provide either name or ge_cloud_id (but not both)"

        key: Union[GeCloudIdentifier, ConfigurationIdentifier]
        if ge_cloud_id:
            key = GeCloudIdentifier(resource_type="contract",
                                    ge_cloud_id=ge_cloud_id)
        else:
            key = ConfigurationIdentifier(configuration_key=name)

        try:
            profiler_store.remove_key(key=key)
        except (ge_exceptions.InvalidKeyError, KeyError) as exc_ik:
            id_ = key.configuration_key if isinstance(key, ConfigurationIdentifier) else key
            raise ge_exceptions.ProfilerNotFoundError(
                message=f'Non-existent Profiler configuration named "{id_}".\n\nDetails: {exc_ik}'
            )
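
Hedged usage sketch: exactly one of `name` or `ge_cloud_id` must be supplied, or the assertion fails. The calls below assume `delete_profiler` is exposed as a static method on RuleBasedProfiler and that `profiler_store` already exists; the ID is a placeholder:

# Delete by name (ConfigurationIdentifier branch) ...
RuleBasedProfiler.delete_profiler(profiler_store=profiler_store, name="my_profiler")
# ... or by GE Cloud ID (GeCloudIdentifier branch).
RuleBasedProfiler.delete_profiler(
    profiler_store=profiler_store,
    ge_cloud_id="00000000-0000-0000-0000-000000000000",
)
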
    def get_expectation_suite(
        self,
        expectation_suite_name: Optional[str] = None,
        ge_cloud_id: Optional[str] = None,
    ) -> ExpectationSuite:
        """Get an Expectation Suite by name or GE Cloud ID
        Args:
            expectation_suite_name (str): the name for the Expectation Suite
            ge_cloud_id (str): the GE Cloud ID for the Expectation Suite

        Returns:
            expectation_suite
        """
        key = GeCloudIdentifier(
            resource_type=GeCloudRESTResource.EXPECTATION_SUITE,
            ge_cloud_id=ge_cloud_id,
        )
        if self.expectations_store.has_key(key):
            expectations_schema_dict: dict = cast(dict, self.expectations_store.get(key))
            # Construct the ExpectationSuite from the retrieved schema dict
            return ExpectationSuite(**expectations_schema_dict, data_context=self)
        else:
            raise ge_exceptions.DataContextError(
                f"expectation_suite {expectation_suite_name} not found")
    def add_profiler(
        config: RuleBasedProfilerConfig,
        data_context: "DataContext",  # noqa: F821
        profiler_store: ProfilerStore,
        ge_cloud_id: Optional[str] = None,
    ) -> "RuleBasedProfiler":
        if not RuleBasedProfiler._check_validity_of_batch_requests_in_config(
                config=config):
            raise ge_exceptions.InvalidConfigError(
                f'batch_data found in batch_request cannot be saved to ProfilerStore "{profiler_store.store_name}"'
            )

        # Chetan - 20220204 - DataContext to be removed once it can be decoupled from RBP
        new_profiler: "RuleBasedProfiler" = instantiate_class_from_config(
            config=config.to_json_dict(),
            runtime_environment={
                "data_context": data_context,
            },
            config_defaults={
                "module_name": "great_expectations.rule_based_profiler",
                "class_name": "RuleBasedProfiler",
            },
        )

        key: Union[GeCloudIdentifier, ConfigurationIdentifier]
        if ge_cloud_id:
            key = GeCloudIdentifier(resource_type="contract",
                                    ge_cloud_id=ge_cloud_id)
        else:
            key = ConfigurationIdentifier(configuration_key=config.name)

        profiler_store.set(key=key, value=config)

        return new_profiler
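
A hedged sketch of calling `add_profiler` (assuming it is a static method on RuleBasedProfiler and that `context` and `profiler_store` already exist; the configuration is deliberately minimal and illustrative):

config = RuleBasedProfilerConfig(
    name="my_profiler",
    config_version=1.0,
    rules={},  # rules omitted for brevity
)
profiler = RuleBasedProfiler.add_profiler(
    config=config,
    data_context=context,
    profiler_store=profiler_store,
)
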
    def get_key(self) -> GeCloudIdentifier:
        """
        Generates a GE Cloud-specific key for use with Stores. See parent "DataContextVariables.get_key" for more details.
        """
        from great_expectations.data_context.store.ge_cloud_store_backend import (
            GeCloudRESTResource, )

        key: GeCloudIdentifier = GeCloudIdentifier(
            resource_type=GeCloudRESTResource.DATA_CONTEXT_VARIABLES)
        return key
    def determine_key(
        name: Optional[str], ge_cloud_id: Optional[str]
    ) -> Union[GeCloudIdentifier, ConfigurationIdentifier]:
        assert bool(name) ^ bool(
            ge_cloud_id), "Must provide either name or ge_cloud_id."

        key: Union[GeCloudIdentifier, ConfigurationIdentifier]
        if ge_cloud_id:
            key = GeCloudIdentifier(resource_type=GeCloudRESTResource.CONTRACT,
                                    ge_cloud_id=ge_cloud_id)
        else:
            key = ConfigurationIdentifier(configuration_key=name)

        return key
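
A small sketch of the mutually exclusive key resolution above (assuming `determine_key` is callable as a static helper; the name and ID are placeholders):

# Name only -> ConfigurationIdentifier
key = determine_key(name="my_checkpoint", ge_cloud_id=None)
# GE Cloud ID only -> GeCloudIdentifier with resource_type CONTRACT
key = determine_key(name=None, ge_cloud_id="00000000-0000-0000-0000-000000000000")
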
Example #8
def test_datasource_store_retrieval_cloud_mode(
    datasource_config: DatasourceConfig,
    ge_cloud_base_url: str,
    ge_cloud_access_token: str,
    ge_cloud_organization_id: str,
) -> None:
    ge_cloud_store_backend_config: dict = {
        "class_name": "GeCloudStoreBackend",
        "ge_cloud_base_url": ge_cloud_base_url,
        "ge_cloud_resource_type": "datasource",
        "ge_cloud_credentials": {
            "access_token": ge_cloud_access_token,
            "organization_id": ge_cloud_organization_id,
        },
        "suppress_store_backend_id": True,
    }

    store: DatasourceStore = DatasourceStore(
        store_name="my_cloud_datasource_store",
        store_backend=ge_cloud_store_backend_config,
    )

    key: GeCloudIdentifier = GeCloudIdentifier(resource_type="datasource",
                                               ge_cloud_id="foobarbaz")

    with patch("requests.patch", autospec=True) as mock_patch:
        type(mock_patch.return_value).status_code = PropertyMock(
            return_value=200)

        store.set(key=key, value=datasource_config)

        mock_patch.assert_called_with(
            "https://app.test.greatexpectations.io/organizations/bd20fead-2c31-4392-bcd1-f1e87ad5a79c/datasources/foobarbaz",
            json={
                "data": {
                    "type": "datasource",
                    "id": "foobarbaz",
                    "attributes": {
                        "datasource_config": datasource_config.to_dict(),
                        "organization_id": ge_cloud_organization_id,
                    },
                }
            },
            headers={
                "Content-Type": "application/vnd.api+json",
                "Authorization": "Bearer 6bb5b6f5c7794892a4ca168c65c2603e",
            },
        )
    def get_profiler(
        data_context: "DataContext",  # noqa: F821
        profiler_store: ProfilerStore,
        name: Optional[str] = None,
        ge_cloud_id: Optional[str] = None,
    ) -> "RuleBasedProfiler":
        assert bool(name) ^ bool(
            ge_cloud_id
        ), "Must provide either name or ge_cloud_id (but not both)"

        key: Union[GeCloudIdentifier, ConfigurationIdentifier]
        if ge_cloud_id:
            key = GeCloudIdentifier(resource_type="contract",
                                    ge_cloud_id=ge_cloud_id)
        else:
            key = ConfigurationIdentifier(configuration_key=name)
        try:
            profiler_config: RuleBasedProfilerConfig = profiler_store.get(key=key)
        except ge_exceptions.InvalidKeyError as exc_ik:
            id_ = key.configuration_key if isinstance(key, ConfigurationIdentifier) else key
            raise ge_exceptions.ProfilerNotFoundError(
                message=f'Non-existent Profiler configuration named "{id_}".\n\nDetails: {exc_ik}'
            )

        config = profiler_config.to_json_dict()
        if name:
            config.update({"name": name})
        config = filter_properties_dict(properties=config, clean_falsy=True)

        profiler = instantiate_class_from_config(
            config=config,
            runtime_environment={
                "data_context": data_context,
            },
            config_defaults={
                "module_name": "great_expectations.rule_based_profiler",
                "class_name": "RuleBasedProfiler",
            },
        )

        return profiler
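
Hedged usage sketch (assuming `get_profiler` is a static method on RuleBasedProfiler and that `context` and `profiler_store` already exist):

profiler = RuleBasedProfiler.get_profiler(
    data_context=context,
    profiler_store=profiler_store,
    name="my_profiler",
)
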
Example #10
def delete_checkpoint(
    checkpoint_store: CheckpointStore,
    name: Optional[str] = None,
    ge_cloud_id: Optional[str] = None,
):
    assert bool(name) ^ bool(ge_cloud_id), "Must provide either name or ge_cloud_id."

    if ge_cloud_id:
        key: GeCloudIdentifier = GeCloudIdentifier(
            resource_type="contract", ge_cloud_id=ge_cloud_id
        )
    else:
        key: ConfigurationIdentifier = ConfigurationIdentifier(configuration_key=name)

    try:
        checkpoint_store.remove_key(key=key)
    except ge_exceptions.InvalidKeyError as exc_ik:
        # key may be a GeCloudIdentifier, which has no configuration_key attribute
        id_ = key.configuration_key if isinstance(key, ConfigurationIdentifier) else key
        raise ge_exceptions.CheckpointNotFoundError(
            message=f'Non-existent Checkpoint configuration named "{id_}".\n\nDetails: {exc_ik}'
        )
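
Minimal usage sketch (assumes `checkpoint_store` is an existing CheckpointStore; exactly one of `name` or `ge_cloud_id` may be given):

delete_checkpoint(checkpoint_store=checkpoint_store, name="my_checkpoint")
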
    def delete_expectation_suite(
        self,
        expectation_suite_name: Optional[str] = None,
        ge_cloud_id: Optional[str] = None,
    ):
        """Delete specified expectation suite from data_context expectation store.

        Args:
            expectation_suite_name: The name of the expectation_suite to create

        Returns:
            True for Success and False for Failure.
        """
        key = GeCloudIdentifier(
            resource_type=GeCloudRESTResource.EXPECTATION_SUITE,
            ge_cloud_id=ge_cloud_id,
        )
        if not self.expectations_store.has_key(key):
            raise ge_exceptions.DataContextError(
                f"expectation_suite with name {expectation_suite_name} does not exist."
            )
        else:
            self.expectations_store.remove_key(key)
            return True
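
Illustrative usage (assumes a cloud-mode DataContext named `context`; the ID is a placeholder). In this cloud variant the lookup is keyed by `ge_cloud_id`:

context.delete_expectation_suite(
    ge_cloud_id="00000000-0000-0000-0000-000000000000"
)
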
Example #12
    def build(self, resource_identifiers=None) -> None:
        source_store_keys = self.source_store.list_keys()
        if self.name == "validations" and self.validation_results_limit:
            source_store_keys = sorted(
                source_store_keys,
                key=lambda x: x.run_id.run_time,
                reverse=True)[:self.validation_results_limit]

        for resource_key in source_store_keys:
            # If no resource_identifiers are passed, the section builder
            # will build a page for every key in its source store.
            # If the caller did pass resource_identifiers, it will build
            # pages only for the specified resources.
            if resource_identifiers and resource_key not in resource_identifiers:
                continue

            if self.run_name_filter and not isinstance(resource_key,
                                                       GeCloudIdentifier):
                if not resource_key_passes_run_name_filter(
                        resource_key, self.run_name_filter):
                    continue
            try:
                resource = self.source_store.get(resource_key)
                if isinstance(resource_key, ExpectationSuiteIdentifier):
                    resource = ExpectationSuite(**resource,
                                                data_context=self.data_context)
            except exceptions.InvalidKeyError:
                logger.warning(
                    f"Object with Key: {str(resource_key)} could not be retrieved. Skipping..."
                )
                continue

            if isinstance(resource_key, ExpectationSuiteIdentifier):
                expectation_suite_name = resource_key.expectation_suite_name
                logger.debug(
                    f"        Rendering expectation suite {expectation_suite_name}"
                )
            elif isinstance(resource_key, ValidationResultIdentifier):
                run_id = resource_key.run_id
                run_name = run_id.run_name
                run_time = run_id.run_time
                expectation_suite_name = (
                    resource_key.expectation_suite_identifier.expectation_suite_name
                )
                if self.name == "profiling":
                    logger.debug(
                        f"        Rendering profiling for batch {resource_key.batch_identifier}"
                    )
                else:
                    logger.debug(
                        f"        Rendering validation: run name: {run_name}, run time: {run_time}, suite {expectation_suite_name} for batch {resource_key.batch_identifier}"
                    )

            try:
                rendered_content = self.renderer_class.render(resource)

                if self.ge_cloud_mode:
                    self.target_store.set(
                        GeCloudIdentifier(
                            resource_type=GeCloudRESTResource.RENDERED_DATA_DOC
                        ),
                        rendered_content,
                        source_type=resource_key.resource_type,
                        source_id=resource_key.ge_cloud_id,
                    )
                else:
                    viewable_content = self.view_class.render(
                        rendered_content,
                        data_context_id=self.data_context_id,
                        show_how_to_buttons=self.show_how_to_buttons,
                    )
                    # Verify type
                    self.target_store.set(
                        SiteSectionIdentifier(
                            site_section_name=self.name,
                            resource_identifier=resource_key,
                        ),
                        viewable_content,
                    )
            except Exception as e:
                exception_message = """\
An unexpected Exception occurred during data docs rendering.  Because of this error, certain parts of data docs will \
not be rendered properly and/or may not appear altogether.  Please use the trace, included in this message, to \
diagnose and repair the underlying issue.  Detailed information follows:
                """
                exception_traceback = traceback.format_exc()
                exception_message += (f'{type(e).__name__}: "{str(e)}".  '
                                      f'Traceback: "{exception_traceback}".')
                logger.error(exception_message)
Example #13
def add_checkpoint(
    data_context: "DataContext",  # noqa: F821
    checkpoint_store: CheckpointStore,
    checkpoint_store_name: str,
    ge_cloud_mode: bool,
    name: str,
    config_version: Optional[Union[int, float]] = None,
    template_name: Optional[str] = None,
    module_name: Optional[str] = None,
    class_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[dict] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    # Next two fields are for LegacyCheckpoint configuration
    validation_operator_name: Optional[str] = None,
    batches: Optional[List[dict]] = None,
    # the following four arguments are used by SimpleCheckpoint
    site_names: Optional[Union[str, List[str]]] = None,
    slack_webhook: Optional[str] = None,
    notify_on: Optional[str] = None,
    notify_with: Optional[Union[str, List[str]]] = None,
    ge_cloud_id: Optional[str] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
) -> Union[Checkpoint, LegacyCheckpoint]:
    checkpoint_config: Union[CheckpointConfig, dict]

    # These checks protect against typed objects (BatchRequest and/or RuntimeBatchRequest) encountered in arguments.
    batch_request = get_batch_request_as_dict(batch_request=batch_request)
    validations = get_validations_with_batch_request_as_dict(validations=validations)

    # DataFrames shouldn't be saved to CheckpointStore
    if batch_request_contains_batch_data(batch_request=batch_request):
        raise ge_exceptions.InvalidConfigError(
            f'batch_data found in batch_request cannot be saved to CheckpointStore "{checkpoint_store_name}"'
        )

    if batch_request_in_validations_contains_batch_data(validations=validations):
        raise ge_exceptions.InvalidConfigError(
            f'batch_data found in validations cannot be saved to CheckpointStore "{checkpoint_store_name}"'
        )

    checkpoint_config = {
        "name": name,
        "config_version": config_version,
        "template_name": template_name,
        "module_name": module_name,
        "class_name": class_name,
        "run_name_template": run_name_template,
        "expectation_suite_name": expectation_suite_name,
        "batch_request": batch_request,
        "action_list": action_list,
        "evaluation_parameters": evaluation_parameters,
        "runtime_configuration": runtime_configuration,
        "validations": validations,
        "profilers": profilers,
        # Next two fields are for LegacyCheckpoint configuration
        "validation_operator_name": validation_operator_name,
        "batches": batches,
        # the following four keys are used by SimpleCheckpoint
        "site_names": site_names,
        "slack_webhook": slack_webhook,
        "notify_on": notify_on,
        "notify_with": notify_with,
        "ge_cloud_id": ge_cloud_id,
        "expectation_suite_ge_cloud_id": expectation_suite_ge_cloud_id,
    }

    checkpoint_config = deep_filter_properties_iterable(
        properties=checkpoint_config,
        clean_falsy=True,
    )

    new_checkpoint: Union[
        Checkpoint, SimpleCheckpoint, LegacyCheckpoint
    ] = instantiate_class_from_config(
        config=checkpoint_config,
        runtime_environment={
            "data_context": data_context,
        },
        config_defaults={
            "module_name": "great_expectations.checkpoint",
        },
    )

    if ge_cloud_mode:
        key: GeCloudIdentifier = GeCloudIdentifier(
            resource_type="contract", ge_cloud_id=ge_cloud_id
        )
    else:
        key: ConfigurationIdentifier = ConfigurationIdentifier(
            configuration_key=name,
        )

    checkpoint_config = new_checkpoint.get_config()

    checkpoint_ref = checkpoint_store.set(key=key, value=checkpoint_config)
    if isinstance(checkpoint_ref, GeCloudIdAwareRef):
        ge_cloud_id = checkpoint_ref.ge_cloud_id
        new_checkpoint.ge_cloud_id = uuid.UUID(ge_cloud_id)

    return new_checkpoint
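
A hedged sketch of a minimal `add_checkpoint` call. Most keyword arguments are optional and are stripped by `deep_filter_properties_iterable` when empty; `context` and `checkpoint_store` are assumed to exist, and the names (including the SimpleCheckpoint class choice) are illustrative:

checkpoint = add_checkpoint(
    data_context=context,
    checkpoint_store=checkpoint_store,
    checkpoint_store_name="checkpoint_store",
    ge_cloud_mode=False,
    name="my_checkpoint",
    config_version=1.0,
    class_name="SimpleCheckpoint",
    expectation_suite_name="my_suite",
)
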
Example #14
def get_checkpoint(
    data_context: "DataContext",  # noqa: F821
    checkpoint_store: CheckpointStore,
    name: Optional[str] = None,
    ge_cloud_id: Optional[str] = None,
) -> Union[Checkpoint, LegacyCheckpoint]:
    if ge_cloud_id:
        key: GeCloudIdentifier = GeCloudIdentifier(
            resource_type="contract", ge_cloud_id=ge_cloud_id
        )
    else:
        key: ConfigurationIdentifier = ConfigurationIdentifier(
            configuration_key=name,
        )
    try:
        checkpoint_config: CheckpointConfig = checkpoint_store.get(key=key)
    except ge_exceptions.InvalidKeyError as exc_ik:
        # key may be a GeCloudIdentifier, which has no configuration_key attribute
        id_ = key.configuration_key if isinstance(key, ConfigurationIdentifier) else key
        raise ge_exceptions.CheckpointNotFoundError(
            message=f'Non-existent Checkpoint configuration named "{id_}".\n\nDetails: {exc_ik}'
        )
    except ValidationError as exc_ve:
        raise ge_exceptions.InvalidCheckpointConfigError(
            message="Invalid Checkpoint configuration", validation_error=exc_ve
        )

    if checkpoint_config.config_version is None:
        if not (
            "batches" in checkpoint_config.to_json_dict()
            and (
                len(checkpoint_config.to_json_dict()["batches"]) == 0
                or {"batch_kwargs", "expectation_suite_names",}.issubset(
                    set(
                        list(
                            itertools.chain.from_iterable(
                                [
                                    item.keys()
                                    for item in checkpoint_config.to_json_dict()[
                                        "batches"
                                    ]
                                ]
                            )
                        )
                    )
                )
            )
        ):
            raise ge_exceptions.CheckpointError(
                message="Attempt to instantiate LegacyCheckpoint with insufficient and/or incorrect arguments."
            )

    config: dict = checkpoint_config.to_json_dict()

    if name:
        config.update({"name": name})

    config = filter_properties_dict(properties=config, clean_falsy=True)

    checkpoint: Union[Checkpoint, LegacyCheckpoint] = instantiate_class_from_config(
        config=config,
        runtime_environment={
            "data_context": data_context,
        },
        config_defaults={
            "module_name": "great_expectations.checkpoint",
        },
    )

    return checkpoint
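
Minimal usage sketch (assumes `context` and `checkpoint_store` exist; supply either `name` or `ge_cloud_id`):

checkpoint = get_checkpoint(
    data_context=context,
    checkpoint_store=checkpoint_store,
    name="my_checkpoint",
)
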
Example #15
    def _get_key(
        self,
        attr: "DataContextVariablesSchema"  # noqa: F821
    ) -> GeCloudIdentifier:
        key: GeCloudIdentifier = GeCloudIdentifier(resource_type=attr.value)
        return key
Example #16
    def _run_validation(
        self,
        substituted_runtime_config: dict,
        async_validation_operator_results: List[AsyncResult],
        async_executor: AsyncExecutor,
        result_format: Optional[dict],
        run_id: Optional[Union[str, RunIdentifier]],
        idx: Optional[int] = 0,
        validation_dict: Optional[dict] = None,
    ) -> None:
        if validation_dict is None:
            validation_dict = {}

        try:
            substituted_validation_dict: dict = get_substituted_validation_dict(
                substituted_runtime_config=substituted_runtime_config,
                validation_dict=validation_dict,
            )
            batch_request: Union[
                BatchRequest,
                RuntimeBatchRequest] = substituted_validation_dict.get(
                    "batch_request")
            expectation_suite_name: str = substituted_validation_dict.get(
                "expectation_suite_name")
            expectation_suite_ge_cloud_id: str = substituted_validation_dict.get(
                "expectation_suite_ge_cloud_id")
            include_rendered_content: bool = substituted_validation_dict.get(
                "include_rendered_content", False)

            validator: Validator = self.data_context.get_validator(
                batch_request=batch_request,
                expectation_suite_name=(expectation_suite_name
                                        if not self.data_context.ge_cloud_mode
                                        else None),
                expectation_suite_ge_cloud_id=(expectation_suite_ge_cloud_id if
                                               self.data_context.ge_cloud_mode
                                               else None),
                include_rendered_content=include_rendered_content,
            )

            action_list: list = substituted_validation_dict.get("action_list")
            runtime_configuration_validation = substituted_validation_dict.get(
                "runtime_configuration", {})
            catch_exceptions_validation = runtime_configuration_validation.get(
                "catch_exceptions")
            result_format_validation = runtime_configuration_validation.get(
                "result_format")
            result_format = result_format or result_format_validation

            if result_format is None:
                result_format = {"result_format": "SUMMARY"}

            action_list_validation_operator: ActionListValidationOperator = (
                ActionListValidationOperator(
                    data_context=self.data_context,
                    action_list=action_list,
                    result_format=result_format,
                    name=f"{self.name}-checkpoint-validation[{idx}]",
                ))
            checkpoint_identifier = None
            if self.data_context.ge_cloud_mode:
                checkpoint_identifier = GeCloudIdentifier(
                    resource_type=GeCloudRESTResource.CONTRACT,
                    ge_cloud_id=str(self.ge_cloud_id),
                )

            operator_run_kwargs = {}

            if catch_exceptions_validation is not None:
                operator_run_kwargs[
                    "catch_exceptions"] = catch_exceptions_validation

            async_validation_operator_results.append(
                async_executor.submit(
                    action_list_validation_operator.run,
                    assets_to_validate=[validator],
                    run_id=run_id,
                    evaluation_parameters=substituted_validation_dict.get(
                        "evaluation_parameters"),
                    result_format=result_format,
                    checkpoint_identifier=checkpoint_identifier,
                    checkpoint_name=self.name,
                    **operator_run_kwargs,
                ))
        except (
                ge_exceptions.CheckpointError,
                ge_exceptions.ExecutionEngineError,
                ge_exceptions.MetricError,
        ) as e:
            raise ge_exceptions.CheckpointError(
                f"Exception occurred while running validation[{idx}] of Checkpoint '{self.name}': {e.message}."
            )
    def run(
        self,
        assets_to_validate,
        run_id=None,
        evaluation_parameters=None,
        run_name=None,
        run_time=None,
        catch_exceptions=None,
        result_format=None,
        checkpoint_identifier=None,
    ):
        assert not (run_id and run_name) and not (
            run_id and run_time
        ), "Please provide either a run_id or run_name and/or run_time."
        if isinstance(run_id, str) and not run_name:
            warnings.warn(
                "String run_ids will be deprecated in the future. Please provide a run_id of type "
                "RunIdentifier(run_name=None, run_time=None), or a dictionary containing run_name "
                "and run_time (both optional). Instead of providing a run_id, you may also provide "
                "run_name and run_time separately.",
                DeprecationWarning,
            )
            try:
                run_time = parse(run_id)
            except (ValueError, TypeError):
                pass
            run_id = RunIdentifier(run_name=run_id, run_time=run_time)
        elif isinstance(run_id, dict):
            run_id = RunIdentifier(**run_id)
        elif not isinstance(run_id, RunIdentifier):
            run_id = RunIdentifier(run_name=run_name, run_time=run_time)

        ###
        # NOTE: 20211010 - jdimatteo: This method is called by both Checkpoint.run and LegacyCheckpoint.run and below
        # usage of AsyncExecutor may speed up I/O bound validations by running them in parallel with multithreading
        # (if concurrency is enabled in the data context configuration).
        #
        # When this method is called by LegacyCheckpoint.run, len(assets_to_validate) may be greater than 1. If
        # concurrency is enabled in the configuration AND len(assets_to_validate) > 1, then execution is run in multiple
        # threads with AsyncExecutor -- otherwise AsyncExecutor only uses the current single thread to execute the work.
        # Please see the below arguments used to initialize AsyncExecutor and the corresponding AsyncExecutor docstring
        # for more details on when multiple threads are used.
        #
        # When this method is called by Checkpoint.run, len(assets_to_validate) may be 1 even if there are multiple
        # validations, because Checkpoint.run calls this method in a loop for each validation. AsyncExecutor is also
        # used in the Checkpoint.run loop to optionally run each validation in parallel with multithreading, so this
        # method's AsyncExecutor is nested within the Checkpoint.run AsyncExecutor. The AsyncExecutor logic to only use
        # multithreading when max_workers > 1 ensures that no nested multithreading is ever used when
        # len(assets_to_validate) is equal to 1. So no unnecessary multithreading is ever used here even though it may
        # be nested inside another AsyncExecutor (and this is a good thing because it avoids extra overhead associated
        # with each thread and minimizes the total number of threads to simplify debugging).
        with AsyncExecutor(
                self.data_context.concurrency,
                max_workers=len(assets_to_validate)) as async_executor:
            batch_and_async_result_tuples = []
            for item in assets_to_validate:
                batch = self._build_batch_from_item(item)

                if hasattr(batch, "active_batch_id"):
                    batch_identifier = batch.active_batch_id
                else:
                    batch_identifier = batch.batch_id

                if result_format is None:
                    result_format = self.result_format

                batch_validate_arguments = {
                    "run_id": run_id,
                    "result_format": result_format,
                    "evaluation_parameters": evaluation_parameters,
                }

                if catch_exceptions is not None:
                    batch_validate_arguments[
                        "catch_exceptions"] = catch_exceptions

                batch_and_async_result_tuples.append((
                    batch,
                    async_executor.submit(
                        batch.validate,
                        **batch_validate_arguments,
                    ),
                ))

            run_results = {}
            for batch, async_batch_validation_result in batch_and_async_result_tuples:
                if self.data_context.ge_cloud_mode:
                    expectation_suite_identifier = GeCloudIdentifier(
                        resource_type="expectation_suite",
                        ge_cloud_id=batch._expectation_suite.ge_cloud_id,
                    )
                    validation_result_id = GeCloudIdentifier(
                        resource_type="suite_validation_result")
                else:
                    expectation_suite_identifier = ExpectationSuiteIdentifier(
                        expectation_suite_name=batch._expectation_suite.expectation_suite_name
                    )
                    validation_result_id = ValidationResultIdentifier(
                        batch_identifier=batch_identifier,
                        expectation_suite_identifier=expectation_suite_identifier,
                        run_id=run_id,
                    )

                batch_actions_results = self._run_actions(
                    batch=batch,
                    expectation_suite_identifier=expectation_suite_identifier,
                    expectation_suite=batch._expectation_suite,
                    batch_validation_result=async_batch_validation_result.result(),
                    run_id=run_id,
                    validation_result_id=validation_result_id,
                    checkpoint_identifier=checkpoint_identifier,
                )

                run_result_obj = {
                    "validation_result": async_batch_validation_result.result(),
                    "actions_results": batch_actions_results,
                }
                run_results[validation_result_id] = run_result_obj

        return ValidationOperatorResult(
            run_id=run_id,
            run_results=run_results,
            validation_operator_config=self.validation_operator_config,
            evaluation_parameters=evaluation_parameters,
        )
Example #18
def validation_result_suite_ge_cloud_identifier(
        validation_result_suite_ge_cloud_id):
    return GeCloudIdentifier(
        resource_type=GeCloudRESTResource.CONTRACT,
        ge_cloud_id=validation_result_suite_ge_cloud_id,
    )
Example #19
def validation_result_suite_ge_cloud_identifier(
        validation_result_suite_ge_cloud_id):
    return GeCloudIdentifier(resource_type="contract",
                             ge_cloud_id=validation_result_suite_ge_cloud_id)