コード例 #1
0
    def _check_validity_of_batch_requests_in_config(
        config: RuleBasedProfilerConfig,
    ) -> bool:
        """Report whether every batch request found in the profiler config is free of batch data.

        Batch requests are gathered from the "domain_builder" entry of each rule
        and from each entry of the rule's optional "parameter_builders" list.

        Args:
            config: Profiler configuration whose ``rules`` mapping is inspected.

        Returns:
            False if any gathered batch request contains batch data, True otherwise.
        """
        # Evaluate nested types in RuleConfig to parse out BatchRequests
        collected: List[Union[BatchRequest, RuntimeBatchRequest, dict]] = []
        for rule_config in config.rules.values():
            # The domain builder is mandatory; parameter builders are optional.
            builders: List[dict] = [rule_config["domain_builder"]]
            builders.extend(rule_config.get("parameter_builders", []))
            collected.extend(
                builder["batch_request"]
                for builder in builders
                if "batch_request" in builder
            )

        # DataFrames shouldn't be saved to ProfilerStore
        return not any(
            batch_request_contains_batch_data(batch_request=candidate)
            for candidate in collected
        )
コード例 #2
0
    def to_dict(self) -> dict:
        """Serialize this object to a dictionary tagged with its class and module names.

        Starts from the parent class's ``to_dict()`` result, adds "class_name"
        and "module_name", and removes the "batch_request" entry when the
        instance's batch request contains batch data.

        Returns:
            The resulting dictionary.
        """
        serialized: dict = super().to_dict()
        serialized.update(
            {
                "class_name": self.__class__.__name__,
                "module_name": self.__class__.__module__,
            }
        )

        has_batch_data = batch_request_contains_batch_data(
            batch_request=self.batch_request
        )
        if has_batch_data:
            # Drop the entry if present; a missing key is not an error.
            serialized.pop("batch_request", None)

        return serialized
コード例 #3
0
    def __init__(
        self,
        name: str,
        data_context: "DataContext",  # noqa: F821
        config_version: Optional[Union[int, float]] = None,
        template_name: Optional[str] = None,
        run_name_template: Optional[str] = None,
        expectation_suite_name: Optional[str] = None,
        batch_request: Optional[Union[BatchRequest, RuntimeBatchRequest, dict]] = None,
        action_list: Optional[List[dict]] = None,
        evaluation_parameters: Optional[dict] = None,
        runtime_configuration: Optional[dict] = None,
        validations: Optional[List[dict]] = None,
        profilers: Optional[List[dict]] = None,
        validation_operator_name: Optional[str] = None,
        batches: Optional[List[dict]] = None,
        ge_cloud_id: Optional[UUID] = None,
        expectation_suite_ge_cloud_id: Optional[UUID] = None,
    ):
        """Build a CheckpointConfig from the arguments and hand it to the parent initializer.

        All arguments except ``data_context`` are forwarded unchanged, by
        keyword, to ``CheckpointConfig``; ``data_context`` is passed to the
        parent initializer alongside the resulting config.

        Raises:
            ValueError: If ``batch_request``, or a batch request inside
                ``validations``, contains batch data.
        """
        # Only primitive types are allowed as constructor arguments; data frames are supplied to "run()" as arguments.
        # The top-level batch request is inspected first; validations are only
        # inspected when it is clean.
        has_batch_data = batch_request_contains_batch_data(
            batch_request=batch_request
        ) or batch_request_in_validations_contains_batch_data(validations=validations)
        if has_batch_data:
            raise ValueError(
                "Error: batch_data found in batch_request -- only primitive types "
                "are allowed as Checkpoint constructor arguments.\n"
            )

        config_arguments: dict = dict(
            name=name,
            config_version=config_version,
            template_name=template_name,
            run_name_template=run_name_template,
            expectation_suite_name=expectation_suite_name,
            batch_request=batch_request,
            action_list=action_list,
            evaluation_parameters=evaluation_parameters,
            runtime_configuration=runtime_configuration,
            validations=validations,
            profilers=profilers,
            validation_operator_name=validation_operator_name,
            batches=batches,
            ge_cloud_id=ge_cloud_id,
            expectation_suite_ge_cloud_id=expectation_suite_ge_cloud_id,
        )
        config: CheckpointConfig = CheckpointConfig(**config_arguments)

        super().__init__(checkpoint_config=config, data_context=data_context)
コード例 #4
0
    def _build_checkpoint_config(self) -> CheckpointConfig:
        """Assemble a CheckpointConfig from the configurator's settings.

        The action list starts from ``_default_action_list()`` and is extended
        with a data-docs action when ``site_names`` is set and a Slack action
        when ``slack_webhook`` is set. The remaining settings come from
        ``other_kwargs``, with "config_version", "name", "class_name",
        "action_list" and "ge_cloud_id" overridden here. The result is passed
        through ``checkpointConfigSchema`` (load, then dump) before the
        CheckpointConfig is constructed.

        Returns:
            The CheckpointConfig built from the schema-dumped settings.
        """
        action_list = self._default_action_list()
        if self.site_names:
            action_list = self._add_update_data_docs_action(action_list)
        if self.slack_webhook:
            action_list = self._add_slack_action(action_list)

        # NOTE: when "other_kwargs" is a non-empty dict, "config_kwargs" is that
        # same object, so the edits below are visible on the instance as well.
        config_kwargs: dict = self.other_kwargs or {}

        # DataFrames shouldn't be saved to CheckpointStore
        batch_request = config_kwargs.get("batch_request")
        if batch_request_contains_batch_data(batch_request=batch_request):
            config_kwargs.pop("batch_request", None)
        else:
            config_kwargs["batch_request"] = get_batch_request_as_dict(
                batch_request=batch_request
            )

        # DataFrames shouldn't be saved to CheckpointStore
        validations = config_kwargs.get("validations")
        if batch_request_in_validations_contains_batch_data(validations=validations):
            config_kwargs.pop("validations", [])
        else:
            config_kwargs["validations"] = get_validations_with_batch_request_as_dict(
                validations=validations
            )

        specific_config_kwargs_overrides: dict = {
            "config_version": 1.0,
            "name": self.name,
            "class_name": "Checkpoint",
            "action_list": action_list,
            # Pop from "config_kwargs" rather than "self.other_kwargs": the
            # latter may be None (hence the "or {}" fallback above), in which
            # case calling ".pop" on it would raise AttributeError. Whenever
            # "other_kwargs" is a non-empty dict, both names refer to the same
            # object, so the outcome is unchanged.
            "ge_cloud_id": config_kwargs.pop("ge_cloud_id", None),
        }
        config_kwargs.update(specific_config_kwargs_overrides)

        # Roundtrip through schema validation to remove any illegal fields and/or restore any missing fields.
        checkpoint_config: dict = checkpointConfigSchema.load(
            CommentedMap(**config_kwargs)
        )
        config_kwargs = checkpointConfigSchema.dump(checkpoint_config)

        logger.debug(
            f"SimpleCheckpointConfigurator built this CheckpointConfig:"
            f"{checkpoint_config}"
        )
        return CheckpointConfig(**config_kwargs)
コード例 #5
0
def add_checkpoint(
    data_context: "DataContext",  # noqa: F821
    checkpoint_store: CheckpointStore,
    checkpoint_store_name: str,
    ge_cloud_mode: bool,
    name: str,
    config_version: Optional[Union[int, float]] = None,
    template_name: Optional[str] = None,
    module_name: Optional[str] = None,
    class_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[dict] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    # Next two fields are for LegacyCheckpoint configuration
    validation_operator_name: Optional[str] = None,
    batches: Optional[List[dict]] = None,
    # the following four arguments are used by SimpleCheckpoint
    site_names: Optional[Union[str, List[str]]] = None,
    slack_webhook: Optional[str] = None,
    notify_on: Optional[str] = None,
    notify_with: Optional[Union[str, List[str]]] = None,
    ge_cloud_id: Optional[str] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
) -> Union[Checkpoint, LegacyCheckpoint]:
    """Instantiate a checkpoint from the given settings and save its config to the store.

    The settings are gathered into a dictionary, filtered with
    ``deep_filter_properties_iterable(..., clean_falsy=True)``, and passed to
    ``instantiate_class_from_config`` with "great_expectations.checkpoint" as
    the default module name. The new checkpoint's config is then written to
    ``checkpoint_store`` under a ``GeCloudIdentifier`` when ``ge_cloud_mode``
    is true, or a ``ConfigurationIdentifier`` keyed by ``name`` otherwise.

    Returns:
        The newly instantiated checkpoint. If the store returns a
        ``GeCloudIdAwareRef``, its id is assigned to the checkpoint's
        ``ge_cloud_id`` as a ``uuid.UUID``.

    Raises:
        ge_exceptions.InvalidConfigError: If ``batch_request``, or a batch
            request inside ``validations``, contains batch data.
    """
    # These checks protect against typed objects (BatchRequest and/or RuntimeBatchRequest) encountered in arguments.
    batch_request_as_dict = get_batch_request_as_dict(batch_request=batch_request)
    validations_as_dicts = get_validations_with_batch_request_as_dict(
        validations=validations
    )

    # DataFrames shouldn't be saved to CheckpointStore
    if batch_request_contains_batch_data(batch_request=batch_request_as_dict):
        raise ge_exceptions.InvalidConfigError(
            f'batch_data found in batch_request cannot be saved to CheckpointStore "{checkpoint_store_name}"'
        )
    if batch_request_in_validations_contains_batch_data(
        validations=validations_as_dicts
    ):
        raise ge_exceptions.InvalidConfigError(
            f'batch_data found in validations cannot be saved to CheckpointStore "{checkpoint_store_name}"'
        )

    raw_config: dict = {
        "name": name,
        "config_version": config_version,
        "template_name": template_name,
        "module_name": module_name,
        "class_name": class_name,
        "run_name_template": run_name_template,
        "expectation_suite_name": expectation_suite_name,
        "batch_request": batch_request_as_dict,
        "action_list": action_list,
        "evaluation_parameters": evaluation_parameters,
        "runtime_configuration": runtime_configuration,
        "validations": validations_as_dicts,
        "profilers": profilers,
        # Next two fields are for LegacyCheckpoint configuration
        "validation_operator_name": validation_operator_name,
        "batches": batches,
        # the following four keys are used by SimpleCheckpoint
        "site_names": site_names,
        "slack_webhook": slack_webhook,
        "notify_on": notify_on,
        "notify_with": notify_with,
        "ge_cloud_id": ge_cloud_id,
        "expectation_suite_ge_cloud_id": expectation_suite_ge_cloud_id,
    }
    filtered_config = deep_filter_properties_iterable(
        properties=raw_config,
        clean_falsy=True,
    )

    checkpoint: Union[
        Checkpoint, SimpleCheckpoint, LegacyCheckpoint
    ] = instantiate_class_from_config(
        config=filtered_config,
        runtime_environment={"data_context": data_context},
        config_defaults={"module_name": "great_expectations.checkpoint"},
    )

    # The store key is addressed by cloud id in cloud mode, by checkpoint name otherwise.
    store_key: Union[GeCloudIdentifier, ConfigurationIdentifier] = (
        GeCloudIdentifier(resource_type="contract", ge_cloud_id=ge_cloud_id)
        if ge_cloud_mode
        else ConfigurationIdentifier(configuration_key=name)
    )

    # Persist the config reported by the instantiated checkpoint, not the input dictionary.
    persisted_config = checkpoint.get_config()
    store_ref = checkpoint_store.set(key=store_key, value=persisted_config)
    if isinstance(store_ref, GeCloudIdAwareRef):
        checkpoint.ge_cloud_id = uuid.UUID(store_ref.ge_cloud_id)

    return checkpoint