def _build_checkpoint_config(self) -> CheckpointConfig:
    """Assemble a CheckpointConfig from this configurator's settings.

    The action list starts from ``self._default_action_list()`` and is
    extended with a data-docs update action when ``self.site_names`` is set
    and with a Slack action when ``self.slack_webhook`` is set.  The
    remaining keyword arguments (``self.other_kwargs``) are normalized,
    overlaid with the fixed fields below, and round-tripped through
    ``checkpointConfigSchema`` before the CheckpointConfig is built.

    Returns:
        The CheckpointConfig built from the schema-validated keyword
        arguments.
    """
    action_list = self._default_action_list()
    if self.site_names:
        action_list = self._add_update_data_docs_action(action_list)
    if self.slack_webhook:
        action_list = self._add_slack_action(action_list)

    # Work on a shallow copy: the pops/updates below must not leak into
    # self.other_kwargs, and a missing (None) other_kwargs must not crash.
    config_kwargs: dict = dict(self.other_kwargs or {})

    # DataFrames shouldn't be saved to CheckpointStore
    batch_request = config_kwargs.get("batch_request")
    if batch_request_contains_batch_data(batch_request=batch_request):
        config_kwargs.pop("batch_request", None)
    else:
        config_kwargs["batch_request"] = get_batch_request_as_dict(
            batch_request=batch_request
        )

    # DataFrames shouldn't be saved to CheckpointStore
    validations = config_kwargs.get("validations")
    if batch_request_in_validations_contains_batch_data(validations=validations):
        config_kwargs.pop("validations", [])
    else:
        config_kwargs["validations"] = get_validations_with_batch_request_as_dict(
            validations=validations
        )

    # These fields always win over whatever was supplied in other_kwargs.
    specific_config_kwargs_overrides: dict = {
        "config_version": 1.0,
        "name": self.name,
        "class_name": "Checkpoint",
        "action_list": action_list,
        "ge_cloud_id": config_kwargs.pop("ge_cloud_id", None),
    }
    config_kwargs.update(specific_config_kwargs_overrides)

    # Roundtrip through schema validation to remove any illegal fields and/or
    # restore any missing fields.
    checkpoint_config: dict = checkpointConfigSchema.load(
        CommentedMap(**config_kwargs)
    )
    config_kwargs = checkpointConfigSchema.dump(checkpoint_config)

    logger.debug(
        "SimpleCheckpointConfigurator built this CheckpointConfig:%s",
        checkpoint_config,
    )
    return CheckpointConfig(**config_kwargs)
def add_checkpoint(
    data_context: "DataContext",  # noqa: F821
    checkpoint_store: CheckpointStore,
    checkpoint_store_name: str,
    ge_cloud_mode: bool,
    name: str,
    config_version: Optional[Union[int, float]] = None,
    template_name: Optional[str] = None,
    module_name: Optional[str] = None,
    class_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[dict] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    # Next two fields are for LegacyCheckpoint configuration
    validation_operator_name: Optional[str] = None,
    batches: Optional[List[dict]] = None,
    # the following four arguments are used by SimpleCheckpoint
    site_names: Optional[Union[str, List[str]]] = None,
    slack_webhook: Optional[str] = None,
    notify_on: Optional[str] = None,
    notify_with: Optional[Union[str, List[str]]] = None,
    ge_cloud_id: Optional[str] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
) -> Union[Checkpoint, LegacyCheckpoint]:
    """Instantiate a checkpoint from the given arguments and persist its config.

    The arguments are gathered into a configuration dictionary, falsy entries
    are filtered out, and the checkpoint class is instantiated from that
    configuration (module defaulting to ``great_expectations.checkpoint``).
    The configuration reported by the new checkpoint is then written to
    ``checkpoint_store`` under a key that depends on ``ge_cloud_mode``.

    Args:
        data_context: Passed to the checkpoint class as runtime environment.
        checkpoint_store: Store that receives the checkpoint configuration.
        checkpoint_store_name: Store name; only used in error messages.
        ge_cloud_mode: Selects a GeCloudIdentifier key (keyed on
            ``ge_cloud_id``) instead of a ConfigurationIdentifier key (keyed
            on ``name``).
        name: Checkpoint name.
        All remaining arguments are copied verbatim into the configuration
        under keys of the same name.

    Returns:
        The newly instantiated checkpoint.

    Raises:
        ge_exceptions.InvalidConfigError: If ``batch_request`` or any entry
            of ``validations`` carries batch_data.
    """
    # Typed objects (BatchRequest and/or RuntimeBatchRequest) may arrive in the
    # arguments; normalize them to plain dictionaries first.
    batch_request = get_batch_request_as_dict(batch_request=batch_request)
    validations = get_validations_with_batch_request_as_dict(validations=validations)

    # DataFrames shouldn't be saved to CheckpointStore
    if batch_request_contains_batch_data(batch_request=batch_request):
        raise ge_exceptions.InvalidConfigError(
            f'batch_data found in batch_request cannot be saved to CheckpointStore "{checkpoint_store_name}"'
        )
    if batch_request_in_validations_contains_batch_data(validations=validations):
        raise ge_exceptions.InvalidConfigError(
            f'batch_data found in validations cannot be saved to CheckpointStore "{checkpoint_store_name}"'
        )

    unfiltered_config: dict = {
        "name": name,
        "config_version": config_version,
        "template_name": template_name,
        "module_name": module_name,
        "class_name": class_name,
        "run_name_template": run_name_template,
        "expectation_suite_name": expectation_suite_name,
        "batch_request": batch_request,
        "action_list": action_list,
        "evaluation_parameters": evaluation_parameters,
        "runtime_configuration": runtime_configuration,
        "validations": validations,
        "profilers": profilers,
        # Next two fields are for LegacyCheckpoint configuration
        "validation_operator_name": validation_operator_name,
        "batches": batches,
        # the following four keys are used by SimpleCheckpoint
        "site_names": site_names,
        "slack_webhook": slack_webhook,
        "notify_on": notify_on,
        "notify_with": notify_with,
        "ge_cloud_id": ge_cloud_id,
        "expectation_suite_ge_cloud_id": expectation_suite_ge_cloud_id,
    }
    instantiation_config = deep_filter_properties_iterable(
        properties=unfiltered_config,
        clean_falsy=True,
    )

    new_checkpoint: Union[
        Checkpoint, SimpleCheckpoint, LegacyCheckpoint
    ] = instantiate_class_from_config(
        config=instantiation_config,
        runtime_environment={"data_context": data_context},
        config_defaults={"module_name": "great_expectations.checkpoint"},
    )

    store_key: Union[GeCloudIdentifier, ConfigurationIdentifier] = (
        GeCloudIdentifier(resource_type="contract", ge_cloud_id=ge_cloud_id)
        if ge_cloud_mode
        else ConfigurationIdentifier(configuration_key=name)
    )

    # Persist the configuration as reported by the instantiated checkpoint.
    stored_ref = checkpoint_store.set(key=store_key, value=new_checkpoint.get_config())
    if isinstance(stored_ref, GeCloudIdAwareRef):
        # The store reference carries a cloud id; copy it onto the checkpoint.
        new_checkpoint.ge_cloud_id = uuid.UUID(stored_ref.ge_cloud_id)

    return new_checkpoint
def run_checkpoint(
    data_context: "DataContext",  # noqa: F821
    checkpoint_store: CheckpointStore,
    checkpoint_name: Optional[str] = None,
    template_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[Union[BatchRequest, RuntimeBatchRequest, dict]] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    run_id: Optional[Union[str, int, float]] = None,
    run_name: Optional[str] = None,
    run_time: Optional[datetime.datetime] = None,
    result_format: Optional[str] = None,
    ge_cloud_id: Optional[str] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
    **kwargs,
) -> CheckpointResult:
    """
    Validate against a pre-defined Checkpoint. (Experimental)

    The Checkpoint is fetched via ``get_checkpoint`` and then run with the
    call arguments; falsy arguments are dropped before the run.

    Args:
        data_context: DataContext for Checkpoint class instantiation purposes
        checkpoint_store: CheckpointStore for managing Checkpoint configurations
        checkpoint_name: The name of a Checkpoint defined via the CLI or by manually creating a yml file
        template_name: The name of a Checkpoint template to retrieve from the CheckpointStore
        run_name_template: The template to use for run_name
        expectation_suite_name: Expectation suite to be used by Checkpoint run
        batch_request: Batch request to be used by Checkpoint run
        action_list: List of actions to be performed by the Checkpoint
        evaluation_parameters: $parameter_name syntax references to be evaluated at runtime
        runtime_configuration: Runtime configuration override parameters
        validations: Validations to be performed by the Checkpoint run
        profilers: Profilers to be used by the Checkpoint run
        run_id: The run_id for the validation; if None, a default value will be used
        run_name: The run_name for the validation; if None, a default value will be used
        run_time: The date/time of the run
        result_format: One of several supported formatting directives for expectation validation results;
            falls back to the stored runtime_configuration's value, then to {"result_format": "SUMMARY"}
        ge_cloud_id: Great Expectations Cloud id for the checkpoint
        expectation_suite_ge_cloud_id: Great Expectations Cloud id for the expectation suite
        **kwargs: Additional keyword arguments merged into the arguments of the Checkpoint's ``run``

    Returns:
        CheckpointResult
    """
    checkpoint: Union[Checkpoint, SimpleCheckpoint, LegacyCheckpoint] = get_checkpoint(
        data_context=data_context,
        checkpoint_store=checkpoint_store,
        name=checkpoint_name,
        ge_cloud_id=ge_cloud_id,
    )
    stored_config: CheckpointConfig = checkpoint.get_config()

    # An explicit result_format wins; otherwise use the stored one, if any.
    stored_runtime_configuration = (
        stored_config.runtime_configuration
        if "runtime_configuration" in stored_config
        else None
    )
    if (
        stored_runtime_configuration
        and "result_format" in stored_runtime_configuration
    ):
        result_format = result_format or stored_runtime_configuration.get(
            "result_format"
        )
    if result_format is None:
        result_format = {"result_format": "SUMMARY"}

    batch_request = get_batch_request_as_dict(batch_request=batch_request)
    validations = get_validations_with_batch_request_as_dict(validations=validations)

    call_args: dict = {
        "template_name": template_name,
        "run_name_template": run_name_template,
        "expectation_suite_name": expectation_suite_name,
        "batch_request": batch_request,
        "action_list": action_list,
        "evaluation_parameters": evaluation_parameters,
        "runtime_configuration": runtime_configuration,
        "validations": validations,
        "profilers": profilers,
        "run_id": run_id,
        "run_name": run_name,
        "run_time": run_time,
        "result_format": result_format,
        "expectation_suite_ge_cloud_id": expectation_suite_ge_cloud_id,
    }

    # Seed with the stored values for the recognized keys, then let the call
    # arguments overwrite them.
    merged_config: dict = {}
    for key, value in stored_config.items():
        if key in call_args:
            merged_config[key] = value
    merged_config.update(call_args)

    run_arguments: dict = dict(**merged_config, **kwargs)
    filter_properties_dict(
        properties=run_arguments,
        clean_falsy=True,
        inplace=True,
    )

    return checkpoint.run(**run_arguments)
def run(
    self,
    template_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[Union[BatchRequestBase, dict]] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    run_id: Optional[Union[str, RunIdentifier]] = None,
    run_name: Optional[str] = None,
    run_time: Optional[Union[str, datetime.datetime]] = None,
    result_format: Optional[Union[str, dict]] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
) -> CheckpointResult:
    """Run this checkpoint's validations and collect their results.

    The runtime arguments are passed to ``self.get_substituted_config`` and
    the resulting configuration drives the run: one ``_run_validation`` call
    per entry in ``validations``, or a single call when only a top-level
    batch_request is present.

    Args:
        run_id: Mutually exclusive with ``run_name`` and ``run_time``; when
            omitted, a RunIdentifier is built from ``run_name``/``run_time``.
        run_name: When omitted and a run_name_template is configured, it is
            derived from the template and ``run_time``.
        run_time: Defaults to ``datetime.datetime.now()``.
        result_format: Falls back to ``runtime_configuration["result_format"]``.
        All other arguments are forwarded as runtime overrides to
        ``get_substituted_config``.

    Returns:
        CheckpointResult holding the run_id, the merged ``run_results`` of
        every validation operator result, and ``self.config``.

    Raises:
        ge_exceptions.CheckpointError: If the substituted configuration has
            neither a batch_request nor any validations.
    """
    assert not (run_id and run_name) and not (
        run_id and run_time
    ), "Please provide either a run_id or run_name and/or run_time."

    run_time = run_time or datetime.datetime.now()
    runtime_configuration = runtime_configuration or {}
    result_format = result_format or runtime_configuration.get("result_format")

    batch_request = get_batch_request_as_dict(batch_request=batch_request)
    validations = get_validations_with_batch_request_as_dict(validations=validations)

    runtime_kwargs: dict = {
        "template_name": template_name,
        "run_name_template": run_name_template,
        "expectation_suite_name": expectation_suite_name,
        "batch_request": batch_request or {},
        "action_list": action_list or [],
        "evaluation_parameters": evaluation_parameters or {},
        "runtime_configuration": runtime_configuration or {},
        "validations": validations or [],
        "profilers": profilers or [],
        "expectation_suite_ge_cloud_id": expectation_suite_ge_cloud_id,
    }
    substituted_runtime_config: dict = self.get_substituted_config(
        runtime_kwargs=runtime_kwargs
    )

    run_name_template = substituted_runtime_config.get("run_name_template")
    batch_request = substituted_runtime_config.get("batch_request")
    validations = substituted_runtime_config.get("validations") or []

    if not validations and not batch_request:
        raise ge_exceptions.CheckpointError(
            f'Checkpoint "{self.name}" must contain either a batch_request or validations.'
        )

    if run_name is None and run_name_template is not None:
        run_name = get_datetime_string_from_strftime_format(
            format_str=run_name_template, datetime_obj=run_time
        )

    run_id = run_id or RunIdentifier(run_name=run_name, run_time=run_time)

    # Use AsyncExecutor to speed up I/O bound validations by running them in parallel with multithreading (if
    # concurrency is enabled in the data context configuration) -- please see the below arguments used to initialize
    # AsyncExecutor and the corresponding AsyncExecutor docstring for more details on when multiple threads are
    # used.
    with AsyncExecutor(
        self.data_context.concurrency, max_workers=len(validations)
    ) as async_executor:
        # noinspection PyUnresolvedReferences
        async_validation_operator_results: List[
            AsyncResult[ValidationOperatorResult]
        ] = []

        # Arguments shared by every _run_validation call below.
        common_kwargs: dict = {
            "substituted_runtime_config": substituted_runtime_config,
            "async_validation_operator_results": async_validation_operator_results,
            "async_executor": async_executor,
            "result_format": result_format,
            "run_id": run_id,
        }

        if validations:
            for idx, validation_dict in enumerate(validations):
                self._run_validation(
                    idx=idx,
                    validation_dict=validation_dict,
                    **common_kwargs,
                )
        else:
            # No per-validation entries: run once without idx/validation_dict.
            self._run_validation(**common_kwargs)

        run_results: dict = {}
        for pending_result in async_validation_operator_results:
            run_results.update(pending_result.result().run_results)

    return CheckpointResult(
        run_id=run_id,
        run_results=run_results,
        checkpoint_config=self.config,
    )
def run_with_runtime_args(
    self,
    template_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[Union[BatchRequestBase, dict]] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    run_id: Optional[Union[str, int, float]] = None,
    run_name: Optional[str] = None,
    run_time: Optional[datetime.datetime] = None,
    result_format: Optional[str] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
    **kwargs,
) -> CheckpointResult:
    """Run this checkpoint with runtime arguments, dropping the falsy ones.

    ``result_format`` falls back to the value in this checkpoint's own
    runtime_configuration and then to ``{"result_format": "SUMMARY"}``.  The
    remaining arguments are combined with ``**kwargs``, falsy entries are
    filtered out, and what is left is passed to ``self.run``.

    Returns:
        The CheckpointResult returned by ``self.run``.
    """
    own_config: CheckpointConfig = cast(CheckpointConfig, self.get_config())

    # An explicit result_format wins; otherwise use the configured one, if any.
    own_runtime_configuration = (
        own_config.runtime_configuration
        if "runtime_configuration" in own_config
        else None
    )
    if own_runtime_configuration and "result_format" in own_runtime_configuration:
        result_format = result_format or own_runtime_configuration.get(
            "result_format"
        )
    if result_format is None:
        result_format = {"result_format": "SUMMARY"}

    batch_request = get_batch_request_as_dict(batch_request=batch_request)
    validations = get_validations_with_batch_request_as_dict(validations=validations)

    call_args: dict = {
        "template_name": template_name,
        "run_name_template": run_name_template,
        "expectation_suite_name": expectation_suite_name,
        "batch_request": batch_request,
        "action_list": action_list,
        "evaluation_parameters": evaluation_parameters,
        "runtime_configuration": runtime_configuration,
        "validations": validations,
        "profilers": profilers,
        "run_id": run_id,
        "run_name": run_name,
        "run_time": run_time,
        "result_format": result_format,
        "expectation_suite_ge_cloud_id": expectation_suite_ge_cloud_id,
    }

    # Seed with the configured values for the recognized keys, then let the
    # call arguments overwrite them.
    merged_config: dict = {}
    for key, value in own_config.items():
        if key in call_args:
            merged_config[key] = value
    merged_config.update(call_args)

    run_arguments: dict = dict(**merged_config, **kwargs)
    filter_properties_dict(
        properties=run_arguments,
        clean_falsy=True,
        inplace=True,
    )

    return self.run(**run_arguments)
def resolve_config_using_acceptable_arguments(
    checkpoint: "Checkpoint",  # noqa: F821
    template_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[Union[BatchRequest, RuntimeBatchRequest, dict]] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    run_id: Optional[Union[str, RunIdentifier]] = None,
    run_name: Optional[str] = None,
    run_time: Optional[Union[str, datetime.datetime]] = None,
    result_format: Optional[Union[str, dict]] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
) -> dict:
    """
    This method reconciles the Checkpoint configuration (e.g., obtained from the Checkpoint store) with dynamically
    supplied arguments in order to obtain that Checkpoint specification that is ready for running validation on it.
    This procedure is necessitated by the fact that the Checkpoint configuration is hierarchical in its form, which
    was established for the purposes of making the specification of different Checkpoint capabilities easy.  In
    particular, entities, such as BatchRequest, expectation_suite_name, and action_list, can be specified at the top
    Checkpoint level with the suitable overrides provided at lower levels (e.g., in the validations section).
    Reconciling and normalizing the Checkpoint configuration is essential for usage statistics, because the exact
    values of the entities in their formally validated form (e.g., BatchRequest) is the required level of detail.

    Returns:
        The substituted runtime configuration; each entry of its "validations" list is updated in place with the
        resolved "batch_request", "expectation_suite_name", "expectation_suite_ge_cloud_id", and "action_list".

    Raises:
        ge_exceptions.CheckpointError: If the substituted configuration has neither a batch_request nor validations.
    """
    assert not (run_id and run_name) and not (
        run_id and run_time
    ), "Please provide either a run_id or run_name and/or run_time."

    run_time = run_time or datetime.datetime.now()
    runtime_configuration = runtime_configuration or {}

    batch_request = get_batch_request_as_dict(batch_request=batch_request)
    validations = get_validations_with_batch_request_as_dict(validations=validations)

    runtime_kwargs: dict = {
        "template_name": template_name,
        "run_name_template": run_name_template,
        "expectation_suite_name": expectation_suite_name,
        "batch_request": batch_request,
        "action_list": action_list,
        "evaluation_parameters": evaluation_parameters,
        "runtime_configuration": runtime_configuration,
        "validations": validations,
        "profilers": profilers,
        "expectation_suite_ge_cloud_id": expectation_suite_ge_cloud_id,
    }
    substituted_runtime_config: dict = checkpoint.get_substituted_config(
        runtime_kwargs=runtime_kwargs
    )

    run_name_template = substituted_runtime_config.get("run_name_template")
    validations = substituted_runtime_config.get("validations") or []
    batch_request = substituted_runtime_config.get("batch_request")

    if not validations and not batch_request:
        raise ge_exceptions.CheckpointError(
            f'Checkpoint "{checkpoint.name}" must contain either a batch_request or validations.'
        )

    # run_name / run_id are still derived here (as in run()), even though they
    # are not part of the returned configuration; the helpers may raise.
    if run_name is None and run_name_template is not None:
        run_name = get_datetime_string_from_strftime_format(
            format_str=run_name_template, datetime_obj=run_time
        )
    run_id = run_id or RunIdentifier(run_name=run_name, run_time=run_time)

    # Fields resolved per validation and written back onto each entry.
    resolved_fields = (
        "batch_request",
        "expectation_suite_name",
        "expectation_suite_ge_cloud_id",
        "action_list",
    )
    validation_dict: dict
    for validation_dict in validations:
        resolved_validation: dict = get_substituted_validation_dict(
            substituted_runtime_config=substituted_runtime_config,
            validation_dict=validation_dict,
        )
        for field_name in resolved_fields:
            validation_dict[field_name] = resolved_validation.get(field_name)

    return substituted_runtime_config