def get_substituted_batch_request(
    substituted_runtime_config: dict,
    validation_batch_request: Optional[Union[BatchRequestBase, dict]] = None,
) -> Optional[Union[BatchRequest, RuntimeBatchRequest]]:
    """Combine the top-level and the validation-level batch requests into one.

    Args:
        substituted_runtime_config: Checkpoint configuration dictionary; only its
            "batch_request" entry is read.
        validation_batch_request: Batch request supplied for a single validation
            (typed object or dictionary).

    Returns:
        None when neither level supplies a batch request; otherwise the result of
        ``materialize_batch_request`` applied to the merged dictionary.

    Raises:
        ge_exceptions.CheckpointError: If the same attribute has a non-None value
            at both levels.
    """
    substituted_runtime_batch_request = substituted_runtime_config.get("batch_request")

    if substituted_runtime_batch_request is None and validation_batch_request is None:
        return None

    if substituted_runtime_batch_request is None:
        substituted_runtime_batch_request = {}

    if validation_batch_request is None:
        validation_batch_request = {}

    validation_batch_request = get_batch_request_as_dict(
        batch_request=validation_batch_request
    )
    substituted_runtime_batch_request = get_batch_request_as_dict(
        batch_request=substituted_runtime_batch_request
    )

    # Start from the top-level request and layer the validation-level one on top.
    # A key may legitimately appear at both levels as long as at most one of the
    # two values is set; "dict(**a, **b)" would raise TypeError for such a key, so
    # the merge is done explicitly and the non-None value is kept.
    effective_batch_request: dict = dict(substituted_runtime_batch_request)
    for key, value in validation_batch_request.items():
        if value is None:
            # Never let an unset validation-level value erase a top-level one.
            effective_batch_request.setdefault(key, None)
            continue
        if effective_batch_request.get(key) is not None:
            raise ge_exceptions.CheckpointError(
                f'BatchRequest attribute "{key}" was specified in both validation and top-level CheckpointConfig.'
            )
        effective_batch_request[key] = value

    return materialize_batch_request(batch_request=effective_batch_request)
def get_validations_with_batch_request_as_dict(
    validations: Optional[list] = None,
) -> Optional[list]:
    """Convert the "batch_request" entry of every validation to a dictionary.

    The entries are rewritten in place via ``get_batch_request_as_dict``; the
    very same list object that was passed in is returned.

    Args:
        validations: List of validation dictionaries (may be None or empty).

    Returns:
        The ``validations`` argument itself.
    """
    if not validations:
        # None and empty lists are handed back untouched.
        return validations

    for validation in validations:
        if "batch_request" not in validation:
            continue
        validation["batch_request"] = get_batch_request_as_dict(
            batch_request=validation["batch_request"]
        )

    return validations
def _build_checkpoint_config(self) -> CheckpointConfig:
    """Assemble a CheckpointConfig from this configurator's settings.

    The action list starts from ``self._default_action_list()`` and is extended
    when ``self.site_names`` / ``self.slack_webhook`` are set.  The remaining
    keyword arguments come from ``self.other_kwargs`` (which is updated in place
    when it is a non-empty dictionary), are overridden with the fixed
    name/class/version values, and are finally round-tripped through
    ``checkpointConfigSchema``.

    Returns:
        The CheckpointConfig built from the schema-validated keyword arguments.
    """
    action_list = self._default_action_list()

    if self.site_names:
        action_list = self._add_update_data_docs_action(action_list)

    if self.slack_webhook:
        action_list = self._add_slack_action(action_list)

    config_kwargs: dict = self.other_kwargs or {}

    # DataFrames shouldn't be saved to CheckpointStore
    batch_request = config_kwargs.get("batch_request")
    if batch_request_contains_batch_data(batch_request=batch_request):
        config_kwargs.pop("batch_request", None)
    else:
        config_kwargs["batch_request"] = get_batch_request_as_dict(
            batch_request=batch_request
        )

    # DataFrames shouldn't be saved to CheckpointStore
    validations = config_kwargs.get("validations")
    if batch_request_in_validations_contains_batch_data(validations=validations):
        config_kwargs.pop("validations", [])
    else:
        config_kwargs["validations"] = get_validations_with_batch_request_as_dict(
            validations=validations
        )

    specific_config_kwargs_overrides: dict = {
        "config_version": 1.0,
        "name": self.name,
        "class_name": "Checkpoint",
        "action_list": action_list,
        # Pop from "config_kwargs" rather than "self.other_kwargs": they are the
        # same object whenever "other_kwargs" is a non-empty dict, and this form
        # does not fail when "other_kwargs" is None.
        "ge_cloud_id": config_kwargs.pop("ge_cloud_id", None),
    }
    config_kwargs.update(specific_config_kwargs_overrides)

    # Roundtrip through schema validation to remove any illegal fields and/or restore any missing fields.
    checkpoint_config: dict = checkpointConfigSchema.load(
        CommentedMap(**config_kwargs)
    )
    config_kwargs = checkpointConfigSchema.dump(checkpoint_config)

    logger.debug(
        "SimpleCheckpointConfigurator built this CheckpointConfig:%s",
        checkpoint_config,
    )
    return CheckpointConfig(**config_kwargs)
def _generate_rule_overrides_from_batch_request(
    self, batch_request: Union[dict, BatchRequest, RuntimeBatchRequest]
) -> Dict[str, Dict[str, Any]]:
    """Iterates through the profiler's builder attributes and generates a set of
    Rules that contain overrides from the input batch request. This only applies to
    ParameterBuilder and any DomainBuilder with a COLUMN MetricDomainType.

    Note that we are passing ALL batches to the parameter builder. If not used carefully,
    a bias may creep in to the resulting estimates computed by these objects.

    Users of this override should be aware that a batch request should either have no
    notion of "current/active" batch or it is excluded.

    Args:
        batch_request: Data used to override builder attributes

    Returns:
        The dictionary representation of the Rules used as runtime arguments to `run()`
    """
    rules: List[Rule] = self.rules
    if not isinstance(batch_request, dict):
        batch_request = get_batch_request_as_dict(batch_request)
        logger.info("Converted batch request to dictionary: %s", batch_request)

    resulting_rules: Dict[str, Dict[str, Any]] = {}

    for rule in rules:
        domain_builder = rule.domain_builder
        if domain_builder.domain_type == MetricDomainTypes.COLUMN:
            # Give the domain builder its own dictionary.  Writing the
            # "index: -1" query into the shared one would also restrict every
            # parameter builder (and mutate the caller's dictionary).
            domain_builder.batch_request = {
                **batch_request,
                "data_connector_query": {"index": -1},
            }

        # Parameter builders receive the batch request without the index
        # restriction, i.e. as supplied by the caller.
        for parameter_builder in rule.parameter_builders or []:
            parameter_builder.batch_request = batch_request

        resulting_rules[rule.name] = rule.to_dict()

    return resulting_rules
def substitute_runtime_config(source_config: dict, runtime_kwargs: dict) -> dict:
    """Overlay runtime keyword arguments on top of a checkpoint configuration.

    Scalar settings are replaced, dictionary settings are merged with
    ``nested_update``, the action list is merged with
    ``get_updated_action_list``, and "validations" / "profilers" are extended
    with the runtime entries that are not already in ``source_config``.

    Args:
        source_config: Base configuration; it is deep-copied, never modified.
        runtime_kwargs: Overrides; entries whose value is None are ignored.

    Returns:
        ``source_config`` itself when ``runtime_kwargs`` is empty or holds only
        falsy values; otherwise a new dictionary with the overrides applied.
    """
    if not runtime_kwargs or not any(runtime_kwargs.values()):
        return source_config

    dest_config: dict = copy.deepcopy(source_config)

    # replace
    for replaced_key in (
        "template_name",
        "run_name_template",
        "expectation_suite_name",
        "expectation_suite_ge_cloud_id",
    ):
        replacement = runtime_kwargs.get(replaced_key)
        if replacement is not None:
            dest_config[replaced_key] = replacement

    # update
    runtime_batch_request = runtime_kwargs.get("batch_request")
    if runtime_batch_request is not None:
        dest_config["batch_request"] = nested_update(
            dest_config.get("batch_request") or {},
            get_batch_request_as_dict(batch_request=runtime_batch_request),
            dedup=True,
        )

    runtime_action_list = runtime_kwargs.get("action_list")
    if runtime_action_list is not None:
        dest_config["action_list"] = get_updated_action_list(
            base_action_list=dest_config.get("action_list") or [],
            other_action_list=runtime_action_list,
        )

    for merged_key in ("evaluation_parameters", "runtime_configuration"):
        runtime_mapping = runtime_kwargs.get(merged_key)
        if runtime_mapping is not None:
            dest_config[merged_key] = nested_update(
                dest_config.get(merged_key) or {},
                runtime_mapping,
                dedup=True,
            )

    for list_key in ("validations", "profilers"):
        runtime_items = runtime_kwargs.get(list_key)
        if runtime_items is None:
            continue
        merged_items = dest_config.get(list_key) or []
        already_present = source_config.get(list_key) or []
        # Only append runtime entries the source configuration does not contain.
        merged_items.extend(
            item for item in runtime_items if item not in already_present
        )
        dest_config[list_key] = merged_items

    return dest_config
def add_checkpoint(
    data_context: "DataContext",  # noqa: F821
    checkpoint_store: CheckpointStore,
    checkpoint_store_name: str,
    ge_cloud_mode: bool,
    name: str,
    config_version: Optional[Union[int, float]] = None,
    template_name: Optional[str] = None,
    module_name: Optional[str] = None,
    class_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[dict] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    # Next two fields are for LegacyCheckpoint configuration
    validation_operator_name: Optional[str] = None,
    batches: Optional[List[dict]] = None,
    # the following four arguments are used by SimpleCheckpoint
    site_names: Optional[Union[str, List[str]]] = None,
    slack_webhook: Optional[str] = None,
    notify_on: Optional[str] = None,
    notify_with: Optional[Union[str, List[str]]] = None,
    ge_cloud_id: Optional[str] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
) -> Union[Checkpoint, LegacyCheckpoint]:
    """Instantiate a checkpoint from the given settings and save it to the store.

    Args:
        data_context: Passed to the instantiated class as runtime environment.
        checkpoint_store: Store that receives the checkpoint configuration.
        checkpoint_store_name: Only used in error messages.
        ge_cloud_mode: Selects a GeCloudIdentifier (True) or a
            ConfigurationIdentifier (False) as the store key.
        name: Checkpoint name; also the configuration key outside cloud mode.

        All remaining arguments are copied into the checkpoint configuration
        under keys of the same name.

    Returns:
        The newly instantiated checkpoint object.

    Raises:
        ge_exceptions.InvalidConfigError: If ``batch_request`` or any entry of
            ``validations`` is reported to contain batch data.
    """
    # These conversions protect against typed objects (BatchRequest and/or RuntimeBatchRequest) in the arguments.
    batch_request = get_batch_request_as_dict(batch_request=batch_request)
    validations = get_validations_with_batch_request_as_dict(validations=validations)

    # DataFrames shouldn't be saved to CheckpointStore
    if batch_request_contains_batch_data(batch_request=batch_request):
        raise ge_exceptions.InvalidConfigError(
            f'batch_data found in batch_request cannot be saved to CheckpointStore "{checkpoint_store_name}"'
        )

    if batch_request_in_validations_contains_batch_data(validations=validations):
        raise ge_exceptions.InvalidConfigError(
            f'batch_data found in validations cannot be saved to CheckpointStore "{checkpoint_store_name}"'
        )

    raw_config: dict = dict(
        name=name,
        config_version=config_version,
        template_name=template_name,
        module_name=module_name,
        class_name=class_name,
        run_name_template=run_name_template,
        expectation_suite_name=expectation_suite_name,
        batch_request=batch_request,
        action_list=action_list,
        evaluation_parameters=evaluation_parameters,
        runtime_configuration=runtime_configuration,
        validations=validations,
        profilers=profilers,
        # Next two fields are for LegacyCheckpoint configuration
        validation_operator_name=validation_operator_name,
        batches=batches,
        # the following four keys are used by SimpleCheckpoint
        site_names=site_names,
        slack_webhook=slack_webhook,
        notify_on=notify_on,
        notify_with=notify_with,
        ge_cloud_id=ge_cloud_id,
        expectation_suite_ge_cloud_id=expectation_suite_ge_cloud_id,
    )
    filtered_config = deep_filter_properties_iterable(
        properties=raw_config,
        clean_falsy=True,
    )

    checkpoint: Union[
        Checkpoint, SimpleCheckpoint, LegacyCheckpoint
    ] = instantiate_class_from_config(
        config=filtered_config,
        runtime_environment={
            "data_context": data_context,
        },
        config_defaults={
            "module_name": "great_expectations.checkpoint",
        },
    )

    store_key: Union[GeCloudIdentifier, ConfigurationIdentifier] = (
        GeCloudIdentifier(resource_type="contract", ge_cloud_id=ge_cloud_id)
        if ge_cloud_mode
        else ConfigurationIdentifier(configuration_key=name)
    )

    stored_config = checkpoint.get_config()
    checkpoint_ref = checkpoint_store.set(key=store_key, value=stored_config)
    if isinstance(checkpoint_ref, GeCloudIdAwareRef):
        # The store handed back a cloud id; record it on the checkpoint.
        checkpoint.ge_cloud_id = uuid.UUID(checkpoint_ref.ge_cloud_id)

    return checkpoint
def run_checkpoint(
    data_context: "DataContext",  # noqa: F821
    checkpoint_store: CheckpointStore,
    checkpoint_name: Optional[str] = None,
    template_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[Union[BatchRequest, RuntimeBatchRequest, dict]] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    run_id: Optional[Union[str, int, float]] = None,
    run_name: Optional[str] = None,
    run_time: Optional[datetime.datetime] = None,
    result_format: Optional[str] = None,
    ge_cloud_id: Optional[str] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
    **kwargs,
) -> CheckpointResult:
    """
    Validate against a pre-defined Checkpoint. (Experimental)

    The Checkpoint is looked up with ``get_checkpoint`` and then run with the
    arguments given here; arguments with falsy values are dropped before the call.

    Args:
        data_context: DataContext for Checkpoint class instantiation purposes
        checkpoint_store: CheckpointStore for managing Checkpoint configurations
        checkpoint_name: The name of a Checkpoint defined via the CLI or by manually creating a yml file
        template_name: The name of a Checkpoint template to retrieve from the CheckpointStore
        run_name_template: The template to use for run_name
        expectation_suite_name: Expectation suite to be used by Checkpoint run
        batch_request: Batch request to be used by Checkpoint run
        action_list: List of actions to be performed by the Checkpoint
        evaluation_parameters: $parameter_name syntax references to be evaluated at runtime
        runtime_configuration: Runtime configuration override parameters
        validations: Validations to be performed by the Checkpoint run
        profilers: Profilers to be used by the Checkpoint run
        run_id: The run_id for the validation; if None, a default value will be used
        run_name: The run_name for the validation; if None, a default value will be used
        run_time: The date/time of the run
        result_format: One of several supported formatting directives for expectation validation results
        ge_cloud_id: Great Expectations Cloud id for the checkpoint
        expectation_suite_ge_cloud_id: Great Expectations Cloud id for the expectation suite
        **kwargs: Additional kwargs to pass to the validation operator

    Returns:
        CheckpointResult
    """
    checkpoint: Union[Checkpoint, SimpleCheckpoint, LegacyCheckpoint] = get_checkpoint(
        data_context=data_context,
        checkpoint_store=checkpoint_store,
        name=checkpoint_name,
        ge_cloud_id=ge_cloud_id,
    )
    stored_config: CheckpointConfig = checkpoint.get_config()

    # An explicit "result_format" argument wins; otherwise fall back to the one
    # in the stored runtime configuration, and finally to "SUMMARY".
    if "runtime_configuration" in stored_config:
        stored_runtime_configuration = stored_config.runtime_configuration
        if (
            stored_runtime_configuration
            and "result_format" in stored_runtime_configuration
        ):
            result_format = result_format or stored_runtime_configuration.get(
                "result_format"
            )

    if result_format is None:
        result_format = {"result_format": "SUMMARY"}

    batch_request = get_batch_request_as_dict(batch_request=batch_request)
    validations = get_validations_with_batch_request_as_dict(validations=validations)

    call_arguments: dict = dict(
        template_name=template_name,
        run_name_template=run_name_template,
        expectation_suite_name=expectation_suite_name,
        batch_request=batch_request,
        action_list=action_list,
        evaluation_parameters=evaluation_parameters,
        runtime_configuration=runtime_configuration,
        validations=validations,
        profilers=profilers,
        run_id=run_id,
        run_name=run_name,
        run_time=run_time,
        result_format=result_format,
        expectation_suite_ge_cloud_id=expectation_suite_ge_cloud_id,
    )

    # Keep the stored entries that share a key with the call arguments, then let
    # the call arguments overwrite them.
    merged_config: dict = {}
    for key, value in stored_config.items():
        if key in call_arguments:
            merged_config[key] = value
    merged_config.update(call_arguments)

    run_arguments: dict = dict(**merged_config, **kwargs)
    filter_properties_dict(
        properties=run_arguments,
        clean_falsy=True,
        inplace=True,
    )

    return checkpoint.run(**run_arguments)
def _anonymize_checkpoint_run(self, obj: object, **kwargs) -> dict:
    """
    Traverse the entire Checkpoint configuration structure (as per its formal, validated Marshmallow schema) and
    anonymize every field that can be customized by a user (public fields are recorded as their original names).

    Args:
        obj: Not used by this method.
        **kwargs: Checkpoint run arguments.  The keys read here are "name",
            "config_version", "template_name", "run_name_template",
            "expectation_suite_name", "batch_request", "action_list",
            "validations", "run_id", "run_name", "run_time",
            "expectation_suite_ge_cloud_id" and every key listed in
            CHECKPOINT_OPTIONAL_TOP_LEVEL_KEYS.

    Returns:
        Dictionary of anonymized properties, after
        ``deep_filter_properties_iterable(..., clean_falsy=True)`` has been
        applied to it in place.
    """

    def _anonymize_actions(
        actions: Optional[List[dict]], payload_field: str
    ) -> Optional[List[dict]]:
        """Anonymize every action config; return None if there are none or any entry fails."""
        if not actions:
            return None
        # noinspection PyBroadException
        try:
            return [
                self._aggregate_anonymizer.anonymize(
                    action_name=action_config_dict["name"],
                    action_config=action_config_dict["action"],
                )
                for action_config_dict in actions
            ]
        except Exception:
            # Deliberately best effort: a malformed action list must not break
            # the whole payload, so the field is simply left out.
            logger.debug(
                "anonymize_checkpoint_run: Unable to create %s payload field",
                payload_field,
            )
            return None

    anonymized_name: Optional[str] = self._anonymize_string(kwargs.get("name"))

    config_version: Optional[Union[Number, str]] = kwargs.get("config_version")
    if config_version is None:
        config_version = 1.0

    anonymized_template_name: Optional[str] = self._anonymize_string(
        kwargs.get("template_name")
    )
    anonymized_run_name_template: Optional[str] = self._anonymize_string(
        kwargs.get("run_name_template")
    )
    anonymized_expectation_suite_name: Optional[str] = self._anonymize_string(
        kwargs.get("expectation_suite_name")
    )

    batch_request: Optional[
        Union[BatchRequest, RuntimeBatchRequest, dict]
    ] = kwargs.get("batch_request")
    if batch_request is None:
        batch_request = {}
    # Normalize exactly like the per-validation batch requests below, so that
    # the "**" unpacking always operates on a dictionary.
    batch_request = get_batch_request_as_dict(batch_request=batch_request)
    anonymized_batch_request: Optional[
        Dict[str, List[str]]
    ] = self._aggregate_anonymizer.anonymize(*(), **batch_request)

    anonymized_action_list: Optional[List[dict]] = _anonymize_actions(
        kwargs.get("action_list"), "anonymized_action_list"
    )

    validations: Optional[List[dict]] = kwargs.get("validations")
    anonymized_validations: List[dict] = []
    for validation_obj in validations or []:
        validation_batch_request: Optional[
            Union[BatchRequest, RuntimeBatchRequest, dict]
        ] = validation_obj.get("batch_request")
        if validation_batch_request is None:
            validation_batch_request = {}
        validation_batch_request = get_batch_request_as_dict(
            batch_request=validation_batch_request
        )

        # Falsy members are stripped by the deep filter applied to the final
        # payload, so every key can be set unconditionally here.
        anonymized_validations.append(
            {
                "anonymized_batch_request": self._aggregate_anonymizer.anonymize(
                    *(), **validation_batch_request
                ),
                "anonymized_expectation_suite_name": self._anonymize_string(
                    validation_obj.get("expectation_suite_name")
                ),
                "anonymized_action_list": _anonymize_actions(
                    validation_obj.get("action_list"),
                    "anonymized_validation_action_list",
                ),
            }
        )

    run_id: Optional[Union[str, RunIdentifier]] = kwargs.get("run_id")
    anonymized_run_id: Optional[str] = (
        None if run_id is None else self._anonymize_string(str(run_id))
    )

    run_name: Optional[str] = kwargs.get("run_name")
    anonymized_run_name: Optional[str] = (
        None if run_name is None else self._anonymize_string(run_name)
    )

    run_time: Optional[Union[str, datetime.datetime]] = kwargs.get("run_time")
    anonymized_run_time: Optional[str] = (
        None if run_time is None else self._anonymize_string(str(run_time))
    )

    expectation_suite_ge_cloud_id: Optional[str] = kwargs.get(
        "expectation_suite_ge_cloud_id"
    )
    anonymized_expectation_suite_ge_cloud_id: Optional[str] = (
        None
        if expectation_suite_ge_cloud_id is None
        else self._anonymize_string(str(expectation_suite_ge_cloud_id))
    )

    # Record which optional top-level keys were supplied with a truthy value.
    checkpoint_optional_top_level_keys: List[str] = [
        attribute_name
        for attribute_name in sorted(CHECKPOINT_OPTIONAL_TOP_LEVEL_KEYS)
        if kwargs.get(attribute_name)
    ]

    anonymized_checkpoint_run_properties_dict: Dict[str, Any] = {
        "anonymized_name": anonymized_name,
        "config_version": config_version,
        "anonymized_template_name": anonymized_template_name,
        "anonymized_run_name_template": anonymized_run_name_template,
        "anonymized_expectation_suite_name": anonymized_expectation_suite_name,
        "anonymized_batch_request": anonymized_batch_request,
        "anonymized_action_list": anonymized_action_list,
        "anonymized_validations": anonymized_validations,
        "anonymized_run_id": anonymized_run_id,
        "anonymized_run_name": anonymized_run_name,
        "anonymized_run_time": anonymized_run_time,
        "anonymized_expectation_suite_ge_cloud_id": anonymized_expectation_suite_ge_cloud_id,
        "checkpoint_optional_top_level_keys": checkpoint_optional_top_level_keys,
    }

    deep_filter_properties_iterable(
        properties=anonymized_checkpoint_run_properties_dict,
        clean_falsy=True,
        inplace=True,
    )

    return anonymized_checkpoint_run_properties_dict
def run(
    self,
    template_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[Union[BatchRequestBase, dict]] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    run_id: Optional[Union[str, RunIdentifier]] = None,
    run_name: Optional[str] = None,
    run_time: Optional[Union[str, datetime.datetime]] = None,
    result_format: Optional[Union[str, dict]] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
) -> CheckpointResult:
    """Run this Checkpoint with optional runtime overrides.

    The overrides are merged into the Checkpoint configuration through
    ``self.get_substituted_config``.  Each resulting validation (or, if there
    are none, the top-level batch request) is handed to ``self._run_validation``
    and the individual run results are collected into one CheckpointResult.

    Args:
        run_id: Mutually exclusive with ``run_name`` and ``run_time``.
        run_name: Derived from the run name template when omitted and a
            template is available.
        run_time: Defaults to the current time.
        result_format: Falls back to ``runtime_configuration["result_format"]``.

        The remaining arguments are passed as runtime overrides under keys of
        the same name.

    Returns:
        CheckpointResult holding the run id, the merged run results and
        ``self.config``.

    Raises:
        ge_exceptions.CheckpointError: If the substituted configuration has
            neither validations nor a batch request.
    """
    assert not (run_id and run_name) and not (
        run_id and run_time
    ), "Please provide either a run_id or run_name and/or run_time."

    run_time = run_time or datetime.datetime.now()
    runtime_configuration = runtime_configuration or {}
    result_format = result_format or runtime_configuration.get("result_format")

    batch_request = get_batch_request_as_dict(batch_request=batch_request)
    validations = get_validations_with_batch_request_as_dict(validations=validations)

    runtime_kwargs: dict = dict(
        template_name=template_name,
        run_name_template=run_name_template,
        expectation_suite_name=expectation_suite_name,
        batch_request=batch_request or {},
        action_list=action_list or [],
        evaluation_parameters=evaluation_parameters or {},
        runtime_configuration=runtime_configuration or {},
        validations=validations or [],
        profilers=profilers or [],
        expectation_suite_ge_cloud_id=expectation_suite_ge_cloud_id,
    )
    substituted_runtime_config: dict = self.get_substituted_config(
        runtime_kwargs=runtime_kwargs
    )

    run_name_template = substituted_runtime_config.get("run_name_template")
    batch_request = substituted_runtime_config.get("batch_request")
    validations = substituted_runtime_config.get("validations") or []

    if not validations and not batch_request:
        raise ge_exceptions.CheckpointError(
            f'Checkpoint "{self.name}" must contain either a batch_request or validations.'
        )

    if run_name is None and run_name_template is not None:
        run_name = get_datetime_string_from_strftime_format(
            format_str=run_name_template, datetime_obj=run_time
        )

    run_id = run_id or RunIdentifier(run_name=run_name, run_time=run_time)

    # Use AsyncExecutor to speed up I/O bound validations by running them in parallel with multithreading (if
    # concurrency is enabled in the data context configuration) -- please see the below arguments used to initialize
    # AsyncExecutor and the corresponding AsyncExecutor docstring for more details on when multiple threads are
    # used.
    with AsyncExecutor(
        self.data_context.concurrency, max_workers=len(validations)
    ) as async_executor:
        # noinspection PyUnresolvedReferences
        async_validation_operator_results: List[
            AsyncResult[ValidationOperatorResult]
        ] = []

        # Arguments shared by every "_run_validation" call.
        shared_arguments: dict = dict(
            substituted_runtime_config=substituted_runtime_config,
            async_validation_operator_results=async_validation_operator_results,
            async_executor=async_executor,
            result_format=result_format,
            run_id=run_id,
        )

        if validations:
            for idx, validation_dict in enumerate(validations):
                self._run_validation(
                    idx=idx,
                    validation_dict=validation_dict,
                    **shared_arguments,
                )
        else:
            # No validations: run once, driven by the top-level batch request.
            self._run_validation(**shared_arguments)

        run_results: dict = {}
        for pending_result in async_validation_operator_results:
            run_results.update(pending_result.result().run_results)

    return CheckpointResult(
        run_id=run_id,
        run_results=run_results,
        checkpoint_config=self.config,
    )
def run_with_runtime_args(
    self,
    template_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[Union[BatchRequestBase, dict]] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    run_id: Optional[Union[str, int, float]] = None,
    run_name: Optional[str] = None,
    run_time: Optional[datetime.datetime] = None,
    result_format: Optional[str] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
    **kwargs,
) -> CheckpointResult:
    """Run this Checkpoint, forwarding only the arguments that carry a value.

    ``result_format`` is resolved first (explicit argument, then the stored
    runtime configuration, then "SUMMARY").  The call arguments together with
    ``**kwargs`` are then stripped of falsy values and passed to ``self.run``.

    Returns:
        The CheckpointResult produced by ``self.run``.
    """
    stored_config: CheckpointConfig = cast(CheckpointConfig, self.get_config())

    if "runtime_configuration" in stored_config:
        stored_runtime_configuration = stored_config.runtime_configuration
        if (
            stored_runtime_configuration
            and "result_format" in stored_runtime_configuration
        ):
            result_format = result_format or stored_runtime_configuration.get(
                "result_format"
            )

    if result_format is None:
        result_format = {"result_format": "SUMMARY"}

    batch_request = get_batch_request_as_dict(batch_request=batch_request)
    validations = get_validations_with_batch_request_as_dict(validations=validations)

    call_arguments: dict = dict(
        template_name=template_name,
        run_name_template=run_name_template,
        expectation_suite_name=expectation_suite_name,
        batch_request=batch_request,
        action_list=action_list,
        evaluation_parameters=evaluation_parameters,
        runtime_configuration=runtime_configuration,
        validations=validations,
        profilers=profilers,
        run_id=run_id,
        run_name=run_name,
        run_time=run_time,
        result_format=result_format,
        expectation_suite_ge_cloud_id=expectation_suite_ge_cloud_id,
    )

    # Stored entries sharing a key with the call arguments go in first and are
    # then overwritten by the call arguments themselves.
    merged_config: dict = dict(
        (key, value) for key, value in stored_config.items() if key in call_arguments
    )
    merged_config.update(call_arguments)

    run_arguments: dict = dict(**merged_config, **kwargs)
    filter_properties_dict(
        properties=run_arguments,
        clean_falsy=True,
        inplace=True,
    )

    return self.run(**run_arguments)
def resolve_config_using_acceptable_arguments(
    checkpoint: "Checkpoint",  # noqa: F821
    template_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[Union[BatchRequest, RuntimeBatchRequest, dict]] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    run_id: Optional[Union[str, RunIdentifier]] = None,
    run_name: Optional[str] = None,
    run_time: Optional[Union[str, datetime.datetime]] = None,
    result_format: Optional[Union[str, dict]] = None,
    expectation_suite_ge_cloud_id: Optional[str] = None,
) -> dict:
    """
    Reconcile the Checkpoint configuration (e.g., obtained from the Checkpoint store) with dynamically supplied
    arguments, yielding the Checkpoint specification that is ready for running validation on it.

    The Checkpoint configuration is hierarchical: entities such as BatchRequest, expectation_suite_name, and
    action_list can be specified at the top Checkpoint level, with overrides provided at lower levels (e.g., in the
    validations section).  Here every validation is resolved with ``get_substituted_validation_dict`` and its
    "batch_request", "expectation_suite_name", "expectation_suite_ge_cloud_id" and "action_list" entries are
    overwritten with the resolved values.  This normalized form is what usage statistics require.

    Returns:
        The substituted runtime configuration, with its validations resolved in place.

    Raises:
        ge_exceptions.CheckpointError: If the substituted configuration has neither validations nor a batch request.
    """
    assert not (run_id and run_name) and not (
        run_id and run_time
    ), "Please provide either a run_id or run_name and/or run_time."

    run_time = run_time or datetime.datetime.now()
    runtime_configuration = runtime_configuration or {}

    batch_request = get_batch_request_as_dict(batch_request=batch_request)
    validations = get_validations_with_batch_request_as_dict(validations=validations)

    runtime_kwargs: dict = dict(
        template_name=template_name,
        run_name_template=run_name_template,
        expectation_suite_name=expectation_suite_name,
        batch_request=batch_request,
        action_list=action_list,
        evaluation_parameters=evaluation_parameters,
        runtime_configuration=runtime_configuration,
        validations=validations,
        profilers=profilers,
        expectation_suite_ge_cloud_id=expectation_suite_ge_cloud_id,
    )
    substituted_runtime_config: dict = checkpoint.get_substituted_config(
        runtime_kwargs=runtime_kwargs
    )

    run_name_template = substituted_runtime_config.get("run_name_template")
    validations = substituted_runtime_config.get("validations") or []
    batch_request = substituted_runtime_config.get("batch_request")

    if not validations and not batch_request:
        raise ge_exceptions.CheckpointError(
            f'Checkpoint "{checkpoint.name}" must contain either a batch_request or validations.'
        )

    if run_name is None and run_name_template is not None:
        run_name = get_datetime_string_from_strftime_format(
            format_str=run_name_template, datetime_obj=run_time
        )

    run_id = run_id or RunIdentifier(run_name=run_name, run_time=run_time)

    # Entries that are copied from the resolved validation back into the
    # validation dictionary (missing entries become None).
    resolved_keys = (
        "batch_request",
        "expectation_suite_name",
        "expectation_suite_ge_cloud_id",
        "action_list",
    )
    for validation_dict in validations:
        resolved_validation: dict = get_substituted_validation_dict(
            substituted_runtime_config=substituted_runtime_config,
            validation_dict=validation_dict,
        )
        for resolved_key in resolved_keys:
            validation_dict[resolved_key] = resolved_validation.get(resolved_key)

    return substituted_runtime_config
def batch_request(self, value: Optional[Union[BatchRequestBase, dict]]) -> None:
    """Store the batch request, converting typed objects to a dictionary.

    None and dictionaries are stored as given; any other value is first passed
    through ``get_batch_request_as_dict``.
    """
    needs_conversion = value is not None and not isinstance(value, dict)
    self._batch_request = (
        get_batch_request_as_dict(batch_request=value) if needs_conversion else value
    )