def _build_checkpoint_config(self) -> CheckpointConfig:
    """Assemble a CheckpointConfig from this configurator's settings.

    Starts from the default action list (optionally extended with a
    data-docs update action and a Slack notification action), wraps it in a
    base CheckpointConfig, then merges in any extra keyword arguments that
    were supplied at construction time.
    """
    actions = self._default_action_list()
    if self.site_names:
        actions = self._add_update_data_docs_action(actions)
    if self.slack_webhook:
        actions = self._add_slack_action(actions)

    config = CheckpointConfig(
        config_version=1.0,
        name=self.name,
        class_name="Checkpoint",
        action_list=actions,
    )

    if self.other_kwargs:
        # "config_version" is popped (this mutates self.other_kwargs) so it
        # is not passed twice below; a falsy value falls back to 1.0.
        version = self.other_kwargs.pop("config_version", 1.0) or 1.0
        config.update(
            other_config=CheckpointConfig(
                config_version=version,
                **self.other_kwargs,
            )
        )

    serialized = json.dumps(config.to_json_dict(), indent=4)
    logger.debug(
        f"SimpleCheckpointConfigurator built this CheckpointConfig:"
        f" {serialized}"
    )
    return config
def instantiate_from_config_with_runtime_args(
    checkpoint_config: CheckpointConfig,
    data_context: "DataContext",  # noqa: F821
    **runtime_kwargs,
) -> "Checkpoint":
    """Instantiate a Checkpoint from its config, overlaying runtime kwargs.

    Every runtime keyword argument whose value is not None overrides the
    corresponding entry of the serialized checkpoint config; falsy entries
    are then stripped before the class is instantiated.
    """
    config: dict = checkpoint_config.to_json_dict()

    # Non-None runtime values take precedence over the stored configuration.
    config.update(
        {key: value for key, value in runtime_kwargs.items() if value is not None}
    )

    config = filter_properties_dict(properties=config, clean_falsy=True)

    checkpoint: Checkpoint = instantiate_class_from_config(
        config=config,
        runtime_environment={"data_context": data_context},
        config_defaults={"module_name": "great_expectations.checkpoint"},
    )
    return checkpoint
def get_substituted_validation_dict(
    substituted_runtime_config: CheckpointConfig, validation_dict: dict
) -> dict:
    """Merge one validation entry with the checkpoint-level configuration.

    Validation-level values win where present; otherwise the checkpoint-level
    value is used. Action lists and the nested parameter dicts are merged
    rather than wholesale replaced. The result is validated before return.
    """
    batch_request = get_runtime_batch_request(
        substituted_runtime_config=substituted_runtime_config,
        validation_batch_request=validation_dict.get("batch_request"),
    )
    suite_name = (
        validation_dict.get("expectation_suite_name")
        or substituted_runtime_config.expectation_suite_name
    )
    action_list = CheckpointConfig.get_updated_action_list(
        base_action_list=substituted_runtime_config.action_list,
        other_action_list=validation_dict.get("action_list", {}),
    )
    evaluation_parameters = nested_update(
        substituted_runtime_config.evaluation_parameters,
        validation_dict.get("evaluation_parameters", {}),
    )
    runtime_configuration = nested_update(
        substituted_runtime_config.runtime_configuration,
        validation_dict.get("runtime_configuration", {}),
    )

    substituted_validation_dict = {
        "batch_request": batch_request,
        "expectation_suite_name": suite_name,
        "action_list": action_list,
        "evaluation_parameters": evaluation_parameters,
        "runtime_configuration": runtime_configuration,
    }

    # "name" is optional and only carried through when explicitly provided.
    if validation_dict.get("name") is not None:
        substituted_validation_dict["name"] = validation_dict["name"]

    validate_validation_dict(substituted_validation_dict)
    return substituted_validation_dict
def in_memory_checkpoint_config():
    """Return a fully-specified CheckpointConfig for the taxi demo suite."""
    store_actions = [
        {
            "name": "store_validation_result",
            "action": {"class_name": "StoreValidationResultAction"},
        },
        {
            "name": "store_evaluation_params",
            "action": {"class_name": "StoreEvaluationParametersAction"},
        },
        {
            "name": "update_data_docs",
            "action": {"class_name": "UpdateDataDocsAction", "site_names": []},
        },
    ]
    validations = [
        {
            "batch_request": {
                "datasource_name": "my_datasource",
                "data_connector_name": "default_inferred_data_connector_name",
                "data_asset_name": "yellow_tripdata_sample_2019-01.csv",
                "data_connector_query": {"index": -1},
            },
        }
    ]
    return CheckpointConfig(
        name="taxi.pass.from_config",
        config_version=1.0,
        template_name=None,
        module_name="great_expectations.checkpoint",
        class_name="Checkpoint",
        run_name_template="%Y%m%d-%H%M%S-my-run-name-template",
        expectation_suite_name="taxi.demo",
        batch_request=None,
        action_list=store_actions,
        evaluation_parameters={},
        runtime_configuration={},
        validations=validations,
        profilers=[],
        ge_cloud_id=None,
        expectation_suite_ge_cloud_id=None,
    )
def test_resolve_config_using_acceptable_arguments(checkpoint):
    """resolve_config_using_acceptable_arguments must surface runtime kwargs
    at both the top level and the per-validation level of the result."""
    df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    runtime_batch_request = {
        "runtime_parameters": {"batch_data": df},
        "batch_identifiers": {"default_identifier_name": "my_simple_df"},
    }
    runtime_kwargs = {
        "batch_request": runtime_batch_request,
        "result_format": {"result_format": "SUMMARY"},
    }

    # Matching how this is called in usage_statistics.py (parameter style)
    resolved_runtime_kwargs: dict = (
        CheckpointConfig.resolve_config_using_acceptable_arguments(
            *(checkpoint,), **runtime_kwargs
        )
    )

    # The top-level batch_request should round-trip unchanged.
    expected_top_level = convert_to_json_serializable(
        data={
            "runtime_parameters": {"batch_data": df},
            "batch_identifiers": {"default_identifier_name": "my_simple_df"},
        }
    )
    actual_top_level = convert_to_json_serializable(
        data=resolved_runtime_kwargs["batch_request"]
    )
    assert actual_top_level == expected_top_level

    # The first validation inherits the runtime parameters and suite name.
    first_validation = resolved_runtime_kwargs["validations"][0]
    assert first_validation["batch_request"] == RuntimeBatchRequest(
        datasource_name="example_datasource",
        data_connector_name="default_runtime_data_connector_name",
        data_asset_name="my_data_asset",
        batch_identifiers={"default_identifier_name": "my_simple_df"},
        runtime_parameters={"batch_data": df},
    )
    assert first_validation["expectation_suite_name"] == "test_suite"
def get_substituted_config(
    self,
    config: Optional[Union[CheckpointConfig, dict]] = None,
    runtime_kwargs: Optional[dict] = None,
) -> CheckpointConfig:
    """Return this checkpoint's config with template merging applied.

    When a template is involved (via runtime_kwargs or the config itself),
    the template's config is fetched, recursively resolved, and merged with
    this config; the merged result is cached on self._substituted_config.
    Finally, $-style config variables are substituted.

    Args:
        config: Base config to substitute; defaults to self.config. A plain
            dict is coerced to CheckpointConfig first.
        runtime_kwargs: Per-run overrides merged on top of the result.

    Raises:
        ge_exceptions.CheckpointError: if the template's config_version does
            not match this config's config_version.
    """
    runtime_kwargs = runtime_kwargs or {}
    if config is None:
        config = self.config
    if isinstance(config, dict):
        config = CheckpointConfig(**config)
    substituted_config: Union[CheckpointConfig, dict]
    # Fast path: reuse the cached substitution when no template is in play.
    if (
        self._substituted_config is not None
        and not runtime_kwargs.get("template_name")
        and not config.template_name
    ):
        # Copy so runtime overrides never leak into the cached config.
        substituted_config = deepcopy(self._substituted_config)
        if any(runtime_kwargs.values()):
            substituted_config.update(runtime_kwargs=runtime_kwargs)
    else:
        template_name = runtime_kwargs.get("template_name") or config.template_name

        if not template_name:
            # NOTE(review): this branch uses copy.deepcopy while the branch
            # above uses bare deepcopy -- assumes both names are imported at
            # module level; confirm and unify if so.
            substituted_config = copy.deepcopy(config)
            if any(runtime_kwargs.values()):
                substituted_config.update(runtime_kwargs=runtime_kwargs)

            self._substituted_config = substituted_config
        else:
            checkpoint = self.data_context.get_checkpoint(name=template_name)
            template_config = checkpoint.config

            if template_config.config_version != config.config_version:
                raise ge_exceptions.CheckpointError(
                    f"Invalid template '{template_name}' (ver. {template_config.config_version}) for Checkpoint "
                    f"'{config}' (ver. {config.config_version}. Checkpoints can only use templates with the same config_version."
                )

            # Templates may themselves reference templates; resolve recursively.
            if template_config.template_name is not None:
                substituted_config = self.get_substituted_config(
                    config=template_config
                )
            else:
                substituted_config = template_config

            # merge template with config
            substituted_config.update(
                other_config=config, runtime_kwargs=runtime_kwargs
            )

            # don't replace _substituted_config if already exists
            if self._substituted_config is None:
                self._substituted_config = substituted_config

    return self._substitute_config_variables(config=substituted_config)
def __init__(
    self,
    name: str,
    data_context: "DataContext",  # noqa: F821
    config_version: Optional[Union[int, float]] = None,
    template_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[Union[BatchRequest, RuntimeBatchRequest, dict]] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    validation_operator_name: Optional[str] = None,
    batches: Optional[List[dict]] = None,
    ge_cloud_id: Optional[UUID] = None,
    expectation_suite_ge_cloud_id: Optional[UUID] = None,
):
    """Build a Checkpoint from primitive configuration arguments.

    Rejects any batch_request (top-level or inside a validation) that carries
    in-memory batch_data, then constructs a CheckpointConfig and delegates to
    the superclass.

    Raises:
        ValueError: if batch_data is found in any batch_request.
    """
    # Only primitive types are allowed as constructor arguments; data frames are supplied to "run()" as arguments.
    # The message is a plain string (no placeholders -- the original f-prefix
    # was a flake8 F541 violation) defined once so both checks raise the
    # identical text.
    batch_data_error_message: str = (
        """Error: batch_data found in batch_request -- only primitive types are allowed as Checkpoint \
constructor arguments.
"""
    )
    if batch_request_contains_batch_data(batch_request=batch_request):
        raise ValueError(batch_data_error_message)

    if batch_request_in_validations_contains_batch_data(validations=validations):
        raise ValueError(batch_data_error_message)

    checkpoint_config: CheckpointConfig = CheckpointConfig(
        name=name,
        config_version=config_version,
        template_name=template_name,
        run_name_template=run_name_template,
        expectation_suite_name=expectation_suite_name,
        batch_request=batch_request,
        action_list=action_list,
        evaluation_parameters=evaluation_parameters,
        runtime_configuration=runtime_configuration,
        validations=validations,
        profilers=profilers,
        validation_operator_name=validation_operator_name,
        batches=batches,
        ge_cloud_id=ge_cloud_id,
        expectation_suite_ge_cloud_id=expectation_suite_ge_cloud_id,
    )
    super().__init__(
        checkpoint_config=checkpoint_config,
        data_context=data_context,
    )
def __init__(
    self,
    name: str,
    data_context,
    config_version: Optional[Union[int, float]] = None,
    template_name: Optional[str] = None,
    module_name: Optional[str] = None,
    class_name: Optional[str] = None,
    run_name_template: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    batch_request: Optional[Union[BatchRequest, dict]] = None,
    action_list: Optional[List[dict]] = None,
    evaluation_parameters: Optional[dict] = None,
    runtime_configuration: Optional[dict] = None,
    validations: Optional[List[dict]] = None,
    profilers: Optional[List[dict]] = None,
    validation_operator_name: Optional[str] = None,
    batches: Optional[List[dict]] = None,
    ge_cloud_id: Optional[UUID] = None,
):
    """Store the checkpoint's name, owning DataContext, and raw config."""
    self._name = name

    # Note the gross typechecking to avoid a circular import
    if "DataContext" not in str(type(data_context)):
        raise TypeError("A Checkpoint requires a valid DataContext")
    self._data_context = data_context

    self._config = CheckpointConfig(
        name=name,
        config_version=config_version,
        template_name=template_name,
        module_name=module_name,
        class_name=class_name,
        run_name_template=run_name_template,
        expectation_suite_name=expectation_suite_name,
        batch_request=batch_request,
        action_list=action_list,
        evaluation_parameters=evaluation_parameters,
        runtime_configuration=runtime_configuration,
        validations=validations,
        profilers=profilers,
        ge_cloud_id=ge_cloud_id,
        # Next two fields are for LegacyCheckpoint configuration
        validation_operator_name=validation_operator_name,
        batches=batches,
    )
    self._substituted_config = None
def _build_checkpoint_config(self) -> CheckpointConfig:
    """Assemble and schema-validate the CheckpointConfig for this configurator.

    Builds the action list, strips any in-memory batch_data (DataFrames must
    not be persisted to the CheckpointStore), applies fixed overrides, and
    round-trips the result through the checkpoint config schema.
    """
    action_list = self._default_action_list()
    if self.site_names:
        action_list = self._add_update_data_docs_action(action_list)
    if self.slack_webhook:
        action_list = self._add_slack_action(action_list)

    # NOTE(review): when self.other_kwargs is truthy, config_kwargs aliases
    # it, so the pops below mutate the instance attribute -- presumably
    # intentional (single-use configurator); confirm before reusing instances.
    config_kwargs: dict = self.other_kwargs or {}

    # DataFrames shouldn't be saved to CheckpointStore
    batch_request = config_kwargs.get("batch_request")
    if batch_request_contains_batch_data(batch_request=batch_request):
        config_kwargs.pop("batch_request", None)
    else:
        config_kwargs["batch_request"] = get_batch_request_as_dict(
            batch_request=batch_request
        )

    # DataFrames shouldn't be saved to CheckpointStore
    validations = config_kwargs.get("validations")
    if batch_request_in_validations_contains_batch_data(validations=validations):
        config_kwargs.pop("validations", [])
    else:
        config_kwargs["validations"] = get_validations_with_batch_request_as_dict(
            validations=validations
        )

    # These values always win over anything supplied in other_kwargs.
    specific_config_kwargs_overrides: dict = {
        "config_version": 1.0,
        "name": self.name,
        "class_name": "Checkpoint",
        "action_list": action_list,
        "ge_cloud_id": self.other_kwargs.pop("ge_cloud_id", None),
    }
    config_kwargs.update(specific_config_kwargs_overrides)

    # Roundtrip through schema validation to remove any illegal fields and/or restore any missing fields.
    checkpoint_config: dict = checkpointConfigSchema.load(
        CommentedMap(**config_kwargs)
    )
    config_kwargs = checkpointConfigSchema.dump(checkpoint_config)

    logger.debug(
        f"SimpleCheckpointConfigurator built this CheckpointConfig:"
        f"{checkpoint_config}"
    )

    return CheckpointConfig(**config_kwargs)
def serialization_self_check(self, pretty_print: bool) -> None:
    """Round-trip a throwaway CheckpointConfig through this store.

    Writes, reads back, and deletes a randomly-named test configuration to
    verify the store backend works; progress is echoed when pretty_print is
    True.
    """
    test_checkpoint_name: str = "test-name-" + "".join(
        random.choice("0123456789ABCDEF") for _ in range(20)
    )
    test_checkpoint_configuration: CheckpointConfig = CheckpointConfig(
        name=test_checkpoint_name
    )

    if self.ge_cloud_mode:
        # Cloud-backed stores key on a GE Cloud contract id.
        test_key: GeCloudIdentifier = self.key_class(
            resource_type=GeCloudRESTResource.CONTRACT,
            ge_cloud_id=str(uuid.uuid4()),
        )
    else:
        test_key: ConfigurationIdentifier = self.key_class(
            configuration_key=test_checkpoint_name
        )

    if pretty_print:
        print(
            f"Attempting to add a new test key {test_key} to Checkpoint store..."
        )
    self.set(key=test_key, value=test_checkpoint_configuration)
    if pretty_print:
        print(
            f"\tTest key {test_key} successfully added to Checkpoint store.\n"
        )

    if pretty_print:
        print(
            f"Attempting to retrieve the test value associated with key {test_key} from Checkpoint store..."
        )
    # noinspection PyUnusedLocal
    test_value: CheckpointConfig = self.get(key=test_key)
    if pretty_print:
        print("\tTest value successfully retrieved from Checkpoint store.")
        print()

    if pretty_print:
        print(
            f"Cleaning up test key {test_key} and value from Checkpoint store..."
        )
    # noinspection PyUnusedLocal
    test_value: CheckpointConfig = self.remove_key(key=test_key)
    if pretty_print:
        print(
            "\tTest key and value successfully removed from Checkpoint store."
        )
        print()
def _substitute_config_variables(
    self, config: CheckpointConfig
) -> CheckpointConfig:
    """Resolve $-style variables in config and return a new CheckpointConfig.

    Substitution sources are layered: stored config variables, then OS
    environment variables, then the data context's runtime environment --
    later sources win.
    """
    substituted_config_variables = substitute_all_config_variables(
        self.data_context.config_variables,
        dict(os.environ),
        self.data_context.DOLLAR_SIGN_ESCAPE_STRING,
    )

    # Later entries override earlier ones.
    substitutions = {
        **substituted_config_variables,
        **dict(os.environ),
        **self.data_context.runtime_environment,
    }

    resolved = substitute_all_config_variables(
        config, substitutions, self.data_context.DOLLAR_SIGN_ESCAPE_STRING
    )
    return CheckpointConfig(**resolved)
def get_checkpoint_run_usage_statistics(
    checkpoint: "Checkpoint",  # noqa: F821
    *args,
    **kwargs,
) -> dict:
    """Build the anonymized usage-statistics payload for a checkpoint run.

    Returns an empty dict when the checkpoint has no usage-statistics handler
    or when payload construction fails for any reason -- usage stats must
    never break a checkpoint run.
    """
    usage_statistics_handler: Optional[
        UsageStatisticsHandler
    ] = checkpoint._usage_statistics_handler

    data_context_id: Optional[str] = None
    try:
        data_context_id = checkpoint.data_context.data_context_id
    except AttributeError:
        data_context_id = None

    # NOTE(review): this cached anonymizer is shadowed below by
    # usage_statistics_handler.anonymizer whenever a handler exists, and is
    # unused otherwise -- this lookup appears to exist only to populate the
    # module-level _anonymizers cache; confirm before removing.
    anonymizer: Optional[Anonymizer] = _anonymizers.get(data_context_id, None)
    if anonymizer is None:
        anonymizer = Anonymizer(data_context_id)
        _anonymizers[data_context_id] = anonymizer

    payload: dict = {}

    if usage_statistics_handler:
        # noinspection PyBroadException
        try:
            anonymizer = usage_statistics_handler.anonymizer  # noqa: F821

            # Resolve the run's effective kwargs the same way Checkpoint.run
            # does, so the anonymized payload reflects the real configuration.
            resolved_runtime_kwargs: dict = (
                CheckpointConfig.resolve_config_using_acceptable_arguments(
                    *(checkpoint,), **kwargs
                )
            )

            payload: dict = anonymizer.anonymize(
                *(checkpoint,), **resolved_runtime_kwargs
            )
        except Exception as e:
            # Deliberate best-effort: failures are logged, never raised.
            logger.debug(
                f"{UsageStatsExceptionPrefix.EMIT_EXCEPTION.value}: {e} type: {type(e)}, get_checkpoint_run_usage_statistics: Unable to create anonymized_checkpoint_run payload field"
            )

    return payload
def serialization_self_check(self, pretty_print: bool):
    """Round-trip a throwaway CheckpointConfig through this store.

    Writes, reads back, and deletes a randomly-named test configuration to
    verify the store backend works; progress is echoed when pretty_print is
    True.
    """
    test_checkpoint_name: str = "test-name-" + "".join(
        [random.choice(list("0123456789ABCDEF")) for i in range(20)])
    test_checkpoint_configuration: CheckpointConfig = CheckpointConfig(
        **{"name": test_checkpoint_name})

    test_key: ConfigurationIdentifier = self._key_class(
        configuration_key=test_checkpoint_name)

    if pretty_print:
        print(
            f"Attempting to add a new test key {test_key} to Checkpoint store..."
        )
    self.set(key=test_key, value=test_checkpoint_configuration)
    if pretty_print:
        print(
            f"\tTest key {test_key} successfully added to Checkpoint store."
        )
        print()

    if pretty_print:
        print(
            f"Attempting to retrieve the test value associated with key {test_key} from Checkpoint store..."
        )
    # noinspection PyUnusedLocal
    test_value: CheckpointConfig = self.get(key=test_key)
    if pretty_print:
        # Fixed typo in user-visible message: "retreived" -> "retrieved"
        # (now consistent with the cloud-aware variant of this method).
        print("\tTest value successfully retrieved from Checkpoint store.")
        print()

    if pretty_print:
        print(
            f"Cleaning up test key {test_key} and value from Checkpoint store..."
        )
    # noinspection PyUnusedLocal
    test_value: CheckpointConfig = self.remove_key(key=test_key)
    if pretty_print:
        print(
            "\tTest key and value successfully removed from Checkpoint store."
        )
        print()
def test_checkpoint_store(empty_data_context):
    """Exercise CheckpointStore CRUD against a filesystem backend.

    Covers: rejecting non-CheckpointConfig values, set/get round-trips for
    two configs, the on-disk directory layout, self_check() output, and key
    removal.
    """
    store_name: str = "checkpoint_store"
    base_directory: str = str(
        Path(empty_data_context.root_directory) / "checkpoints")

    checkpoint_store: CheckpointStore = build_checkpoint_store_using_filesystem(
        store_name=store_name,
        base_directory=base_directory,
        overwrite_existing=True,
    )

    assert len(checkpoint_store.list_keys()) == 0

    # Only CheckpointConfig values may be stored.
    with pytest.raises(TypeError):
        checkpoint_store.set(key="my_first_checkpoint",
                             value="this is not a checkpoint")

    assert len(checkpoint_store.list_keys()) == 0

    # First checkpoint: validations carry the batch_request.
    checkpoint_name_0: str = "my_checkpoint_0"
    run_name_template_0: str = "%Y-%M-my-run-template-$VAR"
    validations_0: Union[List, Dict] = [{
        "batch_request": {
            "datasource_name": "my_pandas_datasource",
            "data_connector_name": "my_runtime_data_connector",
            "data_asset_name": "my_website_logs",
        },
        "action_list": [
            {
                "name": "store_validation_result",
                "action": {
                    "class_name": "StoreValidationResultAction",
                },
            },
            {
                "name": "store_evaluation_params",
                "action": {
                    "class_name": "StoreEvaluationParametersAction",
                },
            },
            {
                "name": "update_data_docs",
                "action": {
                    "class_name": "UpdateDataDocsAction",
                },
            },
        ],
    }]
    expectation_suite_name_0: str = "my.test.expectation_suite.name"
    evaluation_parameters_0: dict = {
        "environment": "$GE_ENVIRONMENT",
        "tolerance": 1.0e-2,
        "aux_param_0": "$MY_PARAM",
        "aux_param_1": "1 + $MY_PARAM",
    }
    runtime_configuration_0: dict = {
        "result_format": {
            "result_format": "BASIC",
            "partial_unexpected_count": 20,
        },
    }
    my_checkpoint_config_0: CheckpointConfig = CheckpointConfig(
        name=checkpoint_name_0,
        run_name_template=run_name_template_0,
        expectation_suite_name=expectation_suite_name_0,
        evaluation_parameters=evaluation_parameters_0,
        runtime_configuration=runtime_configuration_0,
        validations=validations_0,
    )

    key_0: ConfigurationIdentifier = ConfigurationIdentifier(
        configuration_key=checkpoint_name_0,
    )
    checkpoint_store.set(key=key_0, value=my_checkpoint_config_0)

    assert len(checkpoint_store.list_keys()) == 1

    # Round-trip: what we read back equals what we stored (modulo falsy fields).
    assert filter_properties_dict(properties=checkpoint_store.get(
        key=key_0).to_json_dict(), ) == filter_properties_dict(
            properties=my_checkpoint_config_0.to_json_dict(), )

    dir_tree: str = gen_directory_tree_str(startpath=base_directory)
    assert (dir_tree == """checkpoints/
    .ge_store_backend_id
    my_checkpoint_0.yml
""")

    # Second checkpoint: batch_request at the checkpoint level instead.
    checkpoint_name_1: str = "my_checkpoint_1"
    run_name_template_1: str = "%Y-%M-my-run-template-$VAR"
    validations_1: Union[List, Dict] = [{
        "action_list": [
            {
                "name": "store_validation_result",
                "action": {
                    "class_name": "StoreValidationResultAction",
                },
            },
            {
                "name": "store_evaluation_params",
                "action": {
                    "class_name": "StoreEvaluationParametersAction",
                },
            },
            {
                "name": "update_data_docs",
                "action": {
                    "class_name": "UpdateDataDocsAction",
                },
            },
        ]
    }]
    expectation_suite_name_1: str = "my.test.expectation_suite.name"
    batch_request_1: dict = {
        "datasource_name": "my_pandas_datasource",
        "data_connector_name": "my_runtime_data_connector",
        "data_asset_name": "my_website_logs",
    }
    evaluation_parameters_1: dict = {
        "environment": "$GE_ENVIRONMENT",
        "tolerance": 1.0e-2,
        "aux_param_0": "$MY_PARAM",
        "aux_param_1": "1 + $MY_PARAM",
    }
    runtime_configuration_1: dict = {
        "result_format": {
            "result_format": "BASIC",
            "partial_unexpected_count": 20,
        },
    }
    my_checkpoint_config_1: CheckpointConfig = CheckpointConfig(
        name=checkpoint_name_1,
        run_name_template=run_name_template_1,
        expectation_suite_name=expectation_suite_name_1,
        batch_request=batch_request_1,
        evaluation_parameters=evaluation_parameters_1,
        runtime_configuration=runtime_configuration_1,
        validations=validations_1,
    )

    key_1: ConfigurationIdentifier = ConfigurationIdentifier(
        configuration_key=checkpoint_name_1,
    )
    checkpoint_store.set(key=key_1, value=my_checkpoint_config_1)

    assert len(checkpoint_store.list_keys()) == 2

    assert filter_properties_dict(properties=checkpoint_store.get(
        key=key_1).to_json_dict(), ) == filter_properties_dict(
            properties=my_checkpoint_config_1.to_json_dict(), )

    dir_tree: str = gen_directory_tree_str(startpath=base_directory)
    assert (dir_tree == """checkpoints/
    .ge_store_backend_id
    my_checkpoint_0.yml
    my_checkpoint_1.yml
""")

    # self_check() reports keys plus the full backend configuration.
    self_check_report: dict = convert_to_json_serializable(
        data=checkpoint_store.self_check())
    assert self_check_report == {
        "keys": ["my_checkpoint_0", "my_checkpoint_1"],
        "len_keys": 2,
        "config": {
            "store_name": "checkpoint_store",
            "class_name": "CheckpointStore",
            "module_name":
            "great_expectations.data_context.store.checkpoint_store",
            "overwrite_existing": True,
            "store_backend": {
                "base_directory":
                f"{empty_data_context.root_directory}/checkpoints",
                "platform_specific_separator": True,
                "fixed_length_key": False,
                "suppress_store_backend_id": False,
                "module_name":
                "great_expectations.data_context.store.tuple_store_backend",
                "class_name": "TupleFilesystemStoreBackend",
                "filepath_template": "{0}.yml",
            },
        },
    }

    checkpoint_store.remove_key(key=key_0)
    checkpoint_store.remove_key(key=key_1)
    assert len(checkpoint_store.list_keys()) == 0
def test_checkpoint_config_deepcopy(
    titanic_pandas_data_context_with_v013_datasource_stats_enabled_with_checkpoints_v1_with_templates,
    monkeypatch,
):
    """A substituted checkpoint config must survive copy.deepcopy intact.

    Builds a nested (template-based) checkpoint, substitutes a template plus
    runtime kwargs, deep-copies the result, and asserts the copy is equal to
    the original once falsy fields are filtered out.
    """
    # Environment variables referenced by the checkpoint templates.
    monkeypatch.setenv("GE_ENVIRONMENT", "my_ge_environment")
    monkeypatch.setenv("VAR", "test")
    monkeypatch.setenv("MY_PARAM", "1")
    monkeypatch.setenv("OLD_PARAM", "2")

    context: DataContext = titanic_pandas_data_context_with_v013_datasource_stats_enabled_with_checkpoints_v1_with_templates

    test_df: pd.DataFrame = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})

    runtime_batch_request: RuntimeBatchRequest = RuntimeBatchRequest(
        **{
            "datasource_name": "my_datasource",
            "data_connector_name": "my_runtime_data_connector",
            "data_asset_name": "default_data_asset_name",
            "batch_identifiers": {
                "pipeline_stage_name": "core_processing",
                "airflow_run_id": 1234567890,
            },
            "runtime_parameters": {"batch_data": test_df},
        }
    )

    nested_checkpoint_config: CheckpointConfig = CheckpointConfig(
        name="my_nested_checkpoint",
        config_version=1,
        template_name="my_nested_checkpoint_template_2",
        expectation_suite_name="users.delivery",
        validations=[
            {
                "batch_request": {
                    "datasource_name": "my_datasource",
                    "data_connector_name": "my_special_data_connector",
                    "data_asset_name": "users",
                    "data_connector_query": {"partition_index": -1},
                }
            },
            {
                "batch_request": {
                    "datasource_name": "my_datasource",
                    "data_connector_name": "my_other_data_connector",
                    "data_asset_name": "users",
                    "data_connector_query": {"partition_index": -2},
                }
            },
        ],
    )
    nested_checkpoint: Checkpoint = Checkpoint(
        data_context=context,
        **filter_properties_dict(
            properties=nested_checkpoint_config.to_json_dict(),
            delete_fields={"class_name", "module_name"},
            clean_falsy=True,
        ),
    )
    # Substitute both a different template and a full set of runtime kwargs.
    substituted_config_template_and_runtime_kwargs: dict = nested_checkpoint.get_substituted_config(
        runtime_kwargs={
            "batch_request": runtime_batch_request,
            "expectation_suite_name": "runtime_suite_name",
            "template_name": "my_nested_checkpoint_template_3",
            "validations": [
                {
                    "batch_request": {
                        "datasource_name": "my_datasource",
                        "data_connector_name": "my_other_data_connector_2_runtime",
                        "data_asset_name": "users",
                        "data_connector_query": {"partition_index": -3},
                    }
                },
                {
                    "batch_request": {
                        "datasource_name": "my_datasource",
                        "data_connector_name": "my_other_data_connector_3_runtime",
                        "data_asset_name": "users",
                        "data_connector_query": {"partition_index": -4},
                    }
                },
            ],
            "run_name_template": "runtime_run_template",
            "action_list": [
                {
                    "name": "store_validation_result",
                    "action": {
                        "class_name": "StoreValidationResultAction",
                    },
                },
                {
                    "name": "store_evaluation_params",
                    "action": {
                        "class_name": "MyCustomRuntimeStoreEvaluationParametersAction",
                    },
                },
                {
                    # A None action removes "update_data_docs" from the merged list.
                    "name": "update_data_docs",
                    "action": None,
                },
                {
                    "name": "update_data_docs_deluxe_runtime",
                    "action": {
                        "class_name": "UpdateDataDocsAction",
                    },
                },
            ],
            "evaluation_parameters": {
                "environment": "runtime-$GE_ENVIRONMENT",
                "tolerance": 1.0e-2,
                "aux_param_0": "runtime-$MY_PARAM",
                "aux_param_1": "1 + $MY_PARAM",
                "new_runtime_eval_param": "bloopy!",
            },
            "runtime_configuration": {
                "result_format": "BASIC",
                "partial_unexpected_count": 999,
                "new_runtime_config_key": "bleepy!",
            },
        }
    )

    checkpoint_config_copy: dict = copy.deepcopy(
        substituted_config_template_and_runtime_kwargs
    )
    assert deep_filter_properties_iterable(
        properties=checkpoint_config_copy,
        clean_falsy=True,
    ) == deep_filter_properties_iterable(
        properties=substituted_config_template_and_runtime_kwargs,
        clean_falsy=True,
    )
def test_GeCloudStoreBackend():
    """
    What does this test test and why?

    Since GeCloudStoreBackend relies on GE Cloud, we mock requests.post, requests.get, and
    requests.patch and assert that the right calls are made for set, get, list, and remove_key.
    """
    ge_cloud_base_url = "https://app.greatexpectations.io/"
    ge_cloud_credentials = {
        "access_token": "1234",
        "account_id": "51379b8b-86d3-4fe7-84e9-e1a52f4a414c",
    }
    ge_cloud_resource_type = "checkpoint"
    my_simple_checkpoint_config: CheckpointConfig = CheckpointConfig(
        name="my_minimal_simple_checkpoint",
        class_name="SimpleCheckpoint",
        config_version=1,
    )
    # The backend stores the marshmallow-serialized (OrderedDict) form.
    my_simple_checkpoint_config_serialized = (
        my_simple_checkpoint_config.get_schema_class()().dump(
            my_simple_checkpoint_config
        )
    )

    # test .set
    with patch("requests.post", autospec=True) as mock_post:
        my_store_backend = GeCloudStoreBackend(
            ge_cloud_base_url=ge_cloud_base_url,
            ge_cloud_credentials=ge_cloud_credentials,
            ge_cloud_resource_type=ge_cloud_resource_type,
        )
        my_store_backend.set(
            ("my_checkpoint_name",), my_simple_checkpoint_config_serialized
        )
        # set() must POST the serialized config to the account's checkpoints endpoint.
        mock_post.assert_called_with(
            "https://app.greatexpectations.io/accounts/51379b8b-86d3-4fe7-84e9-e1a52f4a414c/checkpoints",
            json={
                "data": {
                    "type": "checkpoint",
                    "attributes": {
                        "account_id": "51379b8b-86d3-4fe7-84e9-e1a52f4a414c",
                        "checkpoint_config": OrderedDict(
                            [
                                ("name", "my_minimal_simple_checkpoint"),
                                ("config_version", 1.0),
                                ("template_name", None),
                                ("module_name", "great_expectations.checkpoint"),
                                ("class_name", "SimpleCheckpoint"),
                                ("run_name_template", None),
                                ("expectation_suite_name", None),
                                ("batch_request", None),
                                ("action_list", []),
                                ("evaluation_parameters", {}),
                                ("runtime_configuration", {}),
                                ("validations", []),
                                ("profilers", []),
                                ("ge_cloud_id", None),
                            ]
                        ),
                    },
                }
            },
            headers={
                "Content-Type": "application/vnd.api+json",
                "Authorization": "Bearer 1234",
            },
        )

    # test .get
    with patch("requests.get", autospec=True) as mock_get:
        my_store_backend = GeCloudStoreBackend(
            ge_cloud_base_url=ge_cloud_base_url,
            ge_cloud_credentials=ge_cloud_credentials,
            ge_cloud_resource_type=ge_cloud_resource_type,
        )
        my_store_backend.get(("0ccac18e-7631-4bdd-8a42-3c35cce574c6",))
        mock_get.assert_called_with(
            "https://app.greatexpectations.io/accounts/51379b8b-86d3-4fe7-84e9-e1a52f4a414c/checkpoints/0ccac18e-7631-4bdd-8a42-3c35cce574c6",
            headers={
                "Content-Type": "application/vnd.api+json",
                "Authorization": "Bearer 1234",
            },
        )

    # test .list_keys
    with patch("requests.get", autospec=True) as mock_get:
        my_store_backend = GeCloudStoreBackend(
            ge_cloud_base_url=ge_cloud_base_url,
            ge_cloud_credentials=ge_cloud_credentials,
            ge_cloud_resource_type=ge_cloud_resource_type,
        )
        my_store_backend.list_keys()
        mock_get.assert_called_with(
            "https://app.greatexpectations.io/accounts/51379b8b-86d3-4fe7-84e9-e1a52f4a414c/checkpoints",
            headers={
                "Content-Type": "application/vnd.api+json",
                "Authorization": "Bearer 1234",
            },
        )

    # test .remove_key
    with patch("requests.patch", autospec=True) as mock_patch:
        mock_response = mock_patch.return_value
        mock_response.status_code = 200

        my_store_backend = GeCloudStoreBackend(
            ge_cloud_base_url=ge_cloud_base_url,
            ge_cloud_credentials=ge_cloud_credentials,
            ge_cloud_resource_type=ge_cloud_resource_type,
        )
        my_store_backend.remove_key(("0ccac18e-7631-4bdd-8a42-3c35cce574c6",))
        # Removal is a soft delete: PATCH with {"deleted": True}.
        mock_patch.assert_called_with(
            "https://app.greatexpectations.io/accounts/51379b8b-86d3-4fe7-84e9-e1a52f4a414c/checkpoints/0ccac18e-7631-4bdd-8a42-3c35cce574c6",
            json={
                "data": {
                    "type": "checkpoint",
                    "id": "0ccac18e-7631-4bdd-8a42-3c35cce574c6",
                    "attributes": {"deleted": True},
                }
            },
            headers={
                "Content-Type": "application/vnd.api+json",
                "Authorization": "Bearer 1234",
            },
        )
def test_checkpoint_config_repr_after_substitution(checkpoint):
    """The resolved checkpoint kwargs must serialize to a stable JSON repr.

    Resolves runtime kwargs (including an in-memory DataFrame), filters falsy
    entries, sorts the top-level keys, and compares json.dumps(..., indent=2)
    output against the expected literal. Note the top-level batch_data is
    fully serialized while the validation-level batch_data collapses to its
    class name.
    """
    df: pd.DataFrame = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    batch_request_param: dict = {
        "runtime_parameters": {
            "batch_data": df
        },
        "batch_identifiers": {
            "default_identifier_name": "my_simple_df"
        },
    }

    result_format_param: dict = {"result_format": "SUMMARY"}

    kwargs: dict = {
        "batch_request": batch_request_param,
        "result_format": result_format_param,
    }

    # Matching how this is called in usage_statistics.py (parameter style)
    resolved_runtime_kwargs: dict = (
        CheckpointConfig.resolve_config_using_acceptable_arguments(
            *(checkpoint, ), **kwargs))

    json_dict: dict = convert_to_json_serializable(
        data=resolved_runtime_kwargs)
    deep_filter_properties_iterable(
        properties=json_dict,
        inplace=True,
    )

    # Sort top-level keys so the repr is deterministic.
    keys: List[str] = sorted(list(json_dict.keys()))

    key: str
    sorted_json_dict: dict = {key: json_dict[key] for key in keys}

    checkpoint_config_repr: str = json.dumps(sorted_json_dict, indent=2)

    assert (checkpoint_config_repr == """{
  "action_list": [
    {
      "name": "store_validation_result",
      "action": {
        "class_name": "StoreValidationResultAction"
      }
    },
    {
      "name": "store_evaluation_params",
      "action": {
        "class_name": "StoreEvaluationParametersAction"
      }
    },
    {
      "name": "update_data_docs",
      "action": {
        "class_name": "UpdateDataDocsAction",
        "site_names": []
      }
    }
  ],
  "batch_request": {
    "runtime_parameters": {
      "batch_data": [
        {
          "a": 1,
          "b": 3
        },
        {
          "a": 2,
          "b": 4
        }
      ]
    },
    "batch_identifiers": {
      "default_identifier_name": "my_simple_df"
    }
  },
  "class_name": "Checkpoint",
  "config_version": 1.0,
  "evaluation_parameters": {},
  "module_name": "great_expectations.checkpoint",
  "name": "my_checkpoint",
  "profilers": [],
  "runtime_configuration": {},
  "validations": [
    {
      "batch_request": {
        "datasource_name": "example_datasource",
        "data_connector_name": "default_runtime_data_connector_name",
        "data_asset_name": "my_data_asset",
        "runtime_parameters": {
          "batch_data": "<class \'pandas.core.frame.DataFrame\'>"
        },
        "batch_identifiers": {
          "default_identifier_name": "my_simple_df"
        }
      },
      "expectation_suite_name": "test_suite",
      "action_list": [
        {
          "name": "store_validation_result",
          "action": {
            "class_name": "StoreValidationResultAction"
          }
        },
        {
          "name": "store_evaluation_params",
          "action": {
            "class_name": "StoreEvaluationParametersAction"
          }
        },
        {
          "name": "update_data_docs",
          "action": {
            "class_name": "UpdateDataDocsAction",
            "site_names": []
          }
        }
      ]
    }
  ]
}""")