def exp1():
    return ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_set",
        kwargs={"column": "a", "value_set": [1, 2, 3], "result_format": "BASIC"},
        meta={"notes": "This is an expectation."},
    )

def _create_string_length_expectation(
    self, key: str, details: dict
) -> Optional[ExpectationConfiguration]:
    """https://json-schema.org/understanding-json-schema/reference/string.html#length"""
    object_types = self._get_object_types(details=details)
    if JsonSchemaTypes.STRING.value not in object_types:
        return None

    type_ = details.get("type", None)
    any_of = details.get("anyOf", None)
    if not type_ and not any_of:
        return None

    # Initialize both bounds so that an anyOf clause with no string entry cannot
    # leave them unbound below.
    minimum = None
    maximum = None
    if type_:
        minimum = details.get("minLength", None)
        maximum = details.get("maxLength", None)
    elif any_of:
        for item in any_of:
            item_type = item.get("type", None)
            if item_type == JsonSchemaTypes.STRING.value:
                minimum = item.get("minLength", None)
                maximum = item.get("maxLength", None)
                break

    if minimum is None and maximum is None:
        return None

    kwargs = {
        "column": key,
    }
    if minimum == maximum:
        kwargs["value"] = minimum
        return ExpectationConfiguration("expect_column_value_lengths_to_equal", kwargs)

    if minimum is not None:
        kwargs["min_value"] = minimum
    if maximum is not None:
        kwargs["max_value"] = maximum

    return ExpectationConfiguration("expect_column_value_lengths_to_be_between", kwargs)

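# Illustrative sketch (added for clarity; not part of the profiler source): the mapping
# this method performs for two hypothetical JSON Schema property definitions. The column
# name "user_name" and the schema fragments below are made-up examples.
#
#   {"type": "string", "minLength": 2, "maxLength": 64}
#       -> expect_column_value_lengths_to_be_between(column="user_name",
#                                                    min_value=2, max_value=64)
#
#   {"type": "string", "minLength": 10, "maxLength": 10}
#       -> expect_column_value_lengths_to_equal(column="user_name", value=10)
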
def column_pair_expectation():
    return ExpectationConfiguration(
        expectation_type="expect_column_pair_values_to_be_in_set",
        kwargs={
            "column_A": "1",
            "column_B": "b",
            "value_set": [(1, 1), (2, 2)],
            "result_format": "BASIC",
        },
    )

def test_autoinspect_columns_exist(test_backend):
    df = get_dataset(
        test_backend, {"a": [1, 2, 3]}, profiler=ge.profile.ColumnsExistProfiler
    )
    suite = df.get_expectation_suite()

    assert len(suite.expectations) == 1
    assert suite.expectations == [
        ExpectationConfiguration(
            expectation_type="expect_column_to_exist", kwargs={"column": "a"}
        )
    ]

def _check_input_validation(
    expectation_instance,
    examples: List[ExpectationTestDataCases],
) -> ExpectationDiagnosticCheckMessage:
    """Check that the validate_configuration exists and doesn't raise a config error"""
    passed = False
    sub_messages = []
    rx = re.compile(r"^[\s]+assert", re.MULTILINE)
    try:
        first_test = examples[0]["tests"][0]
    except IndexError:
        sub_messages.append(
            {
                "message": "No example found to get kwargs for ExpectationConfiguration",
                "passed": passed,
            }
        )
    else:
        if "validate_configuration" not in expectation_instance.__class__.__dict__:
            sub_messages.append(
                {
                    "message": "No validate_configuration method defined on subclass",
                    "passed": passed,
                }
            )
        else:
            expectation_config = ExpectationConfiguration(
                expectation_type=expectation_instance.expectation_type,
                kwargs=first_test.input,
            )
            validate_configuration_source = inspect.getsource(
                expectation_instance.__class__.validate_configuration
            )
            if rx.search(validate_configuration_source):
                sub_messages.append(
                    {
                        "message": "Custom 'assert' statements in validate_configuration",
                        "passed": True,
                    }
                )
            else:
                sub_messages.append(
                    {
                        "message": "Using default validate_configuration from template",
                        "passed": False,
                    }
                )
            try:
                expectation_instance.validate_configuration(expectation_config)
            except InvalidExpectationConfigurationError:
                pass
            else:
                passed = True

    return ExpectationDiagnosticCheckMessage(
        message="Has basic input validation and type checking",
        passed=passed,
        sub_messages=sub_messages,
    )

def test_graph_validate(basic_datasource):
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10], "b": [1, 2, 3, 4, 5, None]})

    batch = basic_datasource.get_single_batch_from_batch_request(
        BatchRequest(
            **{
                "datasource_name": "my_datasource",
                "data_connector_name": "test_runtime_data_connector",
                "data_asset_name": "IN_MEMORY_DATA_ASSET",
                "batch_data": df,
                "partition_request": PartitionRequest(
                    **{
                        "batch_identifiers": {
                            "pipeline_stage_name": 0,
                            "airflow_run_id": 0,
                            "custom_key_0": 0,
                        }
                    }
                ),
            }
        )
    )

    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "b",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )

    result = Validator(
        execution_engine=PandasExecutionEngine(), batches=[batch]
    ).graph_validate(configurations=[expectation_configuration])

    assert result == [
        ExpectationValidationResult(
            success=True,
            expectation_config=None,
            meta={},
            result={
                "element_count": 6,
                "unexpected_count": 0,
                "unexpected_percent": 0.0,
                "partial_unexpected_list": [],
                "missing_count": 1,
                "missing_percent": 16.666666666666664,
                "unexpected_percent_nonmissing": 0.0,
            },
            exception_info=None,
        )
    ]

def _create_null_or_not_null_column_expectation(
    self, key: str, details: dict
) -> Optional[ExpectationConfiguration]:
    """https://json-schema.org/understanding-json-schema/reference/null.html"""
    object_types = self._get_object_types(details=details)
    enum_list = self._get_enum_list(details=details)
    kwargs = {"column": key}

    if enum_list:
        object_types = set(enum_list).union(set(object_types))

    if JsonSchemaTypes.NULL.value not in object_types:
        return ExpectationConfiguration("expect_column_values_to_not_be_null", kwargs)

    if len(object_types) == 1:
        return ExpectationConfiguration("expect_column_values_to_be_null", kwargs)

    return None

def test__find_evr_by_type(titanic_profiled_evrs_1):
    # TODO: _find_all_evrs_by_type should accept a ValidationResultSuite, not ValidationResultSuite.results
    found_evr = Renderer()._find_evr_by_type(
        titanic_profiled_evrs_1.results, "expect_column_to_exist"
    )
    print(found_evr)
    assert found_evr is None

    # TODO: _find_all_evrs_by_type should accept a ValidationResultSuite, not ValidationResultSuite.results
    found_evr = Renderer()._find_evr_by_type(
        titanic_profiled_evrs_1.results, "expect_column_distinct_values_to_be_in_set"
    )
    print(found_evr)
    assert found_evr == ExpectationValidationResult(
        success=True,
        result={
            "observed_value": ["*", "1st", "2nd", "3rd"],
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "details": {
                "value_counts": [
                    {"value": "*", "count": 1},
                    {"value": "1st", "count": 322},
                    {"value": "2nd", "count": 279},
                    {"value": "3rd", "count": 711},
                ]
            },
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_distinct_values_to_be_in_set",
            kwargs={"column": "PClass", "value_set": None, "result_format": "SUMMARY"},
        ),
    )

def test_expectation_configuration_get_evaluation_parameter_dependencies():
    # Getting evaluation parameter dependencies relies on pyparsing, but the expectation
    # configuration is responsible for ensuring that it only returns one copy of required metrics.
    # If different expectations rely on the same upstream dependency, then it is possible for
    # duplicates to be present nonetheless.
    ec = ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_between",
        kwargs={
            "column": "norm",
            "min_value": {
                "$PARAMETER": "(-3 * urn:great_expectations:validations:profile:expect_column_stdev_to_be_between"
                ".result.observed_value:column=norm) + "
                "urn:great_expectations:validations:profile:expect_column_mean_to_be_between.result.observed_value"
                ":column=norm"
            },
            "max_value": {
                "$PARAMETER": "(3 * urn:great_expectations:validations:profile:expect_column_stdev_to_be_between"
                ".result.observed_value:column=norm) + "
                "urn:great_expectations:validations:profile:expect_column_mean_to_be_between.result.observed_value"
                ":column=norm"
            },
        },
    )
    dependencies = ec.get_evaluation_parameter_dependencies()
    dependencies["profile"][0]["metric_kwargs_id"]["column=norm"] = set(
        dependencies["profile"][0]["metric_kwargs_id"]["column=norm"]
    )

    assert {
        "profile": [
            {
                "metric_kwargs_id": {
                    "column=norm": {
                        "expect_column_stdev_to_be_between.result.observed_value",
                        "expect_column_mean_to_be_between.result.observed_value",
                    }
                }
            }
        ]
    } == dependencies

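# For reference (added note, derived only from the strings and assertion above): the
# $PARAMETER values follow the evaluation-parameter URN pattern
#   urn:great_expectations:validations:<suite_name>:<expectation_type>.result.observed_value:column=<column>
# Here "profile" is the suite name and "column=norm" becomes the metric_kwargs_id key
# under which get_evaluation_parameter_dependencies groups the required metrics, which
# is why both observed_value dependencies land in a single "column=norm" set.
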
def _create_boolean_expectation(
    self, key: str, details: dict
) -> Optional[ExpectationConfiguration]:
    """https://json-schema.org/understanding-json-schema/reference/boolean.html"""
    object_types = self._get_object_types(details=details)
    if JsonSchemaTypes.BOOLEAN.value not in object_types:
        return None

    # TODO: map JSON Schema types to which backend type? Pandas? Should this value set
    #  be parameterized per backend?
    kwargs = {"column": key, "value_set": [True, False]}
    return ExpectationConfiguration("expect_column_values_to_be_in_set", kwargs)

def test_expectation_string_renderer_styling():
    renderer = ExpectationStringRenderer()

    result = renderer.render(
        ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_unique",
            kwargs={"column": "Name"},
        )
    )
    assert len(result) == 1
    assert result[0].string_template["template"] == "$column values must be unique."

    result = renderer.render(
        ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_unique",
            kwargs={"column": "Name", "mostly": 0.3},
        )
    )
    assert len(result) == 1
    template = result[0].string_template
    assert (
        template["template"]
        == "$column values must be unique, at least $mostly_pct % of the time."
    )
    assert template["params"]["mostly_pct"] == "30"

    result = renderer.render(
        ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_unique",
            kwargs={"column": "Name", "mostly": 0.32345},
        )
    )
    assert len(result) == 1
    template = result[0].string_template
    assert (
        template["template"]
        == "$column values must be unique, at least $mostly_pct % of the time."
    )
    assert template["params"]["mostly_pct"] == "32.345"

def _create_existence_expectation(
    self, key: str, details: dict
) -> ExpectationConfiguration:
    kwargs = {"column": key}
    description = details.get("description", None)
    meta = None
    if description:
        meta = {
            "notes": {
                "format": "markdown",
                "content": [f"### Description:\n{description}"],
            }
        }
    return ExpectationConfiguration("expect_column_to_exist", kwargs, meta=meta)

def _create_set_expectation(
    self, key: str, details: dict
) -> Optional[ExpectationConfiguration]:
    """https://json-schema.org/understanding-json-schema/reference/generic.html#enumerated-values"""
    enum_list = self._get_enum_list(details=details)
    if not enum_list:
        return None

    # Compare by value (not identity) when dropping the JSON Schema "null" entry.
    enum_list = list(
        filter(lambda item: item != JsonSchemaTypes.NULL.value, enum_list)
    )

    kwargs = {"column": key, "value_set": enum_list}
    return ExpectationConfiguration("expect_column_values_to_be_in_set", kwargs)

def test_atomic_diagnostic_observed_value_without_result(
    snapshot, get_diagnostic_rendered_content
):
    # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()`
    # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class.
    expectation_config = {
        "expectation_type": "expect_table_row_count_to_equal",
        "kwargs": {},
    }
    update_dict = {
        "expectation_config": ExpectationConfiguration(**expectation_config),
    }
    rendered_content = get_diagnostic_rendered_content(update_dict)

    res = rendered_content.to_json_dict()
    pprint(res)
    snapshot.assert_match(res)

def test_expect_column_value_z_scores_to_be_less_than_impl():
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10]})
    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "a",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    expectation = ExpectColumnValueZScoresToBeLessThan(expectation_configuration)
    engine = PandasExecutionEngine(batch_data_dict={"my_id": df})
    result = expectation.validate(Validator(execution_engine=engine))
    assert result == ExpectationValidationResult(success=True)

def test_replace_expectation_finds_too_many_matches(ge_cloud_suite, ge_cloud_id):
    new_expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_set",
        kwargs={"column": "b", "value_set": [4, 5, 6], "result_format": "BASIC"},
        meta={"notes": "This is a new expectation."},
    )
    with pytest.raises(ValueError) as err:
        ge_cloud_suite.replace_expectation(
            new_expectation_configuration=new_expectation_configuration,
            existing_expectation_configuration=None,
            ge_cloud_id=ge_cloud_id,
        )
    assert (
        str(err.value)
        == "More than one matching expectation was found. Please be more specific with your search criteria"
    )

def test_add_expectation_with_ge_cloud_id(
    mock_emit,
    single_expectation_suite_with_expectation_ge_cloud_id,
):
    """
    This test ensures that an expectation does not lose its ge_cloud_id attribute
    when it is updated.
    """
    expectation_ge_cloud_id = (
        single_expectation_suite_with_expectation_ge_cloud_id.expectations[0].ge_cloud_id
    )
    # The updated expectation does not have a ge_cloud_id.
    updated_expectation = ExpectationConfiguration(
        expectation_type="expect_column_values_to_be_in_set",
        kwargs={
            "column": "a",
            "value_set": [11, 22, 33, 44, 55],
            "result_format": "BASIC",
        },
        meta={"notes": "This is an expectation."},
    )
    single_expectation_suite_with_expectation_ge_cloud_id.add_expectation(
        updated_expectation, overwrite_existing=True
    )
    assert (
        single_expectation_suite_with_expectation_ge_cloud_id.expectations[0].ge_cloud_id
        == expectation_ge_cloud_id
    )
    # Make sure the expectation config was actually updated.
    assert single_expectation_suite_with_expectation_ge_cloud_id.expectations[0].kwargs[
        "value_set"
    ] == [11, 22, 33, 44, 55]

    # Ensure usage statistics are being emitted correctly.
    assert mock_emit.call_count == 1
    assert mock_emit.call_args_list == [
        mock.call(
            {
                "event": "expectation_suite.add_expectation",
                "event_payload": {},
                "success": True,
            }
        )
    ]

def _get_prescriptive_rendered_content(
    update_dict: Dict[str, Union[str, dict]],
) -> RenderedAtomicContent:
    # Overwrite any fields passed in from the test and instantiate an ExpectationConfiguration.
    expectation_configuration_kwargs.update(update_dict)
    config = ExpectationConfiguration(**expectation_configuration_kwargs)
    expectation_type = expectation_configuration_kwargs["expectation_type"]

    # Programmatically determine the renderer implementation.
    renderer_impl = get_renderer_impl(
        object_name=expectation_type,
        renderer_type="atomic.prescriptive.summary",
    )[1]

    # Determine the RenderedAtomicContent output.
    source_obj = {"configuration": config}
    res = renderer_impl(**source_obj)
    return res

def test_atomic_diagnostic_observed_value_expect_column_kl_divergence_to_be_less_than(
    snapshot, get_diagnostic_rendered_content
):
    # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()`
    # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class.
    expectation_config = {
        "expectation_type": "expect_column_kl_divergence_to_be_less_than",
        "kwargs": {
            "column": "min_event_time",
            "partition_object": {
                "bins": [0, 5, 10, 30, 50],
                "weights": [0.2, 0.3, 0.1, 0.4],
            },
            "threshold": 0.1,
        },
        "meta": {},
        "ge_cloud_id": "4b53c4d5-90ba-467a-b7a7-379640bbd729",
    }
    update_dict = {
        "expectation_config": ExpectationConfiguration(**expectation_config),
        "result": {
            "observed_value": 0.0,
            "details": {
                "observed_partition": {
                    "values": [1, 2, 4],
                    "weights": [0.3754, 0.615, 0.0096],
                },
                "expected_partition": {
                    "values": [1, 2, 4],
                    "weights": [0.3754, 0.615, 0.0096],
                },
            },
        },
    }
    rendered_content = get_diagnostic_rendered_content(update_dict)

    res = rendered_content.to_json_dict()
    pprint(res)

    # replace version of vega-lite in res to match snapshot test
    res["value"]["graph"]["$schema"] = re.sub(
        r"v\d*\.\d*\.\d*", "v4.8.1", res["value"]["graph"]["$schema"]
    )
    snapshot.assert_match(res)

def _get_column_type_with_caching(cls, dataset, column_name, cache):
    column_cache_entry = cache.get(column_name)
    if not column_cache_entry:
        column_cache_entry = {}
        cache[column_name] = column_cache_entry
    column_type = column_cache_entry.get("type")
    if not column_type:
        column_type = cls._get_column_type(dataset, column_name)
        column_cache_entry["type"] = column_type
        # remove the expectation
        # Does this change with different config format?
        dataset.remove_expectation(
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_in_type_list",
                kwargs={"column": column_name},
            )
        )
        dataset.set_config_value("interactive_evaluation", True)
    return column_type

def test_atomic_diagnostic_observed_value_expect_column_quantile_values_to_be_between(
    snapshot, get_diagnostic_rendered_content
):
    # Please note that the vast majority of Expectations are calling `Expectation._atomic_diagnostic_observed_value()`
    # As such, the specific expectation_type used here is irrelevant and is simply used to trigger the parent class.
    expectation_config = {
        "expectation_type": "expect_column_quantile_values_to_be_between",
        "kwargs": {
            "column": "Unnamed: 0",
            "quantile_ranges": {
                "quantiles": [0.05, 0.25, 0.5, 0.75, 0.95],
                "value_ranges": [
                    [66, 68],
                    [328, 330],
                    [656, 658],
                    [984, 986],
                    [1246, 1248],
                ],
            },
            "allow_relative_error": False,
        },
        "meta": {},
        "ge_cloud_id": "cd6b4f19-8167-4984-b495-54bffcb070da",
    }
    update_dict = {
        "expectation_config": ExpectationConfiguration(**expectation_config),
        "result": {
            "observed_value": {
                "quantiles": [0.05, 0.25, 0.5, 0.75, 0.95],
                "values": [67, 329, 657, 985, 1247],
            },
            "element_count": 1313,
            "missing_count": None,
            "missing_percent": None,
            "details": {"success_details": [True, True, True, True, True]},
        },
    }
    rendered_content = get_diagnostic_rendered_content(update_dict)

    res = rendered_content.to_json_dict()
    pprint(res)
    snapshot.assert_match(res)

def test_graph_validate_with_bad_config_catch_exceptions_false(basic_datasource):
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10], "b": [1, 2, 3, 4, 5, None]})

    batch = basic_datasource.get_single_batch_from_batch_request(
        RuntimeBatchRequest(
            **{
                "datasource_name": "my_datasource",
                "data_connector_name": "test_runtime_data_connector",
                "data_asset_name": "IN_MEMORY_DATA_ASSET",
                "runtime_parameters": {
                    "batch_data": df,
                },
                "batch_identifiers": {
                    "pipeline_stage_name": 0,
                    "airflow_run_id": 0,
                    "custom_key_0": 0,
                },
            }
        )
    )

    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_max_to_be_between",
        kwargs={"column": "not_in_table", "min_value": 1, "max_value": 29},
    )

    with pytest.raises(ge_exceptions.MetricResolutionError) as eee:
        # noinspection PyUnusedLocal
        result = Validator(
            execution_engine=PandasExecutionEngine(), batches=[batch]
        ).graph_validate(
            configurations=[expectation_configuration],
            runtime_configuration={
                "catch_exceptions": False,
                "result_format": {"result_format": "BASIC"},
            },
        )
    assert (
        str(eee.value)
        == 'Error: The column "not_in_table" in BatchData does not exist.'
    )

def test_graph_validate_with_exception(basic_datasource):
    def mock_error(*args, **kwargs):
        raise Exception("Mock Error")

    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10], "b": [1, 2, 3, 4, 5, None]})

    batch = basic_datasource.get_single_batch_from_batch_request(
        RuntimeBatchRequest(
            **{
                "datasource_name": "my_datasource",
                "data_connector_name": "test_runtime_data_connector",
                "data_asset_name": "IN_MEMORY_DATA_ASSET",
                "runtime_parameters": {
                    "batch_data": df,
                },
                "batch_identifiers": {
                    "pipeline_stage_name": 0,
                    "airflow_run_id": 0,
                    "custom_key_0": 0,
                },
            }
        )
    )

    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "b",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )

    validator = Validator(execution_engine=PandasExecutionEngine(), batches=[batch])
    validator.build_metric_dependency_graph = mock_error

    result = validator.graph_validate(configurations=[expectation_configuration])

    assert len(result) == 1
    assert result[0].expectation_config is not None

def test_populate_dependencies_with_incorrect_metric_name():
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10], "b": [1, 2, 3, 4, 5, 6]})
    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "a",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    # noinspection PyUnusedLocal
    expectation = ExpectColumnValueZScoresToBeLessThan(expectation_configuration)
    # noinspection PyUnusedLocal
    batch = Batch(data=df)
    graph = ValidationGraph()
    engine = PandasExecutionEngine()
    for configuration in [expectation_configuration]:
        expectation_impl = get_expectation_impl(
            "expect_column_value_z_scores_to_be_less_than"
        )
        validation_dependencies = expectation_impl(
            configuration
        ).get_validation_dependencies(
            configuration,
            engine,
        )

        try:
            Validator(execution_engine=engine).build_metric_dependency_graph(
                graph=graph,
                execution_engine=engine,
                metric_configuration=MetricConfiguration(
                    "column_values.not_a_metric", IDDict()
                ),
                configuration=configuration,
            )
        except ge_exceptions.MetricProviderError as e:
            graph = e

    assert isinstance(graph, ge_exceptions.MetricProviderError)

def test_sa_expect_column_value_z_scores_to_be_less_than_impl(postgresql_engine):
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10]})
    df.to_sql("z_score_test_data", postgresql_engine, if_exists="replace")

    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "a",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    expectation = ExpectColumnValueZScoresToBeLessThan(expectation_configuration)

    batch_data = SqlAlchemyBatchData(
        engine=postgresql_engine, table_name="z_score_test_data"
    )
    engine = SqlAlchemyExecutionEngine(
        engine=postgresql_engine, batch_data_dict={"my_id": batch_data}
    )
    result = expectation.validate(Validator(execution_engine=engine))
    assert result == ExpectationValidationResult(success=True)

def test_graph_validate_with_bad_config(basic_datasource):
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10], "b": [1, 2, 3, 4, 5, None]})

    batch = basic_datasource.get_single_batch_from_batch_request(
        BatchRequest(
            **{
                "datasource_name": "my_datasource",
                "data_connector_name": "test_runtime_data_connector",
                "data_asset_name": "IN_MEMORY_DATA_ASSET",
                "batch_data": df,
                "partition_request": PartitionRequest(
                    **{
                        "partition_identifiers": {
                            "pipeline_stage_name": 0,
                            "airflow_run_id": 0,
                            "custom_key_0": 0,
                        }
                    }
                ),
            }
        )
    )

    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_max_to_be_between",
        kwargs={"column": "not_in_table", "min_value": 1, "max_value": 29},
    )
    try:
        result = Validator(
            execution_engine=PandasExecutionEngine(), batches=[batch]
        ).graph_validate(configurations=[expectation_configuration])
    except KeyError as e:
        result = e
    assert isinstance(result, KeyError)

def test_catch_exceptions_with_bad_expectation_type():
    # We want to catch degenerate cases where an expectation suite is incompatible with
    # the data asset it is validating.
    my_df = PandasDataset({"x": range(10)})
    my_df._expectation_suite.append_expectation(
        ExpectationConfiguration(expectation_type="foobar", kwargs={})
    )
    result = my_df.validate(catch_exceptions=True)

    # Find the foobar result
    idx = 0
    for idx, val_result in enumerate(result.results):
        if val_result.expectation_config.expectation_type == "foobar":
            break

    assert result.results[idx].success is False
    assert result.results[idx].expectation_config.expectation_type == "foobar"
    assert result.results[idx].expectation_config.kwargs == {}
    assert result.results[idx].exception_info["raised_exception"] is True
    assert (
        "AttributeError: 'PandasDataset' object has no attribute 'foobar'"
        in result.results[idx].exception_info["exception_traceback"]
    )

    with pytest.raises(AttributeError):
        result = my_df.validate(catch_exceptions=False)

def __init__(
    self,
    expectation_suite_name,
    data_context=None,
    expectations=None,
    evaluation_parameters=None,
    data_asset_type=None,
    execution_engine_type=None,
    meta=None,
    ge_cloud_id=None,
) -> None:
    self.expectation_suite_name = expectation_suite_name
    self.ge_cloud_id = ge_cloud_id
    self._data_context = data_context

    if expectations is None:
        expectations = []
    self.expectations = [
        ExpectationConfiguration(**expectation)
        if isinstance(expectation, dict)
        else expectation
        for expectation in expectations
    ]
    if evaluation_parameters is None:
        evaluation_parameters = {}
    self.evaluation_parameters = evaluation_parameters
    self.data_asset_type = data_asset_type
    self.execution_engine_type = execution_engine_type
    if meta is None:
        meta = {"great_expectations_version": ge_version}
    if (
        "great_expectations.__version__" not in meta.keys()
        and "great_expectations_version" not in meta.keys()
    ):
        meta["great_expectations_version"] = ge_version
    # We require meta information to be serializable, but do not convert until necessary
    ensure_json_serializable(meta)
    self.meta = meta

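# Hedged usage sketch (added for illustration; not part of the original source): the
# ExpectationSuite constructor above accepts either ExpectationConfiguration objects or
# plain dicts and normalizes the dicts. The suite name, column, and value set below are
# made-up examples, and the import paths assume the public great_expectations.core module.
def _example_build_suite_from_dicts():
    from great_expectations.core import ExpectationSuite
    from great_expectations.core.expectation_configuration import ExpectationConfiguration

    suite = ExpectationSuite(
        expectation_suite_name="example_suite",
        expectations=[
            {"expectation_type": "expect_column_to_exist", "kwargs": {"column": "a"}},
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_in_set",
                kwargs={"column": "a", "value_set": [1, 2, 3]},
            ),
        ],
    )
    # Every entry is normalized to an ExpectationConfiguration instance.
    assert all(isinstance(e, ExpectationConfiguration) for e in suite.expectations)
    return suite
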
def _create_type_expectation(
    self, key: str, details: dict
) -> Optional[ExpectationConfiguration]:
    object_types = self._get_object_types(details=details)
    object_types = list(
        filter(
            lambda object_type: object_type not in [JsonSchemaTypes.NULL.value],
            object_types,
        )
    )
    if len(object_types) == 0:
        return None

    type_list = []
    for type_ in object_types:
        type_list.extend(self.PROFILER_TYPE_LIST_BY_JSON_SCHEMA_TYPE[type_])

    kwargs = {"column": key, "type_list": type_list}
    return ExpectationConfiguration("expect_column_values_to_be_in_type_list", kwargs)

def multi_batch_taxi_validator_ge_cloud_mode(
    yellow_trip_pandas_data_context,
) -> Validator:
    context: DataContext = yellow_trip_pandas_data_context
    context._ge_cloud_mode = True

    suite: ExpectationSuite = ExpectationSuite(
        expectation_suite_name="validating_taxi_data",
        expectations=[
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_between",
                kwargs={
                    "column": "passenger_count",
                    "min_value": 0,
                    "max_value": 99,
                    "result_format": "BASIC",
                },
                meta={"notes": "This is an expectation."},
                ge_cloud_id=UUID("0faf94a9-f53a-41fb-8e94-32f218d4a774"),
            )
        ],
        data_context=context,
        meta={"notes": "This is an expectation suite."},
    )

    multi_batch_request: BatchRequest = BatchRequest(
        datasource_name="taxi_pandas",
        data_connector_name="monthly",
        data_asset_name="my_reports",
        data_connector_query={"batch_filter_parameters": {"year": "2019"}},
    )

    validator_multi_batch: Validator = context.get_validator(
        batch_request=multi_batch_request, expectation_suite=suite
    )
    return validator_multi_batch