def test_stats_mixed_expectations():
    expectation_results = [
        ExpectationValidationResult(success=False),
        ExpectationValidationResult(success=True),
    ]
    actual = _calc_validation_statistics(expectation_results)
    # ValidationStatistics fields: (evaluated, successful, unsuccessful,
    # success_percent, success)
    expected = ValidationStatistics(2, 1, 1, 50.0, False)
    assert expected == actual
def _build_evr(self, raw_response, configuration):
    """_build_evr is a lightweight convenience wrapper handling cases where an
    Expectation implementor fails to return an EVR but returns the necessary
    components in a dictionary."""
    if not isinstance(raw_response, ExpectationValidationResult):
        if isinstance(raw_response, dict):
            evr = ExpectationValidationResult(**raw_response)
            evr.expectation_config = configuration
        else:
            raise GreatExpectationsError("Unable to build EVR")
    else:
        evr = raw_response
        evr.expectation_config = configuration
    return evr
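# Illustrative sketch only (not part of the original suite): exercises the two
# input shapes _build_evr accepts. Assumes an Expectation instance `expectation`
# and an ExpectationConfiguration fixture `config` exist; both are hypothetical.
def test__build_evr_usage_sketch(expectation, config):
    # Dict path: the components are wrapped into an EVR and the configuration attached.
    evr = expectation._build_evr({"success": True, "result": {}}, config)
    assert evr.success is True
    assert evr.expectation_config is config

    # EVR path: a pre-built EVR passes through, with its config overwritten.
    evr = expectation._build_evr(ExpectationValidationResult(success=False), config)
    assert evr.expectation_config is config

    # Anything else raises.
    with pytest.raises(GreatExpectationsError):
        expectation._build_evr("not an EVR", config)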
def test_stats_no_successful_expectations():
    expectation_results = [ExpectationValidationResult(success=False)]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(1, 0, 1, 0.0, False)
    assert expected == actual

    expectation_results = [
        ExpectationValidationResult(success=False),
        ExpectationValidationResult(success=False),
        ExpectationValidationResult(success=False),
    ]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(3, 0, 3, 0.0, False)
    assert expected == actual
def test_test_column_map_expectation_function():
    asset = ge.dataset.PandasDataset(
        {
            "x": [1, 3, 5, 7, 9],
            "y": [1, 2, None, 7, 9],
        }
    )

    def is_odd(
        self,
        column,
        mostly=None,
        result_format=None,
        include_config=True,
        catch_exceptions=None,
        meta=None,
    ):
        return column % 2 == 1

    assert asset.test_column_map_expectation_function(
        is_odd, column="x", include_config=False
    ) == ExpectationValidationResult(
        result={
            "element_count": 5,
            "missing_count": 0,
            "missing_percent": 0,
            "unexpected_percent": 0.0,
            "partial_unexpected_list": [],
            "unexpected_percent_nonmissing": 0.0,
            "unexpected_count": 0,
        },
        success=True,
    )
    assert asset.test_column_map_expectation_function(
        is_odd, "x", result_format="BOOLEAN_ONLY", include_config=False
    ) == ExpectationValidationResult(success=True)
    assert asset.test_column_map_expectation_function(
        is_odd, column="y", result_format="BOOLEAN_ONLY", include_config=False
    ) == ExpectationValidationResult(success=False)
    assert asset.test_column_map_expectation_function(
        is_odd,
        column="y",
        result_format="BOOLEAN_ONLY",
        mostly=0.7,
        include_config=False,
    ) == ExpectationValidationResult(success=True)
def z_score_validation_result():
    return ExpectationValidationResult(
        success=True,
        expectation_config={
            "expectation_type": "expect_column_value_z_scores_to_be_less_than",
            "kwargs": {
                "column": "a",
                "mostly": 0.9,
                "threshold": 4,
                "double_sided": True,
            },
            "meta": {},
        },
        result={
            "element_count": 6,
            "unexpected_count": 0,
            "unexpected_percent": 0.0,
            "partial_unexpected_list": [],
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent_total": 0.0,
            "unexpected_percent_nonmissing": 0.0,
        },
        exception_info={
            "raised_exception": False,
            "exception_traceback": None,
            "exception_message": None,
        },
        meta={},
    )
def test_spark_expect_column_value_z_scores_to_be_less_than_impl(
    spark_session, basic_spark_df_execution_engine
):
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10]})
    spark = get_or_create_spark_application(
        spark_config={
            "spark.sql.catalogImplementation": "hive",
            "spark.executor.memory": "450m",
            # "spark.driver.allowMultipleContexts": "true",
            # This directive does not appear to have any effect.
        }
    )
    df = spark.createDataFrame(df)

    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "a",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    expectation = ExpectColumnValueZScoresToBeLessThan(expectation_configuration)
    engine = basic_spark_df_execution_engine
    engine.load_batch_data(batch_id="my_id", batch_data=df)
    result = expectation.validate(Validator(execution_engine=engine))
    assert result == ExpectationValidationResult(success=True)
def test_stats_all_successful_expectations():
    expectation_results = [
        ExpectationValidationResult(success=True),
    ]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(1, 1, 0, 100.0, True)
    assert expected == actual

    expectation_results = [
        ExpectationValidationResult(success=True),
        ExpectationValidationResult(success=True),
        ExpectationValidationResult(success=True),
    ]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(3, 3, 0, 100.0, True)
    assert expected == actual
def test_sa_expect_column_value_z_scores_to_be_less_than_impl(postgresql_engine):
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10]})
    df.to_sql(
        name="z_score_test_data",
        con=postgresql_engine,
        index=False,
        if_exists="replace",
    )

    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "a",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    expectation = ExpectColumnValueZScoresToBeLessThan(expectation_configuration)
    engine = SqlAlchemyExecutionEngine(engine=postgresql_engine)
    engine.load_batch_data(
        "my_id",
        SqlAlchemyBatchData(execution_engine=engine, table_name="z_score_test_data"),
    )
    result = expectation.validate(Validator(execution_engine=engine))
    assert result == ExpectationValidationResult(success=True)
def test_expect_column_values_to_be_of_type_string_dialect_sqlite(sa):
    df = pd.DataFrame({"col": ["test_val1", "test_val2"]})
    validator = build_sa_validator_with_data(df, "sqlite")

    result = validator.expect_column_values_to_be_of_type("col", type_="TEXT")

    assert result == ExpectationValidationResult(
        success=True,
        expectation_config={
            "expectation_type": "expect_column_values_to_be_of_type",
            "kwargs": {
                "column": "col",
                "type_": "TEXT",
            },
            "meta": {},
        },
        result={
            "element_count": 2,
            "unexpected_count": 0,
            "unexpected_percent": 0.0,
            "partial_unexpected_list": [],
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent_total": 0.0,
            "unexpected_percent_nonmissing": 0.0,
        },
        exception_info={
            "raised_exception": False,
            "exception_traceback": None,
            "exception_message": None,
        },
        meta={},
    )
def test_graph_validate_with_runtime_config(basic_datasource):
    df = pd.DataFrame(
        {"a": [1, 5, 22, 3, 5, 10, 2, 3], "b": [97, 332, 3, 4, 5, 6, 7, None]}
    )

    batch = basic_datasource.get_single_batch_from_batch_request(
        BatchRequest(
            **{
                "datasource_name": "my_datasource",
                "data_connector_name": "test_runtime_data_connector",
                "data_asset_name": "IN_MEMORY_DATA_ASSET",
                "batch_data": df,
                "partition_request": PartitionRequest(
                    **{
                        "batch_identifiers": {
                            "pipeline_stage_name": 0,
                            "airflow_run_id": 0,
                            "custom_key_0": 0,
                        }
                    }
                ),
            }
        )
    )

    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={"column": "b", "mostly": 1, "threshold": 2, "double_sided": True},
    )
    try:
        result = Validator(
            execution_engine=PandasExecutionEngine(), batches=(batch,)
        ).graph_validate(
            configurations=[expectation_configuration],
            runtime_configuration={"result_format": "COMPLETE"},
        )
    except AssertionError as e:
        result = e
    assert result == [
        ExpectationValidationResult(
            success=False,
            meta={},
            result={
                "element_count": 8,
                "unexpected_count": 1,
                "unexpected_percent": 12.5,
                "partial_unexpected_list": [332.0],
                "missing_count": 1,
                "missing_percent": 12.5,
                "unexpected_percent_nonmissing": 14.285714285714285,
                "partial_unexpected_index_list": None,
                "partial_unexpected_counts": [{"value": 332.0, "count": 1}],
                "unexpected_list": [332.0],
                "unexpected_index_list": None,
            },
            expectation_config=None,
            exception_info=None,
        )
    ]
def test_test_expectation_function():
    asset = ge.dataset.PandasDataset(
        {
            "x": [1, 3, 5, 7, 9],
            "y": [1, 2, None, 7, 9],
        }
    )
    asset_2 = ge.dataset.PandasDataset(
        {
            "x": [1, 3, 5, 6, 9],
            "y": [1, 2, None, 6, 9],
        }
    )

    def expect_dataframe_to_contain_7(self):
        return {"success": bool((self == 7).sum().sum() > 0)}

    assert asset.test_expectation_function(
        expect_dataframe_to_contain_7, include_config=False
    ) == ExpectationValidationResult(success=True)
    assert asset_2.test_expectation_function(
        expect_dataframe_to_contain_7, include_config=False
    ) == ExpectationValidationResult(success=False)
def test_graph_validate(basic_datasource):
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10], "b": [1, 2, 3, 4, 5, None]})

    batch = basic_datasource.get_single_batch_from_batch_request(
        BatchRequest(
            **{
                "datasource_name": "my_datasource",
                "data_connector_name": "test_runtime_data_connector",
                "data_asset_name": "IN_MEMORY_DATA_ASSET",
                "batch_data": df,
                "partition_request": PartitionRequest(
                    **{
                        "batch_identifiers": {
                            "pipeline_stage_name": 0,
                            "airflow_run_id": 0,
                            "custom_key_0": 0,
                        }
                    }
                ),
            }
        )
    )

    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "b",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    result = Validator(
        execution_engine=PandasExecutionEngine(), batches=[batch]
    ).graph_validate(configurations=[expectation_configuration])
    assert result == [
        ExpectationValidationResult(
            success=True,
            expectation_config=None,
            meta={},
            result={
                "element_count": 6,
                "unexpected_count": 0,
                "unexpected_percent": 0.0,
                "partial_unexpected_list": [],
                "missing_count": 1,
                "missing_percent": 16.666666666666664,
                "unexpected_percent_nonmissing": 0.0,
            },
            exception_info=None,
        )
    ]
def test__find_evr_by_type(titanic_profiled_evrs_1):
    # TODO: _find_all_evrs_by_type should accept a ValidationResultSuite,
    # not ValidationResultSuite.results
    found_evr = Renderer()._find_evr_by_type(
        titanic_profiled_evrs_1.results, "expect_column_to_exist"
    )
    print(found_evr)
    assert found_evr is None

    # TODO: _find_all_evrs_by_type should accept a ValidationResultSuite,
    # not ValidationResultSuite.results
    found_evr = Renderer()._find_evr_by_type(
        titanic_profiled_evrs_1.results, "expect_column_distinct_values_to_be_in_set"
    )
    print(found_evr)
    assert found_evr == ExpectationValidationResult(
        success=True,
        result={
            "observed_value": ["*", "1st", "2nd", "3rd"],
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "details": {
                "value_counts": [
                    {"value": "*", "count": 1},
                    {"value": "1st", "count": 322},
                    {"value": "2nd", "count": 279},
                    {"value": "3rd", "count": 711},
                ]
            },
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_distinct_values_to_be_in_set",
            kwargs={"column": "PClass", "value_set": None, "result_format": "SUMMARY"},
        ),
    )
def test_expectation_decorator_meta():
    metadata = {"meta_key": "meta_value"}
    eds = ExpectationOnlyDataAsset()

    out = eds.no_op_value_expectation("a", meta=metadata)
    config = eds.get_expectation_suite()

    assert (
        ExpectationValidationResult(
            success=True, meta=metadata, expectation_config=config.expectations[0]
        )
        == out
    )
    assert (
        ExpectationConfiguration(
            expectation_type="no_op_value_expectation",
            kwargs={"value": "a"},
            meta=metadata,
        )
        == config.expectations[0]
    )
def test_expect_column_value_z_scores_to_be_less_than_impl():
    df = pd.DataFrame({"a": [1, 5, 22, 3, 5, 10]})

    expectation_configuration = ExpectationConfiguration(
        expectation_type="expect_column_value_z_scores_to_be_less_than",
        kwargs={
            "column": "a",
            "mostly": 0.9,
            "threshold": 4,
            "double_sided": True,
        },
    )
    expectation = ExpectColumnValueZScoresToBeLessThan(expectation_configuration)
    engine = PandasExecutionEngine(batch_data_dict={"my_id": df})
    result = expectation.validate(Validator(execution_engine=engine))
    assert result == ExpectationValidationResult(success=True)
def _validate(
    self,
    configuration: ExpectationConfiguration,
    metrics: Dict,
    runtime_configuration: dict = None,
    execution_engine: ExecutionEngine = None,
) -> ExpectationValidationResult:
    string_integers_increasing = metrics.get(
        "column_values.string_integers.increasing.map"
    )
    # Element 0 of the metric payload is the per-row boolean map; the
    # expectation succeeds only if every row satisfied the ordering.
    success = all(string_integers_increasing[0])
    return ExpectationValidationResult(
        result={
            "observed_value": np.unique(
                string_integers_increasing[0], return_counts=True
            )
        },
        success=success,
    )
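# Illustrative sketch only (not from the original suite): what `_validate`
# does with its metrics dict. The payload below is a hypothetical stand-in for
# the value resolved for the "column_values.string_integers.increasing.map"
# metric; element 0 is assumed to be the per-row boolean map.
def test__validate_metric_payload_sketch():
    string_integers_increasing = (np.array([True, True, False]),)
    # success mirrors `all(string_integers_increasing[0])` above
    assert all(string_integers_increasing[0]) is False
    # observed_value mirrors the np.unique(..., return_counts=True) call
    values, counts = np.unique(string_integers_increasing[0], return_counts=True)
    assert values.tolist() == [False, True]
    assert counts.tolist() == [1, 2]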
def test_expect_column_values_to_be_in_type_list_nullable_int():
    from packaging.version import parse

    pandas_version = parse(pd.__version__)
    if pandas_version < parse("0.24"):
        # Nullable integer dtypes (Int32Dtype and friends) landed in pandas 0.24.
        pytest.skip("Prior to 0.24, Pandas did not have `Int32Dtype` or related.")
    df = pd.DataFrame({"col": pd.Series([1, 2, None], dtype=pd.Int32Dtype())})
    validator = build_pandas_validator_with_data(df)

    result = validator.expect_column_values_to_be_in_type_list(
        "col", type_list=["Int32Dtype"]
    )

    assert result == ExpectationValidationResult(
        success=True,
        expectation_config={
            "expectation_type": "expect_column_values_to_be_in_type_list",
            "kwargs": {
                "column": "col",
                "type_list": ["Int32Dtype"],
            },
            "meta": {},
        },
        result={
            "element_count": 3,
            "unexpected_count": 0,
            "unexpected_percent": 0.0,
            "partial_unexpected_list": [],
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent_total": 0.0,
            "unexpected_percent_nonmissing": 0.0,
        },
        exception_info={
            "raised_exception": False,
            "exception_traceback": None,
            "exception_message": None,
        },
        meta={},
    )
def test_expect_table_row_count_to_be_between_runtime_custom_query_with_where_no_temp_table_sa(
    titanic_v013_multi_datasource_multi_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled,
):
    context: DataContext = titanic_v013_multi_datasource_multi_execution_engine_data_context_with_checkpoints_v1_with_empty_store_stats_enabled
    batch_request = RuntimeBatchRequest(
        datasource_name="my_sqlite_db_datasource",
        data_connector_name="default_runtime_data_connector_name",
        data_asset_name="titanic",
        runtime_parameters={"query": "select * from titanic where sexcode = 1"},
        batch_identifiers={"default_identifier_name": "test_identifier"},
        batch_spec_passthrough={"create_temp_table": False},
    )
    validator = context.get_validator(
        batch_request=batch_request,
        create_expectation_suite_with_name="test",
    )
    results = validator.expect_table_row_count_to_be_between(
        min_value=100, max_value=2000
    )
    assert results == ExpectationValidationResult(
        success=True,
        result={"observed_value": 462},
        meta={},
        expectation_config={
            "kwargs": {
                "min_value": 100,
                "max_value": 2000,
                "batch_id": "a47a711a9984cb2a482157adf54c3cb6",
            },
            "ge_cloud_id": None,
            "meta": {},
            "expectation_type": "expect_table_row_count_to_be_between",
        },
        exception_info={
            "raised_exception": False,
            "exception_traceback": None,
            "exception_message": None,
        },
    )
def test_expect_column_values_to_be_of_type_string_dialect_pyathena(sa):
    from pyathena import sqlalchemy_athena

    df = pd.DataFrame({"col": ["test_val1", "test_val2"]})
    validator = build_sa_validator_with_data(df, "sqlite")

    # Monkey-patch dialect for testing purposes.
    validator.execution_engine.dialect_module = sqlalchemy_athena

    result = validator.expect_column_values_to_be_of_type("col", type_="STRINGTYPE")

    assert result == ExpectationValidationResult(
        success=True,
        expectation_config={
            "expectation_type": "expect_column_values_to_be_of_type",
            "kwargs": {
                "column": "col",
                "type_": "STRINGTYPE",
            },
            "meta": {},
        },
        result={
            "element_count": 2,
            "unexpected_count": 0,
            "unexpected_percent": 0.0,
            "partial_unexpected_list": [],
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent_total": 0.0,
            "unexpected_percent_nonmissing": 0.0,
        },
        exception_info={
            "raised_exception": False,
            "exception_traceback": None,
            "exception_message": None,
        },
        meta={},
    )
def test_column_map_expectation_decorator():
    # Create a new CustomPandasDataset to
    # (1) demonstrate that custom subclassing works, and
    # (2) test expectation business logic without dependencies on any other functions.
    class CustomPandasDataset(PandasDataset):
        @MetaPandasDataset.column_map_expectation
        def expect_column_values_to_be_odd(self, column):
            return column.map(lambda x: x % 2)

        @MetaPandasDataset.column_map_expectation
        def expectation_that_crashes_on_sixes(self, column):
            return column.map(lambda x: (x - 6) / 0 != "duck")

    df = CustomPandasDataset(
        {
            "all_odd": [1, 3, 5, 5, 5, 7, 9, 9, 9, 11],
            "mostly_odd": [1, 3, 5, 7, 9, 2, 4, 1, 3, 5],
            "all_even": [2, 4, 4, 6, 6, 6, 8, 8, 8, 8],
            "odd_missing": [1, 3, 5, None, None, None, None, 1, 3, None],
            "mixed_missing": [1, 3, 5, None, None, 2, 4, 1, 3, None],
            "all_missing": [None, None, None, None, None, None, None, None, None, None],
        }
    )
    df.set_default_expectation_argument("result_format", "COMPLETE")
    df.set_default_expectation_argument("include_config", False)

    assert df.expect_column_values_to_be_odd("all_odd") == ExpectationValidationResult(
        result={
            "element_count": 10,
            "missing_count": 0,
            "missing_percent": 0.0,
            "partial_unexpected_counts": [],
            "partial_unexpected_index_list": [],
            "partial_unexpected_list": [],
            "unexpected_count": 0,
            "unexpected_index_list": [],
            "unexpected_list": [],
            "unexpected_percent": 0.0,
            "unexpected_percent_nonmissing": 0.0,
        },
        success=True,
    )

    assert df.expect_column_values_to_be_odd(
        "all_missing"
    ) == ExpectationValidationResult(
        result={
            "element_count": 10,
            "missing_count": 10,
            "missing_percent": 100.0,
            "partial_unexpected_counts": [],
            "partial_unexpected_index_list": [],
            "partial_unexpected_list": [],
            "unexpected_count": 0,
            "unexpected_index_list": [],
            "unexpected_list": [],
            "unexpected_percent": None,
            "unexpected_percent_nonmissing": None,
            "unexpected_percent_total": None,
        },
        success=True,
    )

    assert df.expect_column_values_to_be_odd(
        "odd_missing"
    ) == ExpectationValidationResult(
        result={
            "element_count": 10,
            "missing_count": 5,
            "missing_percent": 50.0,
            "partial_unexpected_counts": [],
            "partial_unexpected_index_list": [],
            "partial_unexpected_list": [],
            "unexpected_count": 0,
            "unexpected_index_list": [],
            "unexpected_list": [],
            "unexpected_percent": 0.0,
            "unexpected_percent_nonmissing": 0.0,
        },
        success=True,
    )

    assert df.expect_column_values_to_be_odd(
        "mixed_missing"
    ) == ExpectationValidationResult(
        result={
            "element_count": 10,
            "missing_count": 3,
            "missing_percent": 30.0,
            "partial_unexpected_counts": [
                {"value": 2.0, "count": 1},
                {"value": 4.0, "count": 1},
            ],
            "partial_unexpected_index_list": [5, 6],
            "partial_unexpected_list": [2.0, 4.0],
            "unexpected_count": 2,
            "unexpected_index_list": [5, 6],
            "unexpected_list": [2, 4],
            "unexpected_percent": (2.0 / 7) * 100,
            "unexpected_percent_nonmissing": (2.0 / 7) * 100,
        },
        success=False,
    )

    assert df.expect_column_values_to_be_odd(
        "mostly_odd"
    ) == ExpectationValidationResult(
        result={
            "element_count": 10,
            "missing_count": 0,
            "missing_percent": 0,
            "partial_unexpected_counts": [
                {"value": 2.0, "count": 1},
                {"value": 4.0, "count": 1},
            ],
            "partial_unexpected_index_list": [5, 6],
            "partial_unexpected_list": [2.0, 4.0],
            "unexpected_count": 2,
            "unexpected_index_list": [5, 6],
            "unexpected_list": [2, 4],
            "unexpected_percent": 20.0,
            "unexpected_percent_nonmissing": 20.0,
        },
        success=False,
    )

    assert df.expect_column_values_to_be_odd(
        "mostly_odd", mostly=0.6
    ) == ExpectationValidationResult(
        result={
            "element_count": 10,
            "missing_count": 0,
            "missing_percent": 0,
            "partial_unexpected_counts": [
                {"value": 2.0, "count": 1},
                {"value": 4.0, "count": 1},
            ],
            "partial_unexpected_index_list": [5, 6],
            "partial_unexpected_list": [2.0, 4.0],
            "unexpected_count": 2,
            "unexpected_index_list": [5, 6],
            "unexpected_list": [2, 4],
            "unexpected_percent": 20.0,
            "unexpected_percent_nonmissing": 20.0,
        },
        success=True,
    )

    assert df.expect_column_values_to_be_odd(
        "mostly_odd", result_format="BOOLEAN_ONLY"
    ) == ExpectationValidationResult(success=False)

    df.default_expectation_args["result_format"] = "BOOLEAN_ONLY"
    assert df.expect_column_values_to_be_odd(
        "mostly_odd"
    ) == ExpectationValidationResult(success=False)

    df.default_expectation_args["result_format"] = "BASIC"
    assert df.expect_column_values_to_be_odd(
        "mostly_odd", include_config=True
    ) == ExpectationValidationResult(
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_odd",
            kwargs={"column": "mostly_odd", "result_format": "BASIC"},
        ),
        result={
            "element_count": 10,
            "missing_count": 0,
            "missing_percent": 0,
            "partial_unexpected_list": [2, 4],
            "unexpected_count": 2,
            "unexpected_percent": 20.0,
            "unexpected_percent_nonmissing": 20.0,
        },
        success=False,
    )
def test_file_format_map_output():
    incomplete_file_path = file_relative_path(
        __file__, "../test_sets/toy_data_incomplete.csv"
    )
    incomplete_file_dat = ge.data_asset.FileDataAsset(incomplete_file_path)
    null_file_path = file_relative_path(__file__, "../test_sets/null_file.csv")
    null_file_dat = ge.data_asset.FileDataAsset(null_file_path)
    white_space_path = file_relative_path(__file__, "../test_sets/white_space.txt")
    white_space_dat = ge.data_asset.FileDataAsset(white_space_path)

    # Boolean Expectation Output
    expectation = incomplete_file_dat.expect_file_line_regex_match_count_to_equal(
        regex=r",\S",
        expected_count=3,
        skip=1,
        result_format="BOOLEAN_ONLY",
        include_config=False,
    )
    expected_result = ExpectationValidationResult(success=False)
    assert expected_result == expectation

    # Empty File Expectations
    expectation = null_file_dat.expect_file_line_regex_match_count_to_equal(
        regex=r",\S",
        expected_count=3,
        skip=1,
        result_format="BASIC",
        include_config=False,
    )
    expected_result = ExpectationValidationResult(
        success=None,
        result={
            "element_count": 0,
            "missing_count": 0,
            "missing_percent": None,
            "unexpected_count": 0,
            "unexpected_percent": None,
            "unexpected_percent_nonmissing": None,
            "partial_unexpected_list": [],
        },
    )
    assert expected_result == expectation

    # White Space File
    expectation = white_space_dat.expect_file_line_regex_match_count_to_equal(
        regex=r",\S", expected_count=3, result_format="BASIC", include_config=False
    )
    expected_result = ExpectationValidationResult(
        success=None,
        result={
            "element_count": 0,
            "missing_count": 0,
            "missing_percent": None,
            "unexpected_count": 0,
            "unexpected_percent": None,
            "unexpected_percent_nonmissing": None,
            "unexpected_percent_total": None,
            "partial_unexpected_list": [],
        },
    )
    assert expected_result == expectation

    # Complete Result Format
    expectation = incomplete_file_dat.expect_file_line_regex_match_count_to_equal(
        regex=r",\S",
        expected_count=3,
        skip=1,
        result_format="COMPLETE",
        include_config=False,
    )
    expected_result = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 9,
            "missing_count": 2,
            "missing_percent": (2.0 / 9) * 100,
            "unexpected_count": 3,
            "unexpected_percent": (3.0 / 7) * 100,
            "unexpected_percent_total": (3.0 / 9) * 100,
            "unexpected_percent_nonmissing": (3.0 / 7) * 100,
            "partial_unexpected_list": ["A,C,1\n", "B,1,4\n", "A,1,4\n"],
            "partial_unexpected_counts": [
                {"value": "A,1,4\n", "count": 1},
                {"value": "A,C,1\n", "count": 1},
                {"value": "B,1,4\n", "count": 1},
            ],
            "partial_unexpected_index_list": [0, 3, 5],
            "unexpected_list": ["A,C,1\n", "B,1,4\n", "A,1,4\n"],
            "unexpected_index_list": [0, 3, 5],
        },
    )
    assert expected_result == expectation

    # Invalid Result Format
    with pytest.raises(ValueError):
        expectation = incomplete_file_dat.expect_file_line_regex_match_count_to_equal(
            regex=r",\S",
            expected_count=3,
            skip=1,
            result_format="JOKE",
            include_config=False,
        )
def test_validate():
    with open(
        file_relative_path(__file__, "./test_sets/titanic_expectations.json")
    ) as f:
        my_expectation_suite = expectationSuiteSchema.loads(f.read())

    with mock.patch("uuid.uuid1") as uuid:
        uuid.return_value = "1234"
        my_df = ge.read_csv(
            file_relative_path(__file__, "./test_sets/Titanic.csv"),
            expectation_suite=my_expectation_suite,
        )
    my_df.set_default_expectation_argument("result_format", "COMPLETE")

    results = my_df.validate(catch_exceptions=False)

    with open(
        file_relative_path(
            __file__, "./test_sets/titanic_expected_data_asset_validate_results.json"
        )
    ) as f:
        expected_results = expectationSuiteValidationResultSchema.loads(f.read())

    del results.meta["great_expectations_version"]
    del results.meta["expectation_suite_meta"]["great_expectations_version"]
    assert results.to_json_dict() == expected_results.to_json_dict()

    # Now, change the results and ensure they are no longer equal
    results.results[0] = ExpectationValidationResult()
    assert results.to_json_dict() != expected_results.to_json_dict()

    # Finally, confirm that only_return_failures works
    # and does not affect the "statistics" field.
    validation_results = my_df.validate(only_return_failures=True)
    del validation_results.meta["great_expectations_version"]
    del validation_results.meta["expectation_suite_meta"]["great_expectations_version"]
    expected_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "titanic",
            "run_id": {"run_name": None, "run_time": "1955-11-05T00:00:00+00:00"},
            "validation_time": "19551105T000000.000000Z",
            "batch_kwargs": {"ge_batch_id": "1234"},
            "expectation_suite_meta": {},
            "batch_markers": {},
            "batch_parameters": {},
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_values_to_be_in_set",
                    kwargs={"column": "PClass", "value_set": ["1st", "2nd", "3rd"]},
                ),
                success=False,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False,
                },
                result={
                    "partial_unexpected_index_list": [456],
                    "unexpected_count": 1,
                    "unexpected_list": ["*"],
                    "unexpected_percent": 0.07616146230007616,
                    "element_count": 1313,
                    "missing_percent": 0.0,
                    "partial_unexpected_counts": [{"count": 1, "value": "*"}],
                    "partial_unexpected_list": ["*"],
                    "unexpected_percent_total": 0.07616146230007616,
                    "unexpected_percent_nonmissing": 0.07616146230007616,
                    "missing_count": 0,
                    "unexpected_index_list": [456],
                },
            )
        ],
        success=expected_results.success,  # unaffected
        statistics=expected_results["statistics"],  # unaffected
    )
    assert validation_results.to_json_dict() == expected_results.to_json_dict()
def test_ValidationResultsTableContentBlockRenderer_get_observed_value(evr_success):
    evr_no_result_key = ExpectationValidationResult(
        success=True,
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_table_row_count_to_be_between",
            kwargs={"min_value": 0, "max_value": None, "result_format": "SUMMARY"},
        ),
    )
    evr_expect_column_values_to_not_be_null = ExpectationValidationResult(
        success=True,
        result={
            "element_count": 1313,
            "unexpected_count": 1050,
            "unexpected_percent": 79.96953541508,
            "partial_unexpected_list": [],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_not_be_null",
            kwargs={"column": "Unnamed: 0", "mostly": 0.5, "result_format": "SUMMARY"},
        ),
    )
    evr_expect_column_values_to_be_null = ExpectationValidationResult(
        success=True,
        result={
            "element_count": 1313,
            "unexpected_count": 0,
            "unexpected_percent": 0.0,
            "partial_unexpected_list": [],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_null",
            kwargs={"column": "Unnamed: 0", "mostly": 0.5, "result_format": "SUMMARY"},
        ),
    )

    # test _get_observed_value when evr.result["observed_value"] exists
    output_1 = get_renderer_impl(
        object_name=evr_success.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.observed_value",
    )[1](result=evr_success)
    assert output_1 == "1,313"

    # test _get_observed_value when evr.result does not exist
    output_2 = get_renderer_impl(
        object_name=evr_no_result_key.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.observed_value",
    )[1](result=evr_no_result_key)
    assert output_2 == "--"

    # test _get_observed_value for expect_column_values_to_not_be_null expectation type
    output_3 = get_renderer_impl(
        object_name=evr_expect_column_values_to_not_be_null.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.observed_value",
    )[1](result=evr_expect_column_values_to_not_be_null)
    assert output_3 == "≈20.03% not null"

    # test _get_observed_value for expect_column_values_to_be_null expectation type
    output_4 = get_renderer_impl(
        object_name=evr_expect_column_values_to_be_null.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.observed_value",
    )[1](result=evr_expect_column_values_to_be_null)
    assert output_4 == "100% null"
def test_test_column_aggregate_expectation_function():
    asset = ge.dataset.PandasDataset(
        {
            "x": [1, 3, 5, 7, 9],
            "y": [1, 2, None, 7, 9],
        }
    )

    def expect_second_value_to_be(
        self,
        column,
        value,
        result_format=None,
        include_config=True,
        catch_exceptions=None,
        meta=None,
    ):
        return {
            "success": self[column].iloc[1] == value,
            "result": {
                "observed_value": self[column].iloc[1],
            },
        }

    assert asset.test_column_aggregate_expectation_function(
        expect_second_value_to_be, "x", 2, include_config=False
    ) == ExpectationValidationResult(
        result={
            "observed_value": 3,
            "element_count": 5,
            "missing_count": 0,
            "missing_percent": 0.0,
        },
        success=False,
    )
    assert asset.test_column_aggregate_expectation_function(
        expect_second_value_to_be, column="x", value=3, include_config=False
    ) == ExpectationValidationResult(
        result={
            "observed_value": 3.0,
            "element_count": 5,
            "missing_count": 0,
            "missing_percent": 0.0,
        },
        success=True,
    )
    assert asset.test_column_aggregate_expectation_function(
        expect_second_value_to_be,
        "y",
        value=3,
        result_format="BOOLEAN_ONLY",
        include_config=False,
    ) == ExpectationValidationResult(success=False)
    assert asset.test_column_aggregate_expectation_function(
        expect_second_value_to_be,
        "y",
        2,
        result_format="BOOLEAN_ONLY",
        include_config=False,
    ) == ExpectationValidationResult(success=True)
def test_evaluation_parameter_store_methods(
    data_context_parameterized_expectation_suite: DataContext,
):
    run_id = RunIdentifier(run_name="20191125T000000.000000Z")
    source_patient_data_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "source_patient_data.default",
            "run_id": run_id,
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_table_row_count_to_equal",
                    kwargs={
                        "value": 1024,
                    },
                ),
                success=True,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False,
                },
                result={
                    "observed_value": 1024,
                    "element_count": 1024,
                    "missing_percent": 0.0,
                    "missing_count": 0,
                },
            )
        ],
        success=True,
    )

    data_context_parameterized_expectation_suite.store_evaluation_parameters(
        source_patient_data_results
    )

    bound_parameters = data_context_parameterized_expectation_suite.evaluation_parameter_store.get_bind_params(
        run_id
    )
    assert bound_parameters == {
        "urn:great_expectations:validations:source_patient_data.default"
        ":expect_table_row_count_to_equal.result.observed_value": 1024
    }

    source_diabetes_data_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "source_diabetes_data.default",
            "run_id": run_id,
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_unique_value_count_to_be_between",
                    kwargs={"column": "patient_nbr", "min": 2048, "max": 2048},
                ),
                success=True,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False,
                },
                result={
                    "observed_value": 2048,
                    "element_count": 5000,
                    "missing_percent": 0.0,
                    "missing_count": 0,
                },
            )
        ],
        success=True,
    )
    data_context_parameterized_expectation_suite.store_evaluation_parameters(
        source_diabetes_data_results
    )
    bound_parameters = data_context_parameterized_expectation_suite.evaluation_parameter_store.get_bind_params(
        run_id
    )
    assert bound_parameters == {
        "urn:great_expectations:validations:source_patient_data.default"
        ":expect_table_row_count_to_equal.result.observed_value": 1024,
        "urn:great_expectations:validations:source_diabetes_data.default"
        ":expect_column_unique_value_count_to_be_between.result.observed_value"
        ":column=patient_nbr": 2048,
    }
def test_StoreMetricsAction_column_metric(
    basic_in_memory_data_context_for_validation_operator,
):
    action = StoreMetricsAction(
        data_context=basic_in_memory_data_context_for_validation_operator,
        requested_metrics={
            "*": [
                {
                    "column": {
                        "provider_id": [
                            "expect_column_values_to_be_unique.result.unexpected_count"
                        ]
                    }
                },
                "statistics.evaluated_expectations",
                "statistics.successful_expectations",
            ]
        },
        target_store_name="metrics_store",
    )

    run_id = RunIdentifier(run_name="bar")
    validation_result = ExpectationSuiteValidationResult(
        success=False,
        meta={"expectation_suite_name": "foo", "run_id": run_id},
        results=[
            ExpectationValidationResult(
                meta={},
                result={
                    "element_count": 10,
                    "missing_count": 0,
                    "missing_percent": 0.0,
                    "unexpected_count": 7,
                    "unexpected_percent": 0.0,
                    "unexpected_percent_nonmissing": 0.0,
                    "partial_unexpected_list": [],
                },
                success=True,
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_values_to_be_unique",
                    kwargs={"column": "provider_id", "result_format": "BASIC"},
                ),
                exception_info=None,
            )
        ],
        statistics={"evaluated_expectations": 5, "successful_expectations": 3},
    )

    action.run(
        validation_result,
        ValidationResultIdentifier.from_object(validation_result),
        data_asset=None,
    )

    assert (
        basic_in_memory_data_context_for_validation_operator.stores[
            "metrics_store"
        ].get(
            ValidationMetricIdentifier(
                run_id=run_id,
                data_asset_name=None,
                expectation_suite_identifier=ExpectationSuiteIdentifier("foo"),
                metric_name="expect_column_values_to_be_unique.result.unexpected_count",
                metric_kwargs_id="column=provider_id",
            )
        )
        == 7
    )
def test_ValidationResultsTableContentBlockRenderer_get_unexpected_statement(
    evr_success, evr_failed
):
    evr_no_result = ExpectationValidationResult(
        success=True,
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_table_row_count_to_be_between",
            kwargs={"min_value": 0, "max_value": None, "result_format": "SUMMARY"},
        ),
    )
    evr_failed_no_unexpected_count = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent": 0.2284843869002285,
            "unexpected_percent_nonmissing": 0.2284843869002285,
            "partial_unexpected_list": [
                "Daly, Mr Peter Denis ",
                "Barber, Ms ",
                "Geiger, Miss Emily ",
            ],
            "partial_unexpected_index_list": [77, 289, 303],
            "partial_unexpected_counts": [
                {"value": "Barber, Ms ", "count": 1},
                {"value": "Daly, Mr Peter Denis ", "count": 1},
                {"value": "Geiger, Miss Emily ", "count": 1},
            ],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_not_match_regex",
            kwargs={
                "column": "Name",
                "regex": "^\\s+|\\s+$",
                "result_format": "SUMMARY",
            },
        ),
    )

    # test for succeeded evr
    output_1 = get_renderer_impl(
        object_name=evr_success.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_statement",
    )[1](result=evr_success)
    assert output_1 == []

    # test for failed evr
    output_2 = get_renderer_impl(
        object_name=evr_failed.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_statement",
    )[1](result=evr_failed)
    assert output_2 == [
        RenderedStringTemplateContent(
            **{
                "content_block_type": "string_template",
                "string_template": {
                    "template": "\n\n$unexpected_count unexpected values found. $unexpected_percent of $element_count total rows.",
                    "params": {
                        "unexpected_count": "3",
                        "unexpected_percent": "≈0.2285%",
                        "element_count": "1,313",
                    },
                    "tag": "strong",
                    "styling": {"classes": ["text-danger"]},
                },
            }
        )
    ]

    # test for evr with no "result" key
    output_3 = get_renderer_impl(
        object_name=evr_no_result.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_statement",
    )[1](result=evr_no_result)
    print(json.dumps(output_3, indent=2))
    assert output_3 == []

    # test for evr with no unexpected count
    output_4 = get_renderer_impl(
        object_name=evr_failed_no_unexpected_count.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_statement",
    )[1](result=evr_failed_no_unexpected_count)
    print(output_4)
    assert output_4 == []

    # test for evr with exception
    evr_failed_exception = ExpectationValidationResult(
        success=False,
        exception_info={
            "raised_exception": True,
            "exception_message": "Unrecognized column: not_a_real_column",
            "exception_traceback": "Traceback (most recent call last):\n...more_traceback...",
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_not_match_regex",
            kwargs={
                "column": "Name",
                "regex": "^\\s+|\\s+$",
                "result_format": "SUMMARY",
            },
        ),
    )
    output_5 = get_renderer_impl(
        object_name=evr_failed_exception.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_statement",
    )[1](result=evr_failed_exception)
    output_5 = [content.to_json_dict() for content in output_5]
    expected_output_5 = [
        {
            "content_block_type": "string_template",
            "string_template": {
                "template": "\n\n$expectation_type raised an exception:\n$exception_message",
                "params": {
                    "expectation_type": "expect_column_values_to_not_match_regex",
                    "exception_message": "Unrecognized column: not_a_real_column",
                },
                "tag": "strong",
                "styling": {
                    "classes": ["text-danger"],
                    "params": {
                        "exception_message": {"tag": "code"},
                        "expectation_type": {
                            "classes": ["badge", "badge-danger", "mb-2"]
                        },
                    },
                },
            },
        },
        {
            "content_block_type": "collapse",
            "collapse_toggle_link": "Show exception traceback...",
            "collapse": [
                {
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": "Traceback (most recent call last):\n...more_traceback...",
                        "tag": "code",
                    },
                }
            ],
            "inline_link": False,
        },
    ]
    assert output_5 == expected_output_5
def test_ValidationResultsTableContentBlockRenderer_get_unexpected_table(evr_success):
    evr_failed_no_result = ExpectationValidationResult(
        success=False,
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )
    evr_failed_no_unexpected_list_or_counts = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_count": 1313,
            "unexpected_percent": 100.0,
            "unexpected_percent_nonmissing": 100.0,
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )
    # The partial unexpected values 1..20 (and indices 0..19) are written as
    # comprehensions; they are identical to the original literal lists.
    evr_failed_partial_unexpected_list = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_count": 1313,
            "unexpected_percent": 100.0,
            "unexpected_percent_nonmissing": 100.0,
            "partial_unexpected_list": list(range(1, 21)),
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )
    evr_failed_partial_unexpected_counts = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_count": 1313,
            "unexpected_percent": 100.0,
            "unexpected_percent_nonmissing": 100.0,
            "partial_unexpected_list": list(range(1, 21)),
            "partial_unexpected_index_list": list(range(20)),
            "partial_unexpected_counts": [
                {"value": value, "count": 1} for value in range(1, 21)
            ],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )

    # test for succeeded evr
    output_1 = get_renderer_impl(
        object_name=evr_success.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_table",
    )[1](result=evr_success)
    assert output_1 is None

    # test for failed evr with no "result" key
    output_2 = get_renderer_impl(
        object_name=evr_failed_no_result.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_table",
    )[1](result=evr_failed_no_result)
    assert output_2 is None

    # test for failed evr with no unexpected list or unexpected counts
    output_3 = get_renderer_impl(
        object_name=evr_failed_no_unexpected_list_or_counts.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_table",
    )[1](result=evr_failed_no_unexpected_list_or_counts)
    assert output_3 is None

    # test for failed evr with partial unexpected list
    output_4 = get_renderer_impl(
        object_name=evr_failed_partial_unexpected_list.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_table",
    )[1](result=evr_failed_partial_unexpected_list)
    assert output_4.to_json_dict() == {
        "content_block_type": "table",
        "table": [[value] for value in range(1, 21)],
        "header_row": ["Sampled Unexpected Values"],
        "styling": {"body": {"classes": ["table-bordered", "table-sm", "mt-3"]}},
    }

    # test for failed evr with partial unexpected counts
    output_5 = get_renderer_impl(
        object_name=evr_failed_partial_unexpected_counts.expectation_config.expectation_type,
        renderer_type="renderer.diagnostic.unexpected_table",
    )[1](result=evr_failed_partial_unexpected_counts)
    assert output_5.to_json_dict() == {
        "content_block_type": "table",
        "table": [[value] for value in range(1, 21)],
        "header_row": ["Sampled Unexpected Values"],
        "styling": {"body": {"classes": ["table-bordered", "table-sm", "mt-3"]}},
    }
def test_column_pair_map_expectation_decorator():
    # Create a new CustomPandasDataset to
    # (1) demonstrate that custom subclassing works, and
    # (2) test expectation business logic without dependencies on any other functions.
    class CustomPandasDataset(PandasDataset):
        @PandasDataset.column_pair_map_expectation
        def expect_column_pair_values_to_be_different(
            self,
            column_A,
            column_B,
            keep_missing="either",
            output_format=None,
            include_config=True,
            catch_exceptions=None,
        ):
            return column_A != column_B

    df = CustomPandasDataset(
        {
            "all_odd": [1, 3, 5, 7, 9],
            "all_even": [2, 4, 6, 8, 10],
            "odd_missing": [1, 3, 5, None, None],
            "mixed_missing": [1, 2, None, None, 6],
            "mixed_missing_2": [1, 3, None, None, 6],
            "all_missing": [None, None, None, None, None],
        }
    )
    df.set_default_expectation_argument("result_format", "COMPLETE")
    df.set_default_expectation_argument("include_config", False)

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "all_even"
    ) == ExpectationValidationResult(
        success=True,
        result={
            "element_count": 5,
            "missing_count": 0,
            "unexpected_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent": 0.0,
            "unexpected_percent_nonmissing": 0.0,
            "unexpected_list": [],
            "unexpected_index_list": [],
            "partial_unexpected_list": [],
            "partial_unexpected_index_list": [],
            "partial_unexpected_counts": [],
        },
    )

    assert df.expect_column_pair_values_to_be_different(
        "all_odd",
        "all_even",
        ignore_row_if="both_values_are_missing",
    ) == ExpectationValidationResult(
        success=True,
        result={
            "element_count": 5,
            "missing_count": 0,
            "unexpected_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent": 0.0,
            "unexpected_percent_nonmissing": 0.0,
            "unexpected_list": [],
            "unexpected_index_list": [],
            "partial_unexpected_list": [],
            "partial_unexpected_index_list": [],
            "partial_unexpected_counts": [],
        },
    )

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "odd_missing"
    ) == ExpectationValidationResult(
        success=False,
        result={
            "element_count": 5,
            "missing_count": 0,
            "unexpected_count": 3,
            "missing_percent": 0.0,
            "unexpected_percent": 60.0,
            "unexpected_percent_nonmissing": 60.0,
            "unexpected_list": [(1, 1.0), (3, 3.0), (5, 5.0)],
            "unexpected_index_list": [0, 1, 2],
            "partial_unexpected_list": [(1, 1.0), (3, 3.0), (5, 5.0)],
            "partial_unexpected_index_list": [0, 1, 2],
            "partial_unexpected_counts": [
                {"count": 1, "value": (1, 1.0)},
                {"count": 1, "value": (3, 3.0)},
                {"count": 1, "value": (5, 5.0)},
            ],
        },
    )

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "odd_missing", ignore_row_if="both_values_are_missing"
    ) == ExpectationValidationResult(
        success=False,
        result={
            "element_count": 5,
            "missing_count": 0,
            "unexpected_count": 3,
            "missing_percent": 0.0,
            "unexpected_percent": 60.0,
            "unexpected_percent_nonmissing": 60.0,
            "unexpected_list": [(1, 1.0), (3, 3.0), (5, 5.0)],
            "unexpected_index_list": [0, 1, 2],
            "partial_unexpected_list": [(1, 1.0), (3, 3.0), (5, 5.0)],
            "partial_unexpected_index_list": [0, 1, 2],
            "partial_unexpected_counts": [
                {"count": 1, "value": (1, 1.0)},
                {"count": 1, "value": (3, 3.0)},
                {"count": 1, "value": (5, 5.0)},
            ],
        },
    )

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "odd_missing", ignore_row_if="either_value_is_missing"
    ) == ExpectationValidationResult(
        success=False,
        result={
            "element_count": 5,
            "missing_count": 2,
            "unexpected_count": 3,
            "missing_percent": 40.0,
            "unexpected_percent": 100.0,
            "unexpected_percent_nonmissing": 100.0,
            "unexpected_list": [(1, 1.0), (3, 3.0), (5, 5.0)],
            "unexpected_index_list": [0, 1, 2],
            "partial_unexpected_list": [(1, 1.0), (3, 3.0), (5, 5.0)],
            "partial_unexpected_index_list": [0, 1, 2],
            "partial_unexpected_counts": [
                {"count": 1, "value": (1, 1.0)},
                {"count": 1, "value": (3, 3.0)},
                {"count": 1, "value": (5, 5.0)},
            ],
        },
    )

    with pytest.raises(ValueError):
        df.expect_column_pair_values_to_be_different(
            "all_odd", "odd_missing", ignore_row_if="blahblahblah"
        )

    # Test SUMMARY, BASIC, and BOOLEAN_ONLY output_formats
    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "all_even", result_format="SUMMARY"
    ) == ExpectationValidationResult(
        success=True,
        result={
            "element_count": 5,
            "missing_count": 0,
            "unexpected_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent": 0.0,
            "unexpected_percent_nonmissing": 0.0,
            "partial_unexpected_list": [],
            "partial_unexpected_index_list": [],
            "partial_unexpected_counts": [],
        },
    )

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "all_even", result_format="BASIC"
    ) == ExpectationValidationResult(
        success=True,
        result={
            "element_count": 5,
            "missing_count": 0,
            "unexpected_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent": 0.0,
            "unexpected_percent_nonmissing": 0.0,
            "partial_unexpected_list": [],
        },
    )

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "all_even", result_format="BOOLEAN_ONLY"
    ) == ExpectationValidationResult(success=True)
def test_column_aggregate_expectation_decorator():
    # Create a new CustomPandasDataset to
    # (1) demonstrate that custom subclassing works, and
    # (2) test expectation business logic without dependencies on any other functions.
    class CustomPandasDataset(PandasDataset):
        @PandasDataset.column_aggregate_expectation
        def expect_column_median_to_be_odd(self, column):
            median = self.get_column_median(column)
            return {"success": median % 2, "result": {"observed_value": median}}

    df = CustomPandasDataset(
        {
            "all_odd": [1, 3, 5, 7, 9],
            "all_even": [2, 4, 6, 8, 10],
            "odd_missing": [1, 3, 5, None, None],
            "mixed_missing": [1, 2, None, None, 6],
            "mixed_missing_2": [1, 3, None, None, 6],
            "all_missing": [None, None, None, None, None],
        }
    )
    df.set_default_expectation_argument("result_format", "COMPLETE")
    df.set_default_expectation_argument("include_config", False)

    assert df.expect_column_median_to_be_odd("all_odd") == ExpectationValidationResult(
        result={
            "observed_value": 5.0,
            "element_count": 5,
            "missing_count": None,
            "missing_percent": None,
        },
        success=True,
    )
    assert df.expect_column_median_to_be_odd(
        "all_even"
    ) == ExpectationValidationResult(
        result={
            "observed_value": 6,
            "element_count": 5,
            "missing_count": None,
            "missing_percent": None,
        },
        success=False,
    )
    assert df.expect_column_median_to_be_odd(
        "all_even", result_format="SUMMARY"
    ) == ExpectationValidationResult(
        result={
            "observed_value": 6.0,
            "element_count": 5,
            "missing_count": None,
            "missing_percent": None,
        },
        success=False,
    )
    assert df.expect_column_median_to_be_odd(
        "all_even", result_format="BOOLEAN_ONLY"
    ) == ExpectationValidationResult(success=False)

    df.default_expectation_args["result_format"] = "BOOLEAN_ONLY"
    assert df.expect_column_median_to_be_odd(
        "all_even"
    ) == ExpectationValidationResult(success=False)

    assert df.expect_column_median_to_be_odd(
        "all_even", result_format="BASIC"
    ) == ExpectationValidationResult(
        result={
            "observed_value": 6.0,
            "element_count": 5,
            "missing_count": None,
            "missing_percent": None,
        },
        success=False,
    )