def test_test_column_aggregate_expectation_function():
    asset = ge.dataset.PandasDataset({
        'x': [1, 3, 5, 7, 9],
        'y': [1, 2, None, 7, 9],
    })

    def expect_second_value_to_be(self, column, value, result_format=None,
                                  include_config=True, catch_exceptions=None,
                                  meta=None):
        return {
            "success": self[column].iloc[1] == value,
            "result": {
                "observed_value": self[column].iloc[1],
            }
        }

    assert asset.test_column_aggregate_expectation_function(
        expect_second_value_to_be, 'x', 2, include_config=False) == \
        ExpectationValidationResult(
            result={'observed_value': 3, 'element_count': 5,
                    'missing_count': 0, 'missing_percent': 0.0},
            success=False)

    assert asset.test_column_aggregate_expectation_function(
        expect_second_value_to_be, column='x', value=3,
        include_config=False) == \
        ExpectationValidationResult(
            result={'observed_value': 3.0, 'element_count': 5,
                    'missing_count': 0, 'missing_percent': 0.0},
            success=True)

    assert asset.test_column_aggregate_expectation_function(
        expect_second_value_to_be, 'y', value=3,
        result_format="BOOLEAN_ONLY", include_config=False) == \
        ExpectationValidationResult(success=False)

    assert asset.test_column_aggregate_expectation_function(
        expect_second_value_to_be, 'y', 2,
        result_format="BOOLEAN_ONLY", include_config=False) == \
        ExpectationValidationResult(success=True)
def test_stats_mixed_expectations():
    expectation_results = [
        ExpectationValidationResult(success=False),
        ExpectationValidationResult(success=True),
    ]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(2, 1, 1, 50.0, False)
    assert expected == actual
def test_test_expectation_function():
    asset = ge.dataset.PandasDataset({"x": [1, 3, 5, 7, 9], "y": [1, 2, None, 7, 9]})
    asset_2 = ge.dataset.PandasDataset({"x": [1, 3, 5, 6, 9], "y": [1, 2, None, 6, 9]})

    def expect_dataframe_to_contain_7(self):
        return {"success": bool((self == 7).sum().sum() > 0)}

    assert asset.test_expectation_function(
        expect_dataframe_to_contain_7, include_config=False
    ) == ExpectationValidationResult(success=True)
    assert asset_2.test_expectation_function(
        expect_dataframe_to_contain_7, include_config=False
    ) == ExpectationValidationResult(success=False)
def test_stats_no_successful_expectations():
    expectation_results = [ExpectationValidationResult(success=False)]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(1, 0, 1, 0.0, False)
    assert expected == actual

    expectation_results = [
        ExpectationValidationResult(success=False),
        ExpectationValidationResult(success=False),
        ExpectationValidationResult(success=False),
    ]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(3, 0, 3, 0.0, False)
    assert expected == actual
def test_stats_all_successful_expectations():
    expectation_results = [
        ExpectationValidationResult(success=True),
    ]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(1, 1, 0, 100.0, True)
    assert expected == actual

    expectation_results = [
        ExpectationValidationResult(success=True),
        ExpectationValidationResult(success=True),
        ExpectationValidationResult(success=True),
    ]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(3, 3, 0, 100.0, True)
    assert expected == actual
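# The statistics tests above pin down the contract of _calc_validation_statistics:
# ValidationStatistics(evaluated, successful, unsuccessful, success_percent, success).
# A minimal sketch that satisfies those assertions (a hypothetical reimplementation
# for illustration, not the library's actual code; it reuses the ValidationStatistics
# tuple this module already imports):
def _calc_validation_statistics_sketch(results):
    evaluated = len(results)
    successful = sum(1 for r in results if r.success)
    # Not pinned down by the tests: treat an empty result list as having an
    # undefined success percent.
    success_percent = successful / evaluated * 100 if evaluated else None
    # Overall success requires every evaluated expectation to have passed.
    return ValidationStatistics(
        evaluated, successful, evaluated - successful,
        success_percent, successful == evaluated,
    )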
def test_StoreMetricsAction_column_metric(
    basic_in_memory_data_context_for_validation_operator,
):
    action = StoreMetricsAction(
        data_context=basic_in_memory_data_context_for_validation_operator,
        requested_metrics={
            "*": [
                {
                    "column": {
                        "provider_id": [
                            "expect_column_values_to_be_unique.result.unexpected_count"
                        ]
                    }
                },
                "statistics.evaluated_expectations",
                "statistics.successful_expectations",
            ]
        },
        target_store_name="metrics_store",
    )
    validation_result = ExpectationSuiteValidationResult(
        success=False,
        meta={"expectation_suite_name": "foo", "run_id": "bar"},
        results=[
            ExpectationValidationResult(
                meta={},
                result={
                    "element_count": 10,
                    "missing_count": 0,
                    "missing_percent": 0.0,
                    "unexpected_count": 7,
                    "unexpected_percent": 0.0,
                    "unexpected_percent_nonmissing": 0.0,
                    "partial_unexpected_list": [],
                },
                success=True,
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_values_to_be_unique",
                    kwargs={"column": "provider_id", "result_format": "BASIC"},
                ),
                exception_info=None,
            )
        ],
        statistics={"evaluated_expectations": 5, "successful_expectations": 3},
    )
    action.run(
        validation_result,
        ValidationResultIdentifier.from_object(validation_result),
        data_asset=None,
    )

    assert basic_in_memory_data_context_for_validation_operator.stores[
        "metrics_store"
    ].get(
        ValidationMetricIdentifier(
            run_id="bar",
            expectation_suite_identifier=ExpectationSuiteIdentifier("foo"),
            metric_name="expect_column_values_to_be_unique.result.unexpected_count",
            metric_kwargs_id="column=provider_id",
        )
    ) == 7
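# The "*" key in requested_metrics appears to act as a wildcard over expectation
# suite names, and the column-scoped metric is stored under a metric_kwargs_id of
# "column=provider_id", which is how the final lookup retrieves the
# unexpected_count of 7.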
def test_ProfilingResultsOverviewSectionRenderer_empty_type_list():
    # This rather specific test is a reaction to the error documented in #679
    validation = ExpectationSuiteValidationResult(
        results=[
            ExpectationValidationResult(
                success=True,
                result={
                    # Note this is NOT a recognized type by many backends
                    'observed_value': "VARIANT",
                },
                exception_info={
                    'raised_exception': False,
                    'exception_message': None,
                    'exception_traceback': None
                },
                expectation_config=ExpectationConfiguration(
                    expectation_type='expect_column_values_to_be_in_type_list',
                    kwargs={
                        'column': 'live',
                        'type_list': None,
                        'result_format': 'SUMMARY'
                    },
                    meta={'BasicDatasetProfiler': {'confidence': 'very low'}}
                )
            )
        ]
    )
    result = ProfilingResultsOverviewSectionRenderer().render(validation)

    # Find the variable types content block:
    types_table = [
        block.table
        for block in result.content_blocks
        if block.content_block_type == "table"
        and block.header.string_template["template"] == "Variable types"
    ][0]
    assert ["unknown", "1"] in types_table
def test_test_column_map_expectation_function():
    asset = ge.dataset.PandasDataset({
        "x": [1, 3, 5, 7, 9],
        "y": [1, 2, None, 7, 9],
    })

    def is_odd(
        self,
        column,
        mostly=None,
        result_format=None,
        include_config=True,
        catch_exceptions=None,
        meta=None,
    ):
        return column % 2 == 1

    assert asset.test_column_map_expectation_function(
        is_odd, column="x", include_config=False
    ) == ExpectationValidationResult(
        result={
            "element_count": 5,
            "missing_count": 0,
            "missing_percent": 0,
            "unexpected_percent": 0.0,
            "partial_unexpected_list": [],
            "unexpected_percent_nonmissing": 0.0,
            "unexpected_count": 0,
        },
        success=True,
    )
    assert asset.test_column_map_expectation_function(
        is_odd, "x", result_format="BOOLEAN_ONLY", include_config=False
    ) == ExpectationValidationResult(success=True)
    assert asset.test_column_map_expectation_function(
        is_odd, column="y", result_format="BOOLEAN_ONLY", include_config=False
    ) == ExpectationValidationResult(success=False)
    assert asset.test_column_map_expectation_function(
        is_odd,
        column="y",
        result_format="BOOLEAN_ONLY",
        mostly=0.7,
        include_config=False,
    ) == ExpectationValidationResult(success=True)
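# Note on the "mostly" assertions above: column "y" has four non-null values,
# [1, 2, 7, 9], of which three (75%) are odd. The default success threshold of
# 100% therefore fails, while mostly=0.7 lowers the threshold to 70% and the
# same data passes.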
def test_ValidationResultsColumnSectionRenderer_render_header_evr_with_unescaped_dollar_sign(
        titanic_profiled_name_column_evrs):
    evr_with_unescaped_dollar_sign = ExpectationValidationResult(
        success=True,
        result={
            'element_count': 1313,
            'missing_count': 0,
            'missing_percent': 0.0,
            'unexpected_count': 0,
            'unexpected_percent': 0.0,
            'unexpected_percent_nonmissing': 0.0,
            'partial_unexpected_list': [],
            'partial_unexpected_index_list': [],
            'partial_unexpected_counts': []
        },
        exception_info={
            'raised_exception': False,
            'exception_message': None,
            'exception_traceback': None
        },
        expectation_config=ExpectationConfiguration(
            expectation_type='expect_column_values_to_be_in_type_list',
            kwargs={
                'column': 'Name ($)',
                'type_list': ['CHAR', 'StringType', 'TEXT', 'VARCHAR', 'str', 'string'],
                'result_format': 'SUMMARY'
            }))

    remaining_evrs, content_block = ValidationResultsColumnSectionRenderer._render_header(
        validation_results=[evr_with_unescaped_dollar_sign],
    )
    print(content_block.to_json_dict())
    assert content_block.to_json_dict() == {
        'content_block_type': 'header',
        'styling': {
            'classes': ['col-12', 'p-0'],
            'header': {
                'classes': ['alert', 'alert-secondary']
            }
        },
        'header': {
            'content_block_type': 'string_template',
            'string_template': {
                'template': 'Name ($$)',
                'tag': 'h5',
                'styling': {
                    'classes': ['m-0']
                }
            }
        }
    }
def test__find_evr_by_type(titanic_profiled_evrs_1):
    # TODO: _find_all_evrs_by_type should accept a ValidationResultSuite, not ValidationResultSuite.results
    found_evr = Renderer()._find_evr_by_type(
        titanic_profiled_evrs_1.results, "expect_column_to_exist")
    print(found_evr)
    assert found_evr is None

    # TODO: _find_all_evrs_by_type should accept a ValidationResultSuite, not ValidationResultSuite.results
    found_evr = Renderer()._find_evr_by_type(
        titanic_profiled_evrs_1.results, "expect_column_distinct_values_to_be_in_set")
    print(found_evr)
    assert found_evr == ExpectationValidationResult(
        success=True,
        result={
            "observed_value": ["*", "1st", "2nd", "3rd"],
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "details": {
                "value_counts": [
                    {"value": "*", "count": 1},
                    {"value": "1st", "count": 322},
                    {"value": "2nd", "count": 279},
                    {"value": "3rd", "count": 711},
                ]
            },
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_distinct_values_to_be_in_set",
            kwargs={
                "column": "PClass",
                "value_set": None,
                "result_format": "SUMMARY"
            },
        ),
    )
def test_ProfilingResultsColumnSectionRenderer_render_header_with_unescaped_dollar_sign(
        titanic_profiled_name_column_evrs):
    evr_with_unescaped_dollar_sign = ExpectationValidationResult(
        success=True,
        result={
            "observed_value": "float64"
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_type_list",
            kwargs={
                "column": "Car Insurance Premiums ($)",
                "type_list": [
                    "DOUBLE_PRECISION",
                    "DoubleType",
                    "FLOAT",
                    "FLOAT4",
                    "FLOAT8",
                    "FloatType",
                    "NUMERIC",
                    "float",
                ],
                "result_format": "SUMMARY"
            },
            meta={
                "BasicDatasetProfiler": {
                    "confidence": "very low"
                }
            }
        )
    )

    content_block = ProfilingResultsColumnSectionRenderer._render_header(
        [evr_with_unescaped_dollar_sign],
        column_type=[],
    ).to_json_dict()
    print(content_block)
    assert content_block == {
        'content_block_type': 'header',
        'styling': {
            'classes': ['col-12', 'p-0'],
            'header': {'classes': ['alert', 'alert-secondary']}
        },
        'header': {
            'content_block_type': 'string_template',
            'string_template': {
                'template': 'Car Insurance Premiums ($$)',
                'tooltip': {'content': 'expect_column_to_exist', 'placement': 'top'},
                'tag': 'h5',
                'styling': {'classes': ['m-0', 'p-0']}
            }
        },
        'subheader': {
            'content_block_type': 'string_template',
            'string_template': {
                'template': 'Type: []',
                'tooltip': {
                    'content': 'expect_column_values_to_be_of_type <br>expect_column_values_to_be_in_type_list'
                },
                'tag': 'h6',
                'styling': {'classes': ['mt-1', 'mb-0']}
            }
        }
    }
def test_expectation_decorator_meta():
    metadata = {"meta_key": "meta_value"}
    eds = ExpectationOnlyDataAsset()
    out = eds.no_op_value_expectation("a", meta=metadata)
    config = eds.get_expectation_suite()

    assert (ExpectationValidationResult(
        success=True, meta=metadata, expectation_config=config.expectations[0]
    ) == out)
    assert (ExpectationConfiguration(
        expectation_type="no_op_value_expectation",
        kwargs={"value": "a"},
        meta=metadata,
    ) == config.expectations[0])
def test_ValidationResultsTableContentBlockRenderer_generate_expectation_row_happy_path():
    evr = ExpectationValidationResult(
        success=True,
        result={
            'observed_value': True,
            'element_count': 162,
            'missing_count': 153,
            'missing_percent': 94.44444444444444
        },
        exception_info={
            'raised_exception': False,
            'exception_message': None,
            'exception_traceback': None
        },
        expectation_config=ExpectationConfiguration(
            expectation_type='expect_column_min_to_be_between',
            kwargs={
                'column': 'live',
                'min_value': None,
                'max_value': None,
                'result_format': 'SUMMARY'
            },
            meta={'BasicDatasetProfiler': {'confidence': 'very low'}}
        )
    )
    result = ValidationResultsTableContentBlockRenderer.render([evr]).to_json_dict()
    print(result)

    # Note: A better approach to testing would separate out styling into a separate test.
    assert result == {
        'content_block_type': 'table',
        'styling': {
            'body': {'classes': ['table']},
            'classes': ['ml-2', 'mr-2', 'mt-0', 'mb-0', 'table-responsive',
                        'hide-succeeded-validations-column-section-target-child']
        },
        'table': [[
            {
                'content_block_type': 'string_template',
                'styling': {'parent': {'classes': ['hide-succeeded-validation-target-child']}},
                'string_template': {
                    'template': '$icon',
                    'params': {'icon': ''},
                    'styling': {
                        'params': {
                            'icon': {'classes': ['fas', 'fa-check-circle', 'text-success'], 'tag': 'i'}
                        }
                    }
                }
            },
            {
                'content_block_type': 'string_template',
                'string_template': {
                    'template': '$column minimum value may have any numerical value.',
                    'params': {
                        'column': 'live',
                        'min_value': None,
                        'max_value': None,
                        'result_format': 'SUMMARY',
                        'parse_strings_as_datetimes': None
                    },
                    'styling': {
                        'default': {'classes': ['badge', 'badge-secondary']},
                        'params': {'column': {'classes': ['badge', 'badge-primary']}}
                    }
                }
            },
            'True'
        ]],
        'header_row': ['Status', 'Expectation', 'Observed Value']
    }
def expected_evr_without_unexpected_rows():
    return ExpectationValidationResult(
        success=False,
        expectation_config={
            "expectation_type": "expect_column_values_to_be_in_set",
            "kwargs": {
                "column": "a",
                "value_set": [1, 5, 22],
            },
            "meta": {},
        },
        result={
            "element_count": 6,
            "unexpected_count": 2,
            "unexpected_index_list": [3, 5],
            "unexpected_percent": 33.33333333333333,
            "partial_unexpected_list": [3, 10],
            "unexpected_list": [3, 10],
            "partial_unexpected_index_list": [3, 5],
            "partial_unexpected_counts": [
                {"value": 3, "count": 1},
                {"value": 10, "count": 1},
            ],
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent_total": 33.33333333333333,
            "unexpected_percent_nonmissing": 33.33333333333333,
        },
        exception_info={
            "raised_exception": False,
            "exception_traceback": None,
            "exception_message": None,
        },
        meta={},
    )
def _get_diagnostic_rendered_content(
    update_dict: Dict[str, Union[dict, ExpectationConfiguration]],
) -> RenderedAtomicContent:
    # Overwrite any fields passed in from the test and instantiate an
    # ExpectationValidationResult
    evr_kwargs.update(update_dict)
    evr = ExpectationValidationResult(**evr_kwargs)
    expectation_config = evr_kwargs["expectation_config"]
    expectation_type = expectation_config["expectation_type"]

    # Programmatically determine the renderer implementation
    renderer_impl = get_renderer_impl(
        object_name=expectation_type,
        renderer_type="atomic.diagnostic.observed_value",
    )[1]

    # Determine the RenderedAtomicContent output
    source_obj = {"result": evr}
    res = renderer_impl(**source_obj)
    return res
def test_column_map_expectation_decorator():
    # Create a new CustomPandasDataset to
    # (1) demonstrate that custom subclassing works, and
    # (2) test expectation business logic without dependencies on any other functions.
    class CustomPandasDataset(PandasDataset):

        @MetaPandasDataset.column_map_expectation
        def expect_column_values_to_be_odd(self, column):
            return column.map(lambda x: x % 2)

        @MetaPandasDataset.column_map_expectation
        def expectation_that_crashes_on_sixes(self, column):
            return column.map(lambda x: (x - 6) / 0 != "duck")

    df = CustomPandasDataset({
        'all_odd': [1, 3, 5, 5, 5, 7, 9, 9, 9, 11],
        'mostly_odd': [1, 3, 5, 7, 9, 2, 4, 1, 3, 5],
        'all_even': [2, 4, 4, 6, 6, 6, 8, 8, 8, 8],
        'odd_missing': [1, 3, 5, None, None, None, None, 1, 3, None],
        'mixed_missing': [1, 3, 5, None, None, 2, 4, 1, 3, None],
        'all_missing': [None, None, None, None, None, None, None, None, None, None]
    })
    df.set_default_expectation_argument("result_format", "COMPLETE")
    df.set_default_expectation_argument("include_config", False)

    assert df.expect_column_values_to_be_odd("all_odd") == \
        ExpectationValidationResult(result={
            'element_count': 10,
            'missing_count': 0,
            'missing_percent': 0.0,
            'partial_unexpected_counts': [],
            'partial_unexpected_index_list': [],
            'partial_unexpected_list': [],
            'unexpected_count': 0,
            'unexpected_index_list': [],
            'unexpected_list': [],
            'unexpected_percent': 0.0,
            'unexpected_percent_nonmissing': 0.0
        }, success=True)

    assert df.expect_column_values_to_be_odd("all_missing") == \
        ExpectationValidationResult(result={
            'element_count': 10,
            'missing_count': 10,
            'missing_percent': 100.0,
            'partial_unexpected_counts': [],
            'partial_unexpected_index_list': [],
            'partial_unexpected_list': [],
            'unexpected_count': 0,
            'unexpected_index_list': [],
            'unexpected_list': [],
            'unexpected_percent': 0.0,
            'unexpected_percent_nonmissing': None
        }, success=True)

    assert df.expect_column_values_to_be_odd("odd_missing") == \
        ExpectationValidationResult(result={
            'element_count': 10,
            'missing_count': 5,
            'missing_percent': 50.0,
            'partial_unexpected_counts': [],
            'partial_unexpected_index_list': [],
            'partial_unexpected_list': [],
            'unexpected_count': 0,
            'unexpected_index_list': [],
            'unexpected_list': [],
            'unexpected_percent': 0.0,
            'unexpected_percent_nonmissing': 0.0
        }, success=True)

    assert df.expect_column_values_to_be_odd("mixed_missing") == \
        ExpectationValidationResult(result={
            'element_count': 10,
            'missing_count': 3,
            'missing_percent': 30.0,
            'partial_unexpected_counts': [
                {'value': 2., 'count': 1},
                {'value': 4., 'count': 1}
            ],
            'partial_unexpected_index_list': [5, 6],
            'partial_unexpected_list': [2., 4.],
            'unexpected_count': 2,
            'unexpected_index_list': [5, 6],
            'unexpected_list': [2, 4],
            'unexpected_percent': 20.0,
            'unexpected_percent_nonmissing': (2 / 7 * 100)
        }, success=False)

    assert df.expect_column_values_to_be_odd("mostly_odd") == \
        ExpectationValidationResult(result={
            'element_count': 10,
            'missing_count': 0,
            'missing_percent': 0,
            'partial_unexpected_counts': [
                {'value': 2., 'count': 1},
                {'value': 4., 'count': 1}
            ],
            'partial_unexpected_index_list': [5, 6],
            'partial_unexpected_list': [2., 4.],
            'unexpected_count': 2,
            'unexpected_index_list': [5, 6],
            'unexpected_list': [2, 4],
            'unexpected_percent': 20.0,
            'unexpected_percent_nonmissing': 20.0
        }, success=False)

    assert df.expect_column_values_to_be_odd("mostly_odd", mostly=.6) == \
        ExpectationValidationResult(result={
            'element_count': 10,
            'missing_count': 0,
            'missing_percent': 0,
            'partial_unexpected_counts': [
                {'value': 2., 'count': 1},
                {'value': 4., 'count': 1}
            ],
            'partial_unexpected_index_list': [5, 6],
            'partial_unexpected_list': [2., 4.],
            'unexpected_count': 2,
            'unexpected_index_list': [5, 6],
            'unexpected_list': [2, 4],
            'unexpected_percent': 20.0,
            'unexpected_percent_nonmissing': 20.0
        }, success=True)

    assert df.expect_column_values_to_be_odd("mostly_odd", result_format="BOOLEAN_ONLY") == \
        ExpectationValidationResult(success=False)

    df.default_expectation_args["result_format"] = "BOOLEAN_ONLY"
    assert df.expect_column_values_to_be_odd("mostly_odd") == \
        ExpectationValidationResult(success=False)

    df.default_expectation_args["result_format"] = "BASIC"
    assert df.expect_column_values_to_be_odd("mostly_odd", include_config=True) == \
        ExpectationValidationResult(
            expectation_config=ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_odd",
                kwargs={
                    "column": "mostly_odd",
                    "result_format": "BASIC"
                }),
            result={
                'element_count': 10,
                'missing_count': 0,
                'missing_percent': 0,
                'partial_unexpected_list': [2, 4],
                'unexpected_count': 2,
                'unexpected_percent': 20.0,
                'unexpected_percent_nonmissing': 20.0
            },
            success=False)
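# Arithmetic behind the "mixed_missing" assertion above: of 10 elements, 3 are
# missing, leaving 7 non-missing values; 2 of those (2.0 and 4.0) are even and
# therefore unexpected. So unexpected_percent is 2 / 10 * 100 = 20.0, while
# unexpected_percent_nonmissing is 2 / 7 * 100 (about 28.57).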
def test_evaluation_parameter_store_methods(data_context):
    run_id = "20191125T000000.000000Z"
    source_patient_data_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "source_patient_data.default",
            "run_id": run_id
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_table_row_count_to_equal",
                    kwargs={
                        "value": 1024,
                    }),
                success=True,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False
                },
                result={
                    "observed_value": 1024,
                    "element_count": 1024,
                    "missing_percent": 0.0,
                    "missing_count": 0
                })
        ],
        success=True)

    data_context.store_evaluation_parameters(source_patient_data_results)

    bound_parameters = data_context.evaluation_parameter_store.get_bind_params(run_id)
    assert bound_parameters == {
        'urn:great_expectations:validations:source_patient_data.default:expect_table_row_count_to_equal.result'
        '.observed_value': 1024
    }

    source_diabetes_data_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "source_diabetes_data.default",
            "run_id": run_id
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_unique_value_count_to_be_between",
                    kwargs={
                        "column": "patient_nbr",
                        "min": 2048,
                        "max": 2048
                    }),
                success=True,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False
                },
                result={
                    "observed_value": 2048,
                    "element_count": 5000,
                    "missing_percent": 0.0,
                    "missing_count": 0
                })
        ],
        success=True)

    data_context.store_evaluation_parameters(source_diabetes_data_results)

    bound_parameters = data_context.evaluation_parameter_store.get_bind_params(run_id)
    assert bound_parameters == {
        'urn:great_expectations:validations:source_patient_data.default:expect_table_row_count_to_equal.result'
        '.observed_value': 1024,
        'urn:great_expectations:validations:source_diabetes_data.default'
        ':expect_column_unique_value_count_to_be_between.result.observed_value:column=patient_nbr': 2048
    }
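# The bound-parameter keys asserted above follow the evaluation-parameter URN
# layout visible in the assertions themselves:
#   urn:great_expectations:validations:<suite_name>:<expectation_type>.result.observed_value[:<metric_kwargs_id>]
# where the optional trailing segment (e.g. "column=patient_nbr") scopes the
# metric to a specific expectation kwarg.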
def test_exception_list_content_block_renderer():
    # We should grab the exception message and add default formatting
    result = ExceptionListContentBlockRenderer.render([
        ExpectationValidationResult(
            success=False,
            exception_info={
                "raised_exception": True,
                "exception_message": "Invalid partition object.",
                "exception_traceback": "Traceback (most recent call last):\n File \"/home/user/great_expectations/great_expectations/data_asset/data_asset.py\", line 186, in wrapper\n return_obj = func(self, **evaluation_args)\n File \"/home/user/great_expectations/great_expectations/dataset/dataset.py\", line 106, in inner_wrapper\n evaluation_result = func(self, column, *args, **kwargs)\n File \"/home/user/great_expectations/great_expectations/dataset/dataset.py\", line 3388, in expect_column_kl_divergence_to_be_less_than\n raise ValueError(\"Invalid partition object.\")\nValueError: Invalid partition object.\n"
            },
            expectation_config=ExpectationConfiguration(
                expectation_type="expect_column_kl_divergence_to_be_less_than",
                kwargs={
                    "column": "answer",
                    "partition_object": None,
                    "threshold": None,
                    "result_format": "SUMMARY"
                },
                meta={"BasicDatasetProfiler": {"confidence": "very low"}}))
    ])
    assert result.to_json_dict() == {
        "content_block_type": "bullet_list",
        "bullet_list": [{
            "content_block_type": "string_template",
            "string_template": {
                "template": "$column: $expectation_type raised an exception: $exception_message",
                "params": {
                    "column": "answer",
                    "expectation_type": "expect_column_kl_divergence_to_be_less_than",
                    "exception_message": "Invalid partition object."
                },
                "styling": {
                    "classes": ["list-group-item"],
                    "params": {
                        "column": {"classes": ["badge", "badge-primary"]},
                        "expectation_type": {"classes": ["text-monospace"]},
                        "exception_message": {"classes": ["text-monospace"]}
                    }
                }
            }
        }],
        "styling": {
            "classes": ["col-12"],
            "styles": {"margin-top": "20px"},
            "header": {
                "classes": ["collapsed"],
                "attributes": {
                    "data-toggle": "collapse",
                    "href": "#{{content_block_id}}-body",
                    "role": "button",
                    "aria-expanded": "true",
                    "aria-controls": "collapseExample"
                },
                "styles": {"cursor": "pointer"}
            },
            "body": {"classes": ["list-group", "collapse"]}
        },
        "header": 'Failed expectations <span class="mr-3 triangle"></span>'
    }
def test_ValidationResultsTableContentBlockRenderer_get_unexpected_table(evr_success):
    evr_failed_no_result = ExpectationValidationResult(
        success=False,
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )
    evr_failed_no_unexpected_list_or_counts = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_count": 1313,
            "unexpected_percent": 100.0,
            "unexpected_percent_nonmissing": 100.0,
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )
    evr_failed_partial_unexpected_list = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_count": 1313,
            "unexpected_percent": 100.0,
            "unexpected_percent_nonmissing": 100.0,
            "partial_unexpected_list": [
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
            ],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )
    evr_failed_partial_unexpected_counts = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_count": 1313,
            "unexpected_percent": 100.0,
            "unexpected_percent_nonmissing": 100.0,
            "partial_unexpected_list": [
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
            ],
            "partial_unexpected_index_list": [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
            ],
            "partial_unexpected_counts": [
                {"value": 1, "count": 1},
                {"value": 2, "count": 1},
                {"value": 3, "count": 1},
                {"value": 4, "count": 1},
                {"value": 5, "count": 1},
                {"value": 6, "count": 1},
                {"value": 7, "count": 1},
                {"value": 8, "count": 1},
                {"value": 9, "count": 1},
                {"value": 10, "count": 1},
                {"value": 11, "count": 1},
                {"value": 12, "count": 1},
                {"value": 13, "count": 1},
                {"value": 14, "count": 1},
                {"value": 15, "count": 1},
                {"value": 16, "count": 1},
                {"value": 17, "count": 1},
                {"value": 18, "count": 1},
                {"value": 19, "count": 1},
                {"value": 20, "count": 1},
            ],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )

    # test for succeeded evr
    output_1 = ValidationResultsTableContentBlockRenderer._get_unexpected_table(
        evr_success)
    assert output_1 is None

    # test for failed evr with no "result" key
    output_2 = ValidationResultsTableContentBlockRenderer._get_unexpected_table(
        evr_failed_no_result)
    assert output_2 is None

    # test for failed evr with no unexpected list or unexpected counts
    output_3 = ValidationResultsTableContentBlockRenderer._get_unexpected_table(
        evr_failed_no_unexpected_list_or_counts)
    assert output_3 is None

    # test for failed evr with partial unexpected list
    output_4 = ValidationResultsTableContentBlockRenderer._get_unexpected_table(
        evr_failed_partial_unexpected_list)
    assert output_4.to_json_dict() == {
        "content_block_type": "table",
        "table": [
            [1], [2], [3], [4], [5], [6], [7], [8], [9], [10],
            [11], [12], [13], [14], [15], [16], [17], [18], [19], [20],
        ],
        "header_row": ["Sampled Unexpected Values"],
        "styling": {
            "body": {"classes": ["table-bordered", "table-sm", "mt-3"]}
        },
    }

    # test for failed evr with partial unexpected counts
    output_5 = ValidationResultsTableContentBlockRenderer._get_unexpected_table(
        evr_failed_partial_unexpected_counts)
    assert output_5.to_json_dict() == {
        "content_block_type": "table",
        "table": [
            [1], [2], [3], [4], [5], [6], [7], [8], [9], [10],
            [11], [12], [13], [14], [15], [16], [17], [18], [19], [20],
        ],
        "header_row": ["Sampled Unexpected Values"],
        "styling": {
            "body": {"classes": ["table-bordered", "table-sm", "mt-3"]}
        },
    }
def test_ValidationResultsTableContentBlockRenderer_get_unexpected_statement(
        evr_success, evr_failed):
    evr_no_result = ExpectationValidationResult(
        success=True,
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_table_row_count_to_be_between",
            kwargs={
                "min_value": 0,
                "max_value": None,
                "result_format": "SUMMARY"
            },
        ),
    )
    evr_failed_no_unexpected_count = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent": 0.2284843869002285,
            "unexpected_percent_nonmissing": 0.2284843869002285,
            "partial_unexpected_list": [
                "Daly, Mr Peter Denis ",
                "Barber, Ms ",
                "Geiger, Miss Emily ",
            ],
            "partial_unexpected_index_list": [77, 289, 303],
            "partial_unexpected_counts": [
                {"value": "Barber, Ms ", "count": 1},
                {"value": "Daly, Mr Peter Denis ", "count": 1},
                {"value": "Geiger, Miss Emily ", "count": 1},
            ],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_not_match_regex",
            kwargs={
                "column": "Name",
                "regex": "^\\s+|\\s+$",
                "result_format": "SUMMARY",
            },
        ),
    )

    # test for succeeded evr
    output_1 = ValidationResultsTableContentBlockRenderer._get_unexpected_statement(
        evr_success)
    assert output_1 == []

    # test for failed evr
    output_2 = ValidationResultsTableContentBlockRenderer._get_unexpected_statement(
        evr_failed)
    assert output_2 == [
        RenderedStringTemplateContent(
            **{
                "content_block_type": "string_template",
                "string_template": {
                    "template": "\n\n$unexpected_count unexpected values found. $unexpected_percent of $element_count total rows.",
                    "params": {
                        "unexpected_count": "3",
                        "unexpected_percent": "≈0.2285%",
                        "element_count": "1,313",
                    },
                    "tag": "strong",
                    "styling": {"classes": ["text-danger"]},
                },
            })
    ]

    # test for evr with no "result" key
    output_3 = ValidationResultsTableContentBlockRenderer._get_unexpected_statement(
        evr_no_result)
    print(json.dumps(output_3, indent=2))
    assert output_3 == []

    # test for evr with no unexpected count
    output_4 = ValidationResultsTableContentBlockRenderer._get_unexpected_statement(
        evr_failed_no_unexpected_count)
    print(output_4)
    assert output_4 == []

    # test for evr with exception
    evr_failed_exception = ExpectationValidationResult(
        success=False,
        exception_info={
            "raised_exception": True,
            "exception_message": "Unrecognized column: not_a_real_column",
            "exception_traceback": "Traceback (most recent call last):\n...more_traceback...",
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_not_match_regex",
            kwargs={
                "column": "Name",
                "regex": "^\\s+|\\s+$",
                "result_format": "SUMMARY",
            },
        ),
    )
    output_5 = ValidationResultsTableContentBlockRenderer._get_unexpected_statement(
        evr_failed_exception)
    output_5 = [content.to_json_dict() for content in output_5]
    expected_output_5 = [
        {
            "content_block_type": "string_template",
            "string_template": {
                "template": "\n\n$expectation_type raised an exception:\n$exception_message",
                "params": {
                    "expectation_type": "expect_column_values_to_not_match_regex",
                    "exception_message": "Unrecognized column: not_a_real_column",
                },
                "tag": "strong",
                "styling": {
                    "classes": ["text-danger"],
                    "params": {
                        "exception_message": {"tag": "code"},
                        "expectation_type": {
                            "classes": ["badge", "badge-danger", "mb-2"]
                        },
                    },
                },
            },
        },
        {
            "content_block_type": "collapse",
            "collapse_toggle_link": "Show exception traceback...",
            "collapse": [
                {
                    "content_block_type": "string_template",
                    "string_template": {
                        "template": "Traceback (most recent call last):\n...more_traceback...",
                        "tag": "code",
                    },
                }
            ],
            "inline_link": False,
        },
    ]
    assert output_5 == expected_output_5
"action": { "class_name": "OpenLineageValidationAction", "module_name": "openlineage.common.provider.great_expectations.action" } }] } }, anonymous_usage_statistics={'enabled': False} ) TABLE_NAME = "test_data" # Common validation results table_result = ExpectationValidationResult(success=True, expectation_config=ExpectationConfiguration( expectation_type='expect_table_row_count_to_equal', kwargs={'value': 10}), result={"observed_value": 10}) column_result = ExpectationValidationResult(success=True, expectation_config=ExpectationConfiguration( expectation_type='expect_column_sum_to_be_between', kwargs={'column': 'size', 'min_value': 0, 'max_value': 100} ), result={'observed_value': 60}) result_suite = ExpectationSuiteValidationResult(success=True, meta={'batch_kwargs': {}}, results=[table_result, column_result]) @pytest.fixture(scope='session') def test_db_file():
def test_file_format_map_output():
    incomplete_file_path = file_relative_path(
        __file__, '../test_sets/toy_data_incomplete.csv')
    incomplete_file_dat = ge.data_asset.FileDataAsset(incomplete_file_path)
    null_file_path = file_relative_path(__file__, '../test_sets/null_file.csv')
    null_file_dat = ge.data_asset.FileDataAsset(null_file_path)
    white_space_path = file_relative_path(__file__, '../test_sets/white_space.txt')
    white_space_dat = ge.data_asset.FileDataAsset(white_space_path)

    # Boolean Expectation Output
    expectation = incomplete_file_dat.expect_file_line_regex_match_count_to_equal(
        regex=r',\S',
        expected_count=3,
        skip=1,
        result_format="BOOLEAN_ONLY",
        include_config=False)
    expected_result = ExpectationValidationResult(success=False)
    assert expected_result == expectation

    # Empty File Expectations
    expectation = null_file_dat.expect_file_line_regex_match_count_to_equal(
        regex=r',\S',
        expected_count=3,
        skip=1,
        result_format="BASIC",
        include_config=False)
    expected_result = ExpectationValidationResult(
        success=None,
        result={
            "element_count": 0,
            "missing_count": 0,
            "missing_percent": None,
            "unexpected_count": 0,
            "unexpected_percent": None,
            "unexpected_percent_nonmissing": None,
            "partial_unexpected_list": []
        })
    assert expected_result == expectation

    # White Space File
    expectation = white_space_dat.expect_file_line_regex_match_count_to_equal(
        regex=r',\S',
        expected_count=3,
        result_format="BASIC",
        include_config=False)
    expected_result = ExpectationValidationResult(
        success=None,
        result={
            "element_count": 11,
            "missing_count": 11,
            "missing_percent": 100.0,
            "unexpected_count": 0,
            "unexpected_percent": 0,
            "unexpected_percent_nonmissing": None,
            "partial_unexpected_list": []
        })
    assert expected_result == expectation

    # Complete Result Format
    expectation = incomplete_file_dat.expect_file_line_regex_match_count_to_equal(
        regex=r',\S',
        expected_count=3,
        skip=1,
        result_format="COMPLETE",
        include_config=False)
    expected_result = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 9,
            "missing_count": 2,
            "missing_percent": (2 / 9 * 100),
            "unexpected_count": 3,
            "unexpected_percent": (3 / 9 * 100),
            "unexpected_percent_nonmissing": (3 / 7 * 100),
            "partial_unexpected_list": ['A,C,1\n', 'B,1,4\n', 'A,1,4\n'],
            "partial_unexpected_counts": [
                {"value": 'A,1,4\n', "count": 1},
                {"value": 'A,C,1\n', "count": 1},
                {"value": 'B,1,4\n', "count": 1}
            ],
            "partial_unexpected_index_list": [0, 3, 5],
            "unexpected_list": ['A,C,1\n', 'B,1,4\n', 'A,1,4\n'],
            "unexpected_index_list": [0, 3, 5]
        })
    assert expected_result == expectation

    # Invalid Result Format
    with pytest.raises(ValueError):
        expectation = incomplete_file_dat.expect_file_line_regex_match_count_to_equal(
            regex=r',\S',
            expected_count=3,
            skip=1,
            result_format="JOKE",
            include_config=False)
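# Note the Empty File and White Space cases above: when zero lines are
# evaluated, or every evaluated line counts as missing, the expectation
# reports success=None rather than True or False, since there is nothing
# against which to judge the expected count.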
def test_column_aggregate_expectation_decorator():
    # Create a new CustomPandasDataset to
    # (1) demonstrate that custom subclassing works, and
    # (2) test expectation business logic without dependencies on any other functions.
    class CustomPandasDataset(PandasDataset):

        @PandasDataset.column_aggregate_expectation
        def expect_column_median_to_be_odd(self, column):
            median = self.get_column_median(column)
            return {
                "success": median % 2,
                "result": {
                    "observed_value": median
                }
            }

    df = CustomPandasDataset({
        'all_odd': [1, 3, 5, 7, 9],
        'all_even': [2, 4, 6, 8, 10],
        'odd_missing': [1, 3, 5, None, None],
        'mixed_missing': [1, 2, None, None, 6],
        'mixed_missing_2': [1, 3, None, None, 6],
        'all_missing': [None, None, None, None, None],
    })
    df.set_default_expectation_argument("result_format", "COMPLETE")
    df.set_default_expectation_argument("include_config", False)

    assert df.expect_column_median_to_be_odd("all_odd") == \
        ExpectationValidationResult(result={
            'observed_value': 5,
            'element_count': 5,
            'missing_count': 0,
            'missing_percent': 0
        }, success=True)

    assert df.expect_column_median_to_be_odd("all_even") == \
        ExpectationValidationResult(result={
            'observed_value': 6,
            'element_count': 5,
            'missing_count': 0,
            'missing_percent': 0
        }, success=False)

    assert df.expect_column_median_to_be_odd("all_even", result_format="SUMMARY") == \
        ExpectationValidationResult(result={
            'observed_value': 6,
            'element_count': 5,
            'missing_count': 0,
            'missing_percent': 0
        }, success=False)

    assert df.expect_column_median_to_be_odd("all_even", result_format="BOOLEAN_ONLY") == \
        ExpectationValidationResult(success=False)

    df.default_expectation_args["result_format"] = "BOOLEAN_ONLY"
    assert df.expect_column_median_to_be_odd("all_even") == \
        ExpectationValidationResult(success=False)

    assert df.expect_column_median_to_be_odd("all_even", result_format="BASIC") == \
        ExpectationValidationResult(result={
            'observed_value': 6,
            'element_count': 5,
            'missing_count': 0,
            'missing_percent': 0
        }, success=False)
def test_column_pair_map_expectation_decorator():
    # Create a new CustomPandasDataset to
    # (1) demonstrate that custom subclassing works, and
    # (2) test expectation business logic without dependencies on any other functions.
    class CustomPandasDataset(PandasDataset):

        @PandasDataset.column_pair_map_expectation
        def expect_column_pair_values_to_be_different(
                self,
                column_A,
                column_B,
                keep_missing="either",
                output_format=None,
                include_config=True,
                catch_exceptions=None):
            return column_A != column_B

    df = CustomPandasDataset({
        'all_odd': [1, 3, 5, 7, 9],
        'all_even': [2, 4, 6, 8, 10],
        'odd_missing': [1, 3, 5, None, None],
        'mixed_missing': [1, 2, None, None, 6],
        'mixed_missing_2': [1, 3, None, None, 6],
        'all_missing': [None, None, None, None, None],
    })
    df.set_default_expectation_argument("result_format", "COMPLETE")
    df.set_default_expectation_argument("include_config", False)

    assert df.expect_column_pair_values_to_be_different("all_odd", "all_even") == \
        ExpectationValidationResult(
            success=True,
            result={
                "element_count": 5,
                "missing_count": 0,
                "unexpected_count": 0,
                "missing_percent": 0.0,
                "unexpected_percent": 0.0,
                "unexpected_percent_nonmissing": 0.0,
                "unexpected_list": [],
                "unexpected_index_list": [],
                "partial_unexpected_list": [],
                "partial_unexpected_index_list": [],
                "partial_unexpected_counts": [],
            })

    assert df.expect_column_pair_values_to_be_different(
        "all_odd",
        "all_even",
        ignore_row_if="both_values_are_missing",
    ) == ExpectationValidationResult(
        success=True,
        result={
            "element_count": 5,
            "missing_count": 0,
            "unexpected_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent": 0.0,
            "unexpected_percent_nonmissing": 0.0,
            "unexpected_list": [],
            "unexpected_index_list": [],
            "partial_unexpected_list": [],
            "partial_unexpected_index_list": [],
            "partial_unexpected_counts": [],
        })

    assert df.expect_column_pair_values_to_be_different("all_odd", "odd_missing") == \
        ExpectationValidationResult(
            success=False,
            result={
                "element_count": 5,
                "missing_count": 0,
                "unexpected_count": 3,
                "missing_percent": 0.0,
                "unexpected_percent": 60.0,
                "unexpected_percent_nonmissing": 60.0,
                "unexpected_list": [(1, 1.), (3, 3.), (5, 5.)],
                "unexpected_index_list": [0, 1, 2],
                "partial_unexpected_list": [(1, 1.), (3, 3.), (5, 5.)],
                "partial_unexpected_index_list": [0, 1, 2],
                "partial_unexpected_counts": [
                    {'count': 1, 'value': (1, 1.)},
                    {'count': 1, 'value': (3, 3.)},
                    {'count': 1, 'value': (5, 5.)}
                ]
            })

    assert df.expect_column_pair_values_to_be_different(
        "all_odd",
        "odd_missing",
        ignore_row_if="both_values_are_missing"
    ) == ExpectationValidationResult(
        success=False,
        result={
            "element_count": 5,
            "missing_count": 0,
            "unexpected_count": 3,
            "missing_percent": 0.0,
            "unexpected_percent": 60.0,
            "unexpected_percent_nonmissing": 60.0,
            "unexpected_list": [(1, 1.), (3, 3.), (5, 5.)],
            "unexpected_index_list": [0, 1, 2],
            "partial_unexpected_list": [(1, 1.), (3, 3.), (5, 5.)],
            "partial_unexpected_index_list": [0, 1, 2],
            "partial_unexpected_counts": [
                {'count': 1, 'value': (1, 1.)},
                {'count': 1, 'value': (3, 3.)},
                {'count': 1, 'value': (5, 5.)}
            ]
        })

    assert df.expect_column_pair_values_to_be_different(
        "all_odd",
        "odd_missing",
        ignore_row_if="either_value_is_missing"
    ) == ExpectationValidationResult(
        success=False,
        result={
            "element_count": 5,
            "missing_count": 2,
            "unexpected_count": 3,
            "missing_percent": 40.0,
            "unexpected_percent": 60.0,
            "unexpected_percent_nonmissing": 100.0,
            "unexpected_list": [(1, 1.), (3, 3.), (5, 5.)],
            "unexpected_index_list": [0, 1, 2],
            "partial_unexpected_list": [(1, 1.), (3, 3.), (5, 5.)],
            "partial_unexpected_index_list": [0, 1, 2],
            "partial_unexpected_counts": [
                {'count': 1, 'value': (1, 1.)},
                {'count': 1, 'value': (3, 3.)},
                {'count': 1, 'value': (5, 5.)}
            ]
        })

    with pytest.raises(ValueError):
        df.expect_column_pair_values_to_be_different(
            "all_odd", "odd_missing", ignore_row_if="blahblahblah")

    # Test SUMMARY, BASIC, and BOOLEAN_ONLY output_formats
    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "all_even", result_format="SUMMARY") == \
        ExpectationValidationResult(
            success=True,
            result={
                "element_count": 5,
                "missing_count": 0,
                "unexpected_count": 0,
                "missing_percent": 0.0,
                "unexpected_percent": 0.0,
                "unexpected_percent_nonmissing": 0.0,
                "partial_unexpected_list": [],
                "partial_unexpected_index_list": [],
                "partial_unexpected_counts": [],
            })

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "all_even", result_format="BASIC") == \
        ExpectationValidationResult(
            success=True,
            result={
                "element_count": 5,
                "missing_count": 0,
                "unexpected_count": 0,
                "missing_percent": 0.0,
                "unexpected_percent": 0.0,
                "unexpected_percent_nonmissing": 0.0,
                "partial_unexpected_list": [],
            })

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "all_even", result_format="BOOLEAN_ONLY") == \
        ExpectationValidationResult(success=True)
def test_ValidationResultsTableContentBlockRenderer_get_observed_value(evr_success):
    evr_no_result_key = ExpectationValidationResult(
        success=True,
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_table_row_count_to_be_between",
            kwargs={
                "min_value": 0,
                "max_value": None,
                "result_format": "SUMMARY"
            },
        ),
    )
    evr_expect_column_values_to_not_be_null = ExpectationValidationResult(
        success=True,
        result={
            "element_count": 1313,
            "unexpected_count": 1050,
            "unexpected_percent": 79.96953541508,
            "partial_unexpected_list": [],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_not_be_null",
            kwargs={
                "column": "Unnamed: 0",
                "mostly": 0.5,
                "result_format": "SUMMARY"
            },
        ),
    )
    evr_expect_column_values_to_be_null = ExpectationValidationResult(
        success=True,
        result={
            "element_count": 1313,
            "unexpected_count": 0,
            "unexpected_percent": 0.0,
            "partial_unexpected_list": [],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_null",
            kwargs={
                "column": "Unnamed: 0",
                "mostly": 0.5,
                "result_format": "SUMMARY"
            },
        ),
    )

    # test _get_observed_value when evr.result["observed_value"] exists
    output_1 = ValidationResultsTableContentBlockRenderer._get_observed_value(
        evr_success)
    assert output_1 == "1,313"
    # test _get_observed_value when evr.result does not exist
    output_2 = ValidationResultsTableContentBlockRenderer._get_observed_value(
        evr_no_result_key)
    assert output_2 == "--"
    # test _get_observed_value for expect_column_values_to_not_be_null expectation type
    output_3 = ValidationResultsTableContentBlockRenderer._get_observed_value(
        evr_expect_column_values_to_not_be_null)
    assert output_3 == "≈20.03% not null"
    # test _get_observed_value for expect_column_values_to_be_null expectation type
    output_4 = ValidationResultsTableContentBlockRenderer._get_observed_value(
        evr_expect_column_values_to_be_null)
    assert output_4 == "100% null"
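# Arithmetic behind output_3 above: the EVR reports unexpected_percent
# 79.96953541508 for expect_column_values_to_not_be_null, meaning about 79.97%
# of values are null, so the renderer displays the complement:
# 100 - 79.96953541508 ≈ 20.03% not null.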
def test_evaluation_parameters_for_between_expectations_parse_correctly(
    titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store_stats_enabled,
):
    context = titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store_stats_enabled

    # Note that if you modify this batch request, you may save the new version as a .json file
    # to pass in later via the --batch-request option
    df = pandas.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    batch_request = {
        "datasource_name": "my_datasource",
        "data_connector_name": "my_runtime_data_connector",
        "data_asset_name": "foo",
        "runtime_parameters": {"batch_data": df},
        "batch_identifiers": {
            "pipeline_stage_name": "kickoff",
            "airflow_run_id": "1234",
        },
    }

    # Feel free to change the name of your suite here. Renaming this will not remove the other one.
    expectation_suite_name = "abcde"
    try:
        suite = context.get_expectation_suite(
            expectation_suite_name=expectation_suite_name)
        print(
            f'Loaded ExpectationSuite "{suite.expectation_suite_name}" containing {len(suite.expectations)} '
            f"expectations."
        )
    except DataContextError:
        suite = context.create_expectation_suite(
            expectation_suite_name=expectation_suite_name)
        print(f'Created ExpectationSuite "{suite.expectation_suite_name}".')

    validator = context.get_validator(
        batch_request=RuntimeBatchRequest(**batch_request),
        expectation_suite_name=expectation_suite_name,
    )
    column_names = [f'"{column_name}"' for column_name in validator.columns()]
    print(f"Columns: {', '.join(column_names)}.")

    validator.set_evaluation_parameter("my_min", 1)
    validator.set_evaluation_parameter("my_max", 5)

    result = validator.expect_table_row_count_to_be_between(
        min_value={"$PARAMETER": "my_min", "$PARAMETER.upstream_row_count": 10},
        max_value={"$PARAMETER": "my_max", "$PARAMETER.upstream_row_count": 50},
    )
    assert result == ExpectationValidationResult(
        **{
            "expectation_config": {
                "meta": {
                    "substituted_parameters": {"min_value": 1, "max_value": 5}
                },
                "kwargs": {
                    "min_value": 1,
                    "max_value": 5,
                    "batch_id": "15fe04adb6ff20b9fc6eda486b7a36b7",
                },
                "expectation_type": "expect_table_row_count_to_be_between",
                "ge_cloud_id": None,
            },
            "meta": {},
            "exception_info": {
                "raised_exception": False,
                "exception_traceback": None,
                "exception_message": None,
            },
            "success": True,
            "result": {"observed_value": 3},
        }
    )
def test_validate():
    with open(
            file_relative_path(__file__, "./test_sets/titanic_expectations.json")) as f:
        my_expectation_suite = expectationSuiteSchema.loads(f.read())

    with mock.patch("uuid.uuid1") as uuid:
        uuid.return_value = "1234"
        my_df = ge.read_csv(
            file_relative_path(__file__, "./test_sets/Titanic.csv"),
            expectation_suite=my_expectation_suite)
    my_df.set_default_expectation_argument("result_format", "COMPLETE")

    with mock.patch("datetime.datetime") as mock_datetime:
        mock_datetime.utcnow.return_value = datetime(1955, 11, 5)
        results = my_df.validate(catch_exceptions=False)

    with open(
            file_relative_path(
                __file__,
                './test_sets/titanic_expected_data_asset_validate_results.json')) as f:
        expected_results = expectationSuiteValidationResultSchema.loads(f.read())

    del results.meta["great_expectations.__version__"]
    assert expected_results == results

    # Now, change the results and ensure they are no longer equal
    results.results[0] = ExpectationValidationResult()
    assert expected_results != results

    # Finally, confirm that only_return_failures works
    # and does not affect the "statistics" field.
    with mock.patch("datetime.datetime") as mock_datetime:
        mock_datetime.utcnow.return_value = datetime(1955, 11, 5)
        validation_results = my_df.validate(only_return_failures=True)
    del validation_results.meta["great_expectations.__version__"]
    expected_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "titanic",
            "run_id": "19551105T000000.000000Z",
            "batch_kwargs": {"ge_batch_id": "1234"},
            "batch_markers": {},
            "batch_parameters": {}
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_values_to_be_in_set",
                    kwargs={
                        "column": "PClass",
                        "value_set": ["1st", "2nd", "3rd"]
                    }),
                success=False,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False
                },
                result={
                    "partial_unexpected_index_list": [456],
                    "unexpected_count": 1,
                    "unexpected_list": ["*"],
                    "unexpected_percent": 0.07616146230007616,
                    "element_count": 1313,
                    "missing_percent": 0.0,
                    "partial_unexpected_counts": [{"count": 1, "value": "*"}],
                    "partial_unexpected_list": ["*"],
                    "unexpected_percent_nonmissing": 0.07616146230007616,
                    "missing_count": 0,
                    "unexpected_index_list": [456]
                })
        ],
        success=expected_results.success,  # unaffected
        statistics=expected_results["statistics"]  # unaffected
    )
    assert expected_results == validation_results