def _create_string_length_expectation(
    self, key: str, details: dict
) -> Optional[ExpectationConfiguration]:
    """Build a value-length expectation from ``minLength`` / ``maxLength``.

    Reference:
    https://json-schema.org/understanding-json-schema/reference/string.html#length

    Args:
        key: Column name the expectation is attached to.
        details: JSON-schema property definition for that column.

    Returns:
        ``None`` when the property is not of string type or declares neither
        length keyword. An ``expect_column_value_lengths_to_equal``
        configuration when both keywords compare equal, otherwise an
        ``expect_column_value_lengths_to_be_between`` configuration carrying
        whichever bounds are present.
    """
    if details.get("type", None) != JsonSchemaTypes.STRING.value:
        return None

    shortest = details.get("minLength", None)
    longest = details.get("maxLength", None)
    if shortest is None and longest is None:
        return None

    if shortest == longest:
        # Both keywords are present and identical: an exact length.
        exact_kwargs = {"column": key, "value": shortest}
        return ExpectationConfiguration(
            "expect_column_value_lengths_to_equal",
            ExpectationKwargs(exact_kwargs),
        )

    between_kwargs = {"column": key}
    if shortest is not None:
        between_kwargs["min_value"] = shortest
    if longest is not None:
        between_kwargs["max_value"] = longest
    return ExpectationConfiguration(
        "expect_column_value_lengths_to_be_between",
        ExpectationKwargs(between_kwargs),
    )
def _create_range_expectation(
    self, key: str, details: dict
) -> Optional[ExpectationConfiguration]:
    """Translate JSON-schema numeric range keywords into a between-expectation.

    Reference:
    https://json-schema.org/understanding-json-schema/reference/numeric.html#range

    ``exclusiveMinimum`` / ``exclusiveMaximum`` are accepted in both forms:

    * numeric: the value is itself the strict bound and takes precedence over
      ``minimum`` / ``maximum``;
    * boolean (JSON Schema Draft 4): ``True`` makes the accompanying
      ``minimum`` / ``maximum`` strict, ``False`` leaves it inclusive. A
      boolean is never used as a bound value.

    Args:
        key: Column name the expectation is attached to.
        details: JSON-schema property definition for that column.

    Returns:
        ``None`` when the property is not an integer/number type or when no
        usable bound is declared; otherwise an
        ``expect_column_values_to_be_between`` configuration.
    """
    type_ = details.get("type", None)
    if type_ not in [
        JsonSchemaTypes.INTEGER.value,
        JsonSchemaTypes.NUMBER.value,
    ]:
        return None

    minimum = details.get("minimum", None)
    maximum = details.get("maximum", None)
    exclusive_minimum = details.get("exclusiveMinimum", None)
    exclusive_maximum = details.get("exclusiveMaximum", None)

    if (
        minimum is None
        and maximum is None
        and exclusive_minimum is None
        and exclusive_maximum is None
    ):
        return None

    kwargs: Dict[str, Any] = {"column": key}
    if minimum is not None:
        kwargs["min_value"] = minimum
    if maximum is not None:
        kwargs["max_value"] = maximum

    # bool must be tested before the numeric case: bool is a subclass of int,
    # so a Draft 4 flag would otherwise be stored as the bound itself.
    if isinstance(exclusive_minimum, bool):
        if exclusive_minimum and minimum is not None:
            kwargs["strict_min"] = True
    elif exclusive_minimum is not None:
        kwargs["min_value"] = exclusive_minimum
        kwargs["strict_min"] = True

    if isinstance(exclusive_maximum, bool):
        if exclusive_maximum and maximum is not None:
            kwargs["strict_max"] = True
    elif exclusive_maximum is not None:
        kwargs["max_value"] = exclusive_maximum
        kwargs["strict_max"] = True

    # Only boolean flags without a bound to qualify were supplied.
    if "min_value" not in kwargs and "max_value" not in kwargs:
        return None

    return ExpectationConfiguration(
        "expect_column_values_to_be_between", ExpectationKwargs(kwargs)
    )
def test_catch_exceptions_with_bad_expectation_type():
    """Validate a dataset carrying an expectation type it does not implement.

    With ``catch_exceptions=True`` the failure must be reported in the result;
    with ``catch_exceptions=False`` an ``AttributeError`` must propagate.
    """
    # We want to catch degenerate cases where an expectation suite is
    # incompatible with the dataset it is validated against (here: an
    # expectation type that PandasDataset has no attribute for).
    my_df = PandasDataset({"x": range(10)})
    bogus_config = ExpectationConfiguration(expectation_type="foobar", kwargs={})
    my_df._append_expectation(bogus_config)

    result = my_df.validate(catch_exceptions=True)

    # Find the foobar result. If nothing matches, the position stays on the
    # last entry and the expectation_type assertion below fails.
    position = 0
    for position, candidate in enumerate(result.results):
        if candidate.expectation_config.expectation_type == "foobar":
            break
    foobar_result = result.results[position]

    assert foobar_result.success is False
    assert foobar_result.expectation_config.expectation_type == "foobar"
    assert foobar_result.expectation_config.kwargs == ExpectationKwargs()
    assert foobar_result.exception_info["raised_exception"] is True
    traceback_text = foobar_result.exception_info["exception_traceback"]
    assert (
        "AttributeError: 'PandasDataset' object has no attribute 'foobar'"
        in traceback_text
    )

    with pytest.raises(AttributeError):
        my_df.validate(catch_exceptions=False)
def _create_type_expectation(
    self, key: str, details: dict
) -> Optional[ExpectationConfiguration]:
    """Build a type-list expectation from the property's ``type`` keyword.

    Args:
        key: Column name the expectation is attached to.
        details: JSON-schema property definition for that column.

    Returns:
        ``None`` when ``details`` has no ``type``; otherwise an
        ``expect_column_values_to_be_in_type_list`` configuration whose
        ``type_list`` comes from ``PROFILER_TYPE_LIST_BY_JSON_SCHEMA_TYPE``.

    Note:
        The mapping is indexed by subscript, so a ``type`` value it does not
        contain propagates the lookup error to the caller.
    """
    json_type = details.get("type", None)
    if json_type is not None:
        profiler_types = self.PROFILER_TYPE_LIST_BY_JSON_SCHEMA_TYPE[json_type]
        return ExpectationConfiguration(
            "expect_column_values_to_be_in_type_list",
            ExpectationKwargs(column=key, type_list=profiler_types),
        )
    return None
def test_expectation_decorator_build_config():
    """Calling decorated expectations records them, in order, in the suite."""
    eds = ExpectationOnlyDataAsset()
    eds.no_op_expectation()
    eds.no_op_value_expectation('a')

    suite = eds.get_expectation_suite()

    expected_no_op = ExpectationConfiguration(
        expectation_type='no_op_expectation',
        kwargs={},
    )
    assert expected_no_op == suite.expectations[0]

    expected_no_op_value = ExpectationConfiguration(
        expectation_type='no_op_value_expectation',
        kwargs=ExpectationKwargs({'value': 'a'}),
    )
    assert expected_no_op_value == suite.expectations[1]
def _create_boolean_expectation(
    self, key: str, details: dict
) -> Optional[ExpectationConfiguration]:
    """Build a ``{True, False}`` value-set expectation for boolean properties.

    Reference:
    https://json-schema.org/understanding-json-schema/reference/boolean.html

    Args:
        key: Column name the expectation is attached to.
        details: JSON-schema property definition for that column.

    Returns:
        ``None`` unless the property's ``type`` is the boolean type; otherwise
        an ``expect_column_values_to_be_in_set`` configuration.
    """
    if details.get("type", None) == JsonSchemaTypes.BOOLEAN.value:
        # TODO map JSONSchema types to which type backend? Pandas? Should this
        # value set be parameterized per back end?
        boolean_kwargs = ExpectationKwargs(column=key, value_set=[True, False])
        return ExpectationConfiguration(
            "expect_column_values_to_be_in_set", boolean_kwargs
        )
    return None
def test_expectation_decorator_build_config():
    """The suite lists both no-op expectations in the order they were called."""
    asset = ExpectationOnlyDataAsset()
    asset.no_op_expectation()
    asset.no_op_value_expectation("a")

    recorded = asset.get_expectation_suite()

    # First call: no arguments, so empty kwargs.
    want_first = ExpectationConfiguration(
        expectation_type="no_op_expectation", kwargs={}
    )
    assert want_first == recorded.expectations[0]

    # Second call: the positional argument is recorded under "value".
    want_second = ExpectationConfiguration(
        expectation_type="no_op_value_expectation",
        kwargs=ExpectationKwargs({"value": "a"}),
    )
    assert want_second == recorded.expectations[1]
def _create_set_expectation(
    self, key: str, details: dict
) -> Optional[ExpectationConfiguration]:
    """Build a value-set expectation from the property's ``enum`` keyword.

    Reference:
    https://json-schema.org/understanding-json-schema/reference/generic.html#enumerated-values

    Args:
        key: Column name the expectation is attached to.
        details: JSON-schema property definition for that column.

    Returns:
        ``None`` when the enum keyword is absent or its value is not a list;
        otherwise an ``expect_column_values_to_be_in_set`` configuration
        using the enumerated values as ``value_set``.
    """
    if JsonSchemaTypes.ENUM.value not in details:
        return None

    allowed_values = details.get("enum", None)
    if isinstance(allowed_values, list):
        return ExpectationConfiguration(
            "expect_column_values_to_be_in_set",
            ExpectationKwargs(column=key, value_set=allowed_values),
        )
    return None
def _create_existence_expectation(
    self, key: str, details: dict
) -> ExpectationConfiguration:
    """Build an ``expect_column_to_exist`` expectation for ``key``.

    Args:
        key: Column name the expectation is attached to.
        details: JSON-schema property definition for that column.

    Returns:
        The expectation configuration. When ``details`` has a truthy
        ``description`` it is attached as a markdown note in ``meta``;
        otherwise ``meta`` is ``None``.
    """
    column_kwargs = ExpectationKwargs(column=key)
    description = details.get("description", None)
    meta = (
        {
            "notes": {
                "format": "markdown",
                "content": [f"### Description:\n{description}"],
            }
        }
        if description
        else None
    )
    return ExpectationConfiguration(
        "expect_column_to_exist", column_kwargs, meta=meta
    )
def kwargs1():
    """Return ExpectationKwargs built from one positional dict (BASIC format)."""
    # Kept as a positional dict on purpose: the constructor receives it as a
    # single argument rather than as keywords.
    raw_kwargs = {
        "column": "a",
        "value_set": [1, 2, 3],
        "result_format": "BASIC",
    }
    return ExpectationKwargs(raw_kwargs)
def kwargs3():
    """Return ExpectationKwargs built from keywords (COMPLETE format)."""
    allowed = [1, 2, 3]
    built = ExpectationKwargs(
        column="a",
        value_set=allowed,
        result_format="COMPLETE",
    )
    return built
def kwargs2():
    """Return ExpectationKwargs built from keywords (BASIC format)."""
    allowed = [1, 2, 3]
    built = ExpectationKwargs(
        column="a",
        value_set=allowed,
        result_format="BASIC",
    )
    return built
def test_expectation_suite_filedata_asset():
    """Check what ``get_expectation_suite`` returns for a FileDataAsset.

    Two expectations are declared. The assertions cover three retrievals:
    the default call, a call that keeps result-format kwargs and failed
    expectations, and a call that keeps result-format kwargs but discards
    failed expectations.
    """

    def equal_config_with_format():
        # Fresh object on every call so no expected value is shared.
        return ExpectationConfiguration(
            expectation_type="expect_file_line_regex_match_count_to_equal",
            kwargs={
                "expected_count": 3,
                "regex": ",\\S",
                "result_format": "BASIC",
                "skip": 1,
            },
        )

    # Load in data files
    csv_path = file_relative_path(__file__, "../test_sets/toy_data_complete.csv")

    # Create FileDataAsset objects
    asset = ge.data_asset.FileDataAsset(csv_path)

    # Set up expectations
    asset.expect_file_line_regex_match_count_to_equal(
        regex=r",\S",
        expected_count=3,
        skip=1,
        result_format="BASIC",
        catch_exceptions=True,
    )
    asset.expect_file_line_regex_match_count_to_be_between(
        regex=r",\S",
        expected_max_count=2,
        skip=1,
        result_format="SUMMARY",
        include_config=True,
    )

    # Test basic config output
    default_suite = asset.get_expectation_suite()
    expected_default = [
        ExpectationConfiguration(
            expectation_type="expect_file_line_regex_match_count_to_equal",
            kwargs=ExpectationKwargs(expected_count=3, regex=",\\S", skip=1),
        )
    ]
    assert expected_default == default_suite.expectations

    # Include result format kwargs
    everything_suite = asset.get_expectation_suite(
        discard_result_format_kwargs=False,
        discard_failed_expectations=False,
    )
    expected_everything = [
        equal_config_with_format(),
        ExpectationConfiguration(
            expectation_type="expect_file_line_regex_match_count_to_be_between",
            kwargs={
                "expected_max_count": 2,
                "regex": ",\\S",
                "result_format": "SUMMARY",
                "skip": 1,
            },
        ),
    ]
    assert expected_everything == everything_suite.expectations

    # Discard Failing Expectations
    passing_suite = asset.get_expectation_suite(
        discard_result_format_kwargs=False,
        discard_failed_expectations=True,
    )
    expected_passing = [equal_config_with_format()]
    assert expected_passing == passing_suite.expectations
def test_expectation_suite_filedata_asset():
    """Exercise ``get_expectation_suite`` filtering on a FileDataAsset.

    An "equal" and a "between" regex-match-count expectation are declared,
    then the suite is fetched three times with different discard flags and
    compared against the expected configurations.
    """
    equal_type = "expect_file_line_regex_match_count_to_equal"
    between_type = "expect_file_line_regex_match_count_to_be_between"

    # Load in data files
    data_file = file_relative_path(__file__, "../test_sets/toy_data_complete.csv")

    # Create FileDataAsset objects
    file_asset = ge.data_asset.FileDataAsset(data_file)

    # Set up expectations
    file_asset.expect_file_line_regex_match_count_to_equal(
        regex=r",\S",
        expected_count=3,
        skip=1,
        result_format="BASIC",
        catch_exceptions=True,
    )
    file_asset.expect_file_line_regex_match_count_to_be_between(
        regex=r",\S",
        expected_max_count=2,
        skip=1,
        result_format="SUMMARY",
        include_config=True,
    )

    # Test basic config output
    basic_suite = file_asset.get_expectation_suite()
    want_basic = [
        ExpectationConfiguration(
            expectation_type=equal_type,
            kwargs=ExpectationKwargs(expected_count=3, regex=",\\S", skip=1),
        )
    ]
    assert want_basic == basic_suite.expectations

    # Include result format kwargs
    full_suite = file_asset.get_expectation_suite(
        discard_result_format_kwargs=False, discard_failed_expectations=False
    )
    want_full = [
        ExpectationConfiguration(
            expectation_type=equal_type,
            kwargs={
                "expected_count": 3,
                "regex": ",\\S",
                "result_format": "BASIC",
                "skip": 1,
            },
        ),
        ExpectationConfiguration(
            expectation_type=between_type,
            kwargs={
                "expected_max_count": 2,
                "regex": ",\\S",
                "result_format": "SUMMARY",
                "skip": 1,
            },
        ),
    ]
    assert want_full == full_suite.expectations

    # Discard Failing Expectations
    successful_suite = file_asset.get_expectation_suite(
        discard_result_format_kwargs=False, discard_failed_expectations=True
    )
    want_successful = [
        ExpectationConfiguration(
            expectation_type=equal_type,
            kwargs={
                "expected_count": 3,
                "regex": ",\\S",
                "result_format": "BASIC",
                "skip": 1,
            },
        )
    ]
    assert want_successful == successful_suite.expectations