], ], *[ _boolean_update_column_case(bool_kwarg) for bool_kwarg in [ "nullable", "allow_duplicates", "coerce", "required", "regex", ] ], [ Column(Int, checks=Check.greater_than(0)), "col", { "checks": Check.less_than(10) }, lambda old, new: [ old.columns["col"].checks == [Check.greater_than(0)], new.columns["col"].checks == [Check.less_than(10)], ], ], # error cases [Column(Int), "col", { "name": "renamed_col" }, ValueError], [Column(Int), "foobar", {}, ValueError], ], ) def test_dataframe_schema_update_column(column, column_to_update, update, assertion_fn):
def test_lazy_dataframe_validation_error():
    """Test exceptions on lazy dataframe validation.

    Builds a schema and a dataframe that violates it in several ways,
    validates with ``lazy=True``, and checks that the raised
    ``SchemaErrors`` carries the validated dataframe and reports every
    expected failure case for each schema context.
    """
    schema = DataFrameSchema(
        columns={
            "int_col": Column(Int, Check.greater_than(5)),
            "int_col2": Column(Int),
            "float_col": Column(Float, Check.less_than(0)),
            "str_col": Column(String, Check.isin(["foo", "bar"])),
            "not_in_dataframe": Column(Int),
        },
        checks=Check(
            lambda df: df != 1,
            error="dataframe_not_equal_1",
            ignore_na=False,
        ),
        index=Index(String, name="str_index"),
        strict=True,
    )

    dataframe = pd.DataFrame(
        data={
            "int_col": [1, 2, 6],
            "int_col2": ["a", "b", "c"],
            "float_col": [1.0, -2.0, 3.0],
            "str_col": ["foo", "b", "c"],
            "unknown_col": [None, None, None],
        },
        index=pd.Index(["index0", "index1", "index2"], name="str_index"),
    )

    expectation = {
        # schema object context -> check failure cases
        "DataFrameSchema": {
            # check name -> failure cases
            "column_in_schema": ["unknown_col"],
            "dataframe_not_equal_1": [1],
            "column_in_dataframe": ["not_in_dataframe"],
        },
        "Column": {
            "greater_than(5)": [1, 2],
            "pandas_dtype('int64')": ["object"],
            "less_than(0)": [1, 3],
        },
    }

    # Validate once and keep the raised exception. Asserting on the captured
    # error outside of an ``except`` clause guarantees the checks below always
    # run; a bare try/except would pass silently if nothing were raised.
    with pytest.raises(
        errors.SchemaErrors,
        match="^A total of .+ schema errors were found",
    ) as exc_info:
        schema.validate(dataframe, lazy=True)
    err = exc_info.value

    # data in the caught exception should be equal to the dataframe
    # passed into validate
    assert err.data.equals(dataframe)

    # make sure all expected check errors are in schema errors
    for schema_context, check_failure_cases in expectation.items():
        err_df = err.failure_cases.loc[
            err.failure_cases.schema_context == schema_context
        ]
        for check, failure_cases in check_failure_cases.items():
            assert check in err_df.check.values
            assert (
                err_df.loc[err_df.check == check]
                .failure_case.isin(failure_cases)
                .all()
            )
], ], *[ _boolean_update_column_case(bool_kwarg) for bool_kwarg in [ "nullable", "allow_duplicates", "coerce", "required", "regex", ] ], [ Column(Int, checks=Check.greater_than(0)), "col", {"checks": Check.less_than(10)}, lambda old, new: [ old.columns["col"].checks == [Check.greater_than(0)], new.columns["col"].checks == [Check.less_than(10)], ], ], # error cases [Column(Int), "col", {"name": "renamed_col"}, ValueError], [Column(Int), "foobar", {}, ValueError], ], ) def test_dataframe_schema_update_column( column, column_to_update, update, assertion_fn ): """Test that DataFrameSchema columns create updated copies.""" schema = DataFrameSchema({"col": column})