def test_column_regex_strict() -> None:
    """Test that Column regex patterns are correctly parsed in DataFrameSchema.

    Exercises a ``strict=True`` schema whose only column is a regex pattern:
    a frame of matching columns validates, a frame with an extra column
    raises ``SchemaError``, and a schema extended with a second regex column
    validates a frame that carries the extra columns.
    """
    frame = pd.DataFrame(
        {
            "foo_1": [1, 2, 3],
            "foo_2": [1, 2, 3],
            "foo_3": [1, 2, 3],
        }
    )
    foo_schema = DataFrameSchema(
        columns={"foo_*": Column(Int, regex=True)},
        strict=True,
    )
    validated = foo_schema.validate(frame)
    assert isinstance(validated, pd.DataFrame)

    # An extra column in the dataframe should cause an error.
    frame_with_bar = frame.assign(bar=[1, 2, 3])
    with pytest.raises(errors.SchemaError):
        foo_schema.validate(frame_with_bar)

    # Adding an extra regex column to the schema should pass the strictness
    # test.
    # NOTE(review): the frame validated here still contains the plain `bar`
    # column as well as `bar_1`, so this presumably relies on the `bar_*`
    # pattern also covering `bar` -- confirm that is intended.
    extended_schema = foo_schema.add_columns(
        {"bar_*": Column(Int, regex=True)}
    )
    frame_with_bar_1 = frame_with_bar.assign(bar_1=[1, 2, 3])
    revalidated = extended_schema.validate(frame_with_bar_1)
    assert isinstance(revalidated, pd.DataFrame)
def test_add_and_remove_columns() -> None:
    """Check that adding and removing columns works as expected.

    Also checks that neither ``add_columns`` nor ``remove_columns`` modifies
    the DataFrameSchema it is called on, by comparing each schema against a
    deep copy taken before the call.
    """
    schema1 = DataFrameSchema(
        {
            "col1": Column(Int, Check(lambda s: s >= 0)),
        },
        strict=True,
    )
    schema1_exact_copy = copy.deepcopy(schema1)

    # test that add_columns doesn't modify schema1 after add_columns:
    schema2 = schema1.add_columns(
        {
            "col2": Column(String, Check(lambda x: x <= 0)),
            "col3": Column(Object, Check(lambda x: x == 0)),
        }
    )
    schema2_exact_copy = copy.deepcopy(schema2)
    assert schema1 == schema1_exact_copy

    # test that add_columns changed schema1 into schema2:
    expected_schema_2 = DataFrameSchema(
        {
            "col1": Column(Int, Check(lambda s: s >= 0)),
            "col2": Column(String, Check(lambda x: x <= 0)),
            "col3": Column(Object, Check(lambda x: x == 0)),
        },
        strict=True,
    )
    assert schema2 == expected_schema_2

    # test that remove_columns doesn't modify schema2:
    schema3 = schema2.remove_columns(["col2"])
    assert schema2 == schema2_exact_copy

    # test that remove_columns has removed the changes as expected:
    expected_schema_3 = DataFrameSchema(
        {
            "col1": Column(Int, Check(lambda s: s >= 0)),
            "col3": Column(Object, Check(lambda x: x == 0)),
        },
        strict=True,
    )
    assert schema3 == expected_schema_3

    # test that remove_columns can remove two columns:
    schema4 = schema2.remove_columns(["col2", "col3"])
    expected_schema_4 = DataFrameSchema(
        {"col1": Column(Int, Check(lambda s: s >= 0))}, strict=True
    )
    assert schema4 == expected_schema_4 == schema1

    # the multi-column removal must leave schema2 untouched as well:
    assert schema2 == schema2_exact_copy