Example #1
0
            ],
        ],
        *[
            _boolean_update_column_case(bool_kwarg) for bool_kwarg in [
                "nullable",
                "allow_duplicates",
                "coerce",
                "required",
                "regex",
            ]
        ],
        [
            Column(Int, checks=Check.greater_than(0)),
            "col",
            {
                "checks": Check.less_than(10)
            },
            lambda old, new: [
                old.columns["col"].checks == [Check.greater_than(0)],
                new.columns["col"].checks == [Check.less_than(10)],
            ],
        ],
        # error cases
        [Column(Int), "col", {
            "name": "renamed_col"
        }, ValueError],
        [Column(Int), "foobar", {}, ValueError],
    ],
)
def test_dataframe_schema_update_column(column, column_to_update, update,
                                        assertion_fn):
Example #2
0
def test_lazy_dataframe_validation_error():
    """Test exceptions on lazy dataframe validation.

    Validates a dataframe against a schema with ``lazy=True`` and checks that
    the raised ``SchemaErrors``:

    * has a message matching "A total of ... schema errors were found",
    * exposes the validated dataframe unchanged via ``err.data``,
    * reports, for every expected (schema context, check) pair, failure
      cases that are all contained in the expected values.
    """
    schema = DataFrameSchema(
        columns={
            "int_col": Column(Int, Check.greater_than(5)),
            "int_col2": Column(Int),
            "float_col": Column(Float, Check.less_than(0)),
            "str_col": Column(String, Check.isin(["foo", "bar"])),
            "not_in_dataframe": Column(Int),
        },
        checks=Check(
            lambda df: df != 1,
            error="dataframe_not_equal_1",
            ignore_na=False,
        ),
        index=Index(String, name="str_index"),
        strict=True,
    )

    # The frame lacks "not_in_dataframe", carries the extra "unknown_col",
    # and stores strings in "int_col2", which the schema declares as Int.
    dataframe = pd.DataFrame(
        data={
            "int_col": [1, 2, 6],
            "int_col2": ["a", "b", "c"],
            "float_col": [1.0, -2.0, 3.0],
            "str_col": ["foo", "b", "c"],
            "unknown_col": [None, None, None],
        },
        index=pd.Index(["index0", "index1", "index2"], name="str_index"),
    )

    expectation = {
        # schema object context -> check failure cases
        "DataFrameSchema": {
            # check name -> failure cases
            "column_in_schema": ["unknown_col"],
            "dataframe_not_equal_1": [1],
            "column_in_dataframe": ["not_in_dataframe"],
        },
        "Column": {
            "greater_than(5)": [1, 2],
            "pandas_dtype('int64')": ["object"],
            "less_than(0)": [1, 3],
        },
    }

    # One pytest.raises block both asserts that validation fails and captures
    # the exception. A try/except around a second validate() call would
    # silently skip every assertion below if no error were raised.
    with pytest.raises(
        errors.SchemaErrors,
        match="^A total of .+ schema errors were found",
    ) as exc_info:
        schema.validate(dataframe, lazy=True)

    err = exc_info.value

    # data in the caught exception should be equal to the dataframe
    # passed into validate
    assert err.data.equals(dataframe)

    # make sure all expected check errors are in schema errors
    for schema_context, check_failure_cases in expectation.items():
        err_df = err.failure_cases.loc[
            err.failure_cases.schema_context == schema_context
        ]
        for check, failure_cases in check_failure_cases.items():
            # the check must be reported at all ...
            assert check in err_df.check.values
            # ... and every failure case reported for it must be expected
            assert (
                err_df.loc[err_df.check == check]
                .failure_case.isin(failure_cases)
                .all()
            )
Example #3
0
            ],
        ],
        *[
            _boolean_update_column_case(bool_kwarg)
            for bool_kwarg in [
                "nullable",
                "allow_duplicates",
                "coerce",
                "required",
                "regex",
            ]
        ],
        [
            Column(Int, checks=Check.greater_than(0)),
            "col",
            {"checks": Check.less_than(10)},
            lambda old, new: [
                old.columns["col"].checks == [Check.greater_than(0)],
                new.columns["col"].checks == [Check.less_than(10)],
            ],
        ],
        # error cases
        [Column(Int), "col", {"name": "renamed_col"}, ValueError],
        [Column(Int), "foobar", {}, ValueError],
    ],
)
def test_dataframe_schema_update_column(
    column, column_to_update, update, assertion_fn
):
    """Test that DataFrameSchema columns create updated copies."""
    schema = DataFrameSchema({"col": column})