Ejemplo n.º 1
0
def test_lazy_dataframe_validation_error():
    """Test exceptions on lazy dataframe validation.

    Validates a deliberately invalid dataframe with ``lazy=True`` and checks
    that the raised ``SchemaErrors`` carries the original data together with
    the failure cases listed in ``expectation``, grouped by schema context
    and check name.
    """
    schema = DataFrameSchema(
        columns={
            "int_col": Column(Int, Check.greater_than(5)),
            "int_col2": Column(Int),
            "float_col": Column(Float, Check.less_than(0)),
            "str_col": Column(String, Check.isin(["foo", "bar"])),
            # declared here but deliberately absent from the dataframe below
            "not_in_dataframe": Column(Int),
        },
        checks=Check(lambda df: df != 1,
                     error="dataframe_not_equal_1",
                     ignore_na=False),
        index=Index(String, name="str_index"),
        strict=True,
    )

    dataframe = pd.DataFrame(
        data={
            "int_col": [1, 2, 6],
            # string values in a column the schema declares as Int
            "int_col2": ["a", "b", "c"],
            "float_col": [1.0, -2.0, 3.0],
            "str_col": ["foo", "b", "c"],
            # not declared in the schema, which is built with strict=True
            "unknown_col": [None, None, None],
        },
        index=pd.Index(["index0", "index1", "index2"], name="str_index"),
    )

    expectation = {
        # schema object context -> check failure cases
        "DataFrameSchema": {
            # check name -> failure cases
            "column_in_schema": ["unknown_col"],
            "dataframe_not_equal_1": [1],
            "column_in_dataframe": ["not_in_dataframe"],
        },
        "Column": {
            "greater_than(5)": [1, 2],
            "pandas_dtype('int64')": ["object"],
            "less_than(0)": [1, 3],
        },
    }

    # Validate once and keep the raised exception. Capturing it through
    # pytest.raises guarantees the assertions below always execute, instead
    # of living in an ``except`` branch that is skipped when nothing raises.
    with pytest.raises(errors.SchemaErrors,
                       match="^A total of .+ schema errors were found"
                       ) as exc_info:
        schema.validate(dataframe, lazy=True)

    err = exc_info.value

    # data in the caught exception should be equal to the dataframe
    # passed into validate
    assert err.data.equals(dataframe)

    # make sure all expected check errors are in schema errors
    all_failure_cases = err.failure_cases
    for schema_context, check_failure_cases in expectation.items():
        err_df = all_failure_cases.loc[
            all_failure_cases.schema_context == schema_context]
        for check, failure_cases in check_failure_cases.items():
            # the check must have been reported for this context ...
            assert check in err_df.check.values
            # ... and every failure case it reported must be an expected one
            reported = err_df.loc[err_df.check == check].failure_case
            assert reported.isin(failure_cases).all()
Ejemplo n.º 2
0
         lambda old, new: [
             old.columns["col"].pandas_dtype is Int,
             new.columns["col"].pandas_dtype is String,
         ],
     ],
     *[
         _boolean_update_column_case(bool_kwarg) for bool_kwarg in [
             "nullable",
             "allow_duplicates",
             "coerce",
             "required",
             "regex",
         ]
     ],
     [
         Column(Int, checks=Check.greater_than(0)),
         "col",
         {
             "checks": Check.less_than(10)
         },
         lambda old, new: [
             old.columns["col"].checks == [Check.greater_than(0)],
             new.columns["col"].checks == [Check.less_than(10)],
         ],
     ],
     # error cases
     [Column(Int), "col", {
         "name": "renamed_col"
     }, ValueError],
     [Column(Int), "foobar", {}, ValueError],
 ],