def test_required_column_failure():
    """Expect ValueError when a required column is absent from the frame."""
    required = ["primary_key", "letter", "produce"]
    # Keep only two columns so the required "produce" column is not present.
    trimmed = _get_test_dataframe()[["primary_key", "letter"]]
    with pytest.raises(ValueError):
        dataframe.validate(trimmed, require_present_columns=required)
def test_failure_dict_return():
    """Expect a failed result object, not an exception, with raise_error=False."""
    # Only two columns are kept, so "produce" is not in the frame being checked.
    two_columns = _get_test_dataframe()[["primary_key", "letter"]]
    outcome = dataframe.validate(
        two_columns,
        raise_error=False,
        require_present_columns=["produce"],
    )
    assert not outcome.passed
    # The missing column is expected under the "failed_present_columns" key.
    assert "produce" in outcome.details["failed_present_columns"]
def test_successful_validation():
    """Expect a passing result with empty details on the unmodified test frame."""
    all_columns = ["primary_key", "letter", "produce"]
    outcome = dataframe.validate(
        _get_test_dataframe(),
        # Separate copies so each keyword receives its own list object.
        require_present_columns=list(all_columns),
        require_unique_columns=["primary_key"],
        require_nonnull_columns=list(all_columns),
    )
    assert outcome.passed
    assert len(outcome.details) == 0
def main() -> ValidationCheck:
    """Run presence, uniqueness, non-null and custom checks on a sample frame.

    Returns:
        ValidationCheck object consisting of Pass/Fail and dict of failed
        columns.
    """
    sample = pd.DataFrame(
        {
            "a": [1] * 4,
            "b": [1, 1, 2, 2],
            "c": [2] * 4,
            "d": [3] * 4,
        }
    )
    # Each entry pairs a check callable with the columns it is applied to.
    # ensure_not_2 / ensure_not_3 are resolved from outside this function.
    column_checks = (
        (ensure_not_2, ["a", "b"]),
        (ensure_not_3, ["a", "d"]),
    )
    check_result = dataframe.validate(
        sample,
        require_present_columns=["a", "b"],
        require_unique_columns=["d"],
        require_nonnull_columns=["b", "c"],
        custom_checks=column_checks,
    )
    return check_result
def test_check_custom():
    """Exercise custom checks: one passing set, then two sets expected to raise."""

    def ensure_not_2(value):
        return value != 2

    def ensure_3(value):
        return value == 3

    # Constant columns: "a" is all 1s, "b" is all 2s, "c" is all 3s.
    frame = pd.DataFrame({"a": [1] * 4, "b": [2] * 4, "c": [3] * 4})

    # No exception is expected for this combination.
    passing_checks = ((ensure_not_2, ["a"]), (ensure_3, ["c"]))
    dataframe.validate(frame, custom_checks=passing_checks)

    # Both sets apply ensure_not_2 to column "b", which holds only 2s.
    failing_check_sets = (
        ((ensure_3, ["c"]), (ensure_not_2, ["b"])),
        ((ensure_not_2, ["b"]),),
    )
    for checks in failing_check_sets:
        with pytest.raises(ValueError):
            dataframe.validate(frame, custom_checks=checks)
def test_nonnull_column_failure():
    """Expect ValueError when a column required to be non-null holds None."""
    column = "produce"
    frame = _get_test_dataframe()
    # Blank out the cell at row label 0 to create an empty value.
    frame.loc[0, column] = None
    with pytest.raises(ValueError):
        dataframe.validate(frame, require_nonnull_columns=[column])
def test_unique_column_failure():
    """Expect ValueError when a column required to be unique has a repeat."""
    column = "primary_key"
    frame = _get_test_dataframe()
    # Overwrite the cell at row label 1 to introduce a repeated value.
    frame.loc[1, column] = "1"
    with pytest.raises(ValueError):
        dataframe.validate(frame, require_unique_columns=[column])