def test_deprecate_pandas_dtype(schema_cls, as_pos_arg):
    """Check that the ``pandas_dtype`` keyword warns or errors as expected.

    :param schema_cls: schema (component) class under test.
    :param as_pos_arg: whether ``schema_cls`` also accepts the dtype as its
        first positional argument.
    """
    # Either keyword spelling must resolve to the same integer data type.
    for dtype_kwargs in ({"dtype": int}, {"pandas_dtype": int}):
        assert schema_cls(**dtype_kwargs).dtype.check(pa.Int())

    # Using the old keyword on its own emits a deprecation warning.
    with pytest.warns(DeprecationWarning):
        schema_cls(pandas_dtype=int)

    # Supplying both keywords at once is rejected at construction time.
    with pytest.raises(pa.errors.SchemaInitError):
        schema_cls(dtype=int, pandas_dtype=int)

    if not as_pos_arg:
        return

    # Same expectations when the dtype is given positionally.
    assert schema_cls(int).dtype.check(pa.Int())
    with pytest.raises(pa.errors.SchemaInitError):
        schema_cls(int, pandas_dtype=int)
def test_register_check_strategy(data) -> None:
    """Register a strategy on a custom check and draw a value from it.

    :param data: object exposing ``draw``, used to sample from the check's
        strategy.
    """

    # pylint: disable=unused-argument
    def custom_eq_strategy(
        pandas_dtype: pa.DataType,
        strategy: st.SearchStrategy = None,
        *,
        value: Any,
    ):
        # Always yield ``value``, converted through the numpy scalar type
        # that corresponds to ``pandas_dtype``.
        numpy_scalar_type = strategies.to_numpy_dtype(pandas_dtype).type
        return st.just(value).map(numpy_scalar_type)

    # pylint: disable=no-member
    class CustomCheck(_CheckBase):
        """Check class carrying one custom built-in check."""

        @classmethod
        @strategies.register_check_strategy(custom_eq_strategy)
        @register_check_statistics(["value"])
        def custom_equals(cls, value, **kwargs) -> "CustomCheck":
            """Build a check that compares a series against ``value``."""

            def _custom_equals(series: pd.Series) -> pd.Series:
                """Element-wise equality against ``value``."""
                return series == value

            check_name = cls.custom_equals.__name__
            return cls(
                _custom_equals,
                name=check_name,
                error=f"equal_to({value})",
                **kwargs,
            )

    custom_check = CustomCheck.custom_equals(100)
    int_strategy = custom_check.strategy(pa.Int())
    drawn_value = data.draw(int_strategy)
    assert drawn_value == 100
def test_index_example() -> None:
    """Check that examples generated by an Index schema validate against it."""
    unique_int_index = pa.Index(pa.Int(), unique=True)
    for _ in range(10):
        generated_index = unique_int_index.example()
        # Wrap the generated index in an otherwise empty frame to validate it.
        frame = pd.DataFrame(index=generated_index)
        unique_int_index(frame)
def test_index_strategy(data) -> None:
    """Draw from an Index schema strategy and verify the drawn index.

    :param data: object exposing ``draw``, used to sample from the strategy.
    """
    expected_dtype = pa.Int()
    schema = pa.Index(expected_dtype, unique=True, name="index")
    drawn_index = data.draw(schema.strategy(size=10))

    # The schema asks for unique values, so no entry may be a duplicate.
    assert not example_has_duplicates(drawn_index) if False else not drawn_index.duplicated().any()

    # The drawn dtype, resolved through the pandas engine, must match.
    drawn_dtype = pandas_engine.Engine.dtype(drawn_index.dtype)
    assert expected_dtype.check(drawn_dtype)

    schema(pd.DataFrame(index=drawn_index))
# pylint: disable=W0212 """Unit tests for inferring statistics of pandas objects.""" import pandas as pd import pytest import pandera as pa from pandera import dtypes, schema_statistics from pandera.engines import pandas_engine DEFAULT_FLOAT = pandas_engine.Engine.dtype(float) DEFAULT_INT = pandas_engine.Engine.dtype(int) NUMERIC_TYPES = [ pa.Int(), pa.UInt(), pa.Float(), pa.Complex(), pandas_engine.Engine.dtype("Int32"), pandas_engine.Engine.dtype("UInt32"), ] INTEGER_TYPES = [ dtypes.Int(), dtypes.Int8(), dtypes.Int16(), dtypes.Int32(), dtypes.Int64(), dtypes.UInt8(), dtypes.UInt16(), dtypes.UInt32(), dtypes.UInt64(), ]
def test_column_example():
    """Check that examples generated by a Column schema validate against it."""
    positive_int_column = pa.Column(pa.Int(), pa.Check.gt(0), name="column")
    for _ in range(10):
        generated = positive_int_column.example()
        positive_int_column(generated)
def test_column_strategy(data) -> None:
    """Draw from a Column schema strategy and validate the drawn value.

    :param data: object exposing ``draw``, used to sample from the strategy.
    """
    positive_int_column = pa.Column(pa.Int(), pa.Check.gt(0), name="column")
    drawn = data.draw(positive_int_column.strategy())
    positive_int_column(drawn)
def test_series_example() -> None:
    """Check that examples generated by a SeriesSchema validate against it."""
    positive_int_series = pa.SeriesSchema(pa.Int(), pa.Check.gt(0))
    for _ in range(10):
        generated = positive_int_series.example()
        positive_int_series(generated)
def test_series_strategy(data) -> None:
    """Draw from a SeriesSchema strategy and validate the drawn value.

    :param data: object exposing ``draw``, used to sample from the strategy.
    """
    positive_int_series = pa.SeriesSchema(pa.Int(), pa.Check.gt(0))
    drawn = data.draw(positive_int_series.strategy())
    positive_int_series(drawn)
@hypothesis.given(st.data()) def test_dataframe_checks(data_type, data): """Test dataframe strategy with checks defined at the dataframe level.""" min_value, max_value = data.draw(value_ranges(data_type)) dataframe_schema = pa.DataFrameSchema( {f"{data_type}_col": pa.Column(data_type) for _ in range(5)}, checks=pa.Check.in_range(min_value, max_value), ) strat = dataframe_schema.strategy(size=5) example = data.draw(strat) dataframe_schema(example) @pytest.mark.parametrize("data_type", [pa.Int(), pa.Float, pa.String, pa.DateTime]) @hypothesis.given(st.data()) @hypothesis.settings( suppress_health_check=[hypothesis.HealthCheck.too_slow], ) def test_dataframe_strategy_with_indexes(data_type, data): """Test dataframe strategy with index and multiindex components.""" dataframe_schema_index = pa.DataFrameSchema(index=pa.Index(data_type)) dataframe_schema_multiindex = pa.DataFrameSchema(index=pa.MultiIndex( [pa.Index(data_type, name=f"index{i}") for i in range(3)])) dataframe_schema_index(data.draw(dataframe_schema_index.strategy(size=10))) dataframe_schema_multiindex( data.draw(dataframe_schema_multiindex.strategy(size=10))) @hypothesis.given(st.data())
def test_dataframe_example() -> None:
    """Check that examples generated by a DataFrameSchema validate against it."""
    positive_int_column = pa.Column(pa.Int(), pa.Check.gt(0))
    frame_schema = pa.DataFrameSchema({"column": positive_int_column})
    for _ in range(10):
        generated_frame = frame_schema.example()
        frame_schema(generated_frame)