def test_invalid_numpy_type_conversions(type1, type2):
    """Check that an incompatible pair of numpy dtypes is rejected.

    ``helper_type_validation(type1, type2)`` must raise ``SchemaError``.
    The test also checks that ``PandasDtype.from_numpy_type`` accepts a numpy
    float type without raising, and that it raises ``TypeError`` when given
    ``pd.DatetimeIndex`` (a pandas index class rather than a numpy type).
    """
    with pytest.raises(SchemaError):
        helper_type_validation(type1, type2)

    # np.float64 is the type the ``np.float_`` alias pointed to. The alias was
    # removed in NumPy 2.0, so the concrete type is spelled out here.
    PandasDtype.from_numpy_type(np.float64)
    with pytest.raises(TypeError):
        PandasDtype.from_numpy_type(pd.DatetimeIndex)
def test_numpy_type():
    """Check dtype validation for pairs of numpy / builtin scalar types.

    Every pair in ``valid_types`` must pass ``helper_type_validation`` and
    every pair in ``invalid_types`` must make it raise ``SchemaError``.
    Finally, ``PandasDtype.from_numpy_type`` must accept a float type and
    raise ``TypeError`` for ``pd.DatetimeIndex``.

    The builtin ``complex``, ``float``, ``int``, ``bool`` and ``str`` are used
    in place of ``np.complex``, ``np.float``, ``np.int``, ``np.bool`` and
    ``np.str``. Those numpy names were plain aliases of the builtins; they
    were deprecated in NumPy 1.20 and removed in NumPy 1.24.
    """
    # Test correct conversions
    valid_types = (
        # Pandas converts complex numbers always to np.complex128
        (complex, complex),
        (complex, np.complex128),
        (np.complex128, complex),
        (np.complex64, np.complex128),
        (np.complex128, np.complex128),
        # Pandas converts float numbers always to np.float64
        (float, float),
        (float, np.float64),
        (np.float16, np.float64),
        (np.float32, np.float64),
        (np.float64, np.float64),
        # Pandas converts int numbers always to np.int64
        (int, int),
        (int, np.int64),
        (np.int8, np.int64),
        (np.int16, np.int64),
        (np.int32, np.int64),
        (np.int64, np.int64),
        (np.uint, np.int64),
        (np.uint8, np.int64),
        (np.uint16, np.int64),
        (np.uint32, np.int64),
        (np.uint64, np.int64),
        (bool, bool),
        (str, str),
        # np.object, np.void and bytes are not tested
    )

    for valid_type in valid_types:
        try:
            helper_type_validation(*valid_type)
        except Exception:  # pylint: disable=broad-except
            # Deliberately broad: any failure should trigger a rerun with
            # debug information. KeyboardInterrupt/SystemExit are not caught
            # so that aborting the test run is not answered with a rerun.
            print(f"Error on types: {valid_type}")
            helper_type_validation(*valid_type, True)
            # If the debug rerun did not reproduce the failure, propagate the
            # original one so the test cannot pass silently.
            raise

    # Examples of types comparisons, which shall fail
    invalid_types = (
        (complex, int),
        (int, complex),
        (float, complex),
        (complex, float),
        (int, float),
        (np.uint8, float),
        (complex, str),
    )
    for invalid_type in invalid_types:
        with pytest.raises(SchemaError):
            helper_type_validation(*invalid_type)

    # The builtin float is the object the removed ``np.float`` alias referred
    # to, so this call receives exactly the same argument as before.
    PandasDtype.from_numpy_type(float)
    with pytest.raises(TypeError):
        PandasDtype.from_numpy_type(pd.DatetimeIndex)
def test_schema_coerce_inplace_validation(inplace, from_dtype, to_dtype):
    """Verify dtype coercion by ``validate`` with and without ``inplace``.

    The validated result must always carry the target dtype. The frame that
    was passed in must carry it too when ``inplace`` is truthy, and must keep
    the dtype it was built with otherwise.
    """
    target_alias = PandasDtype.from_python_type(to_dtype).str_alias
    source_alias = PandasDtype.from_python_type(from_dtype).str_alias

    values = pd.Series([1, 2, 6], dtype=source_alias)
    original = pd.DataFrame({"column": values})
    coercing_schema = DataFrameSchema(
        {"column": Column(target_alias, coerce=True)}
    )

    result = coercing_schema.validate(original, inplace=inplace)
    assert result["column"].dtype == target_alias

    # Validating in place mutates the caller's frame; otherwise the caller's
    # frame keeps its original dtype.
    expected_original_alias = target_alias if inplace else source_alias
    assert original["column"].dtype == expected_original_alias
def test_check_types_coerce():
    """Verify that ``check_types`` hands back the validated frame.

    Both the annotated argument and the annotated return value are built as
    string columns. After passing through the decorated functions their dtype
    must equal the dtype declared by the schema, which in turn must equal
    ``PandasDtype("int")``.
    """

    @check_types()
    def transform_in(df: DataFrame[InSchema]):
        return df

    validated_in = transform_in(pd.DataFrame({"a": ["1"]}, index=["1"]))
    in_expected = InSchema.to_schema().columns["a"].pandas_dtype
    in_actual = PandasDtype(str(validated_in["a"].dtype))
    assert in_actual == in_expected == PandasDtype("int")

    @check_types()
    def transform_out() -> DataFrame[OutSchema]:
        # OutSchema.b should be coerced to an integer.
        return pd.DataFrame({"b": ["1"]})

    validated_out = transform_out()
    out_expected = OutSchema.to_schema().columns["b"].pandas_dtype
    out_actual = PandasDtype(str(validated_out["b"].dtype))
    assert out_actual == out_expected == PandasDtype("int")
def test_pandas_api_type_exception(invalid_pandas_api_type):
    """Unsupported pandas api types must be rejected with ``TypeError``."""
    convert = PandasDtype.from_pandas_api_type
    with pytest.raises(TypeError):
        convert(invalid_pandas_api_type)
def test_pandas_api_types(pandas_api_type, pandas_dtype):
    """A pandas api type must convert to the expected ``PandasDtype`` object.

    The result is compared by identity (``is``), not by equality.
    """
    converted = PandasDtype.from_pandas_api_type(pandas_api_type)
    assert converted is pandas_dtype
def test_unrecognized_str_aliases(str_alias):
    """Unrecognized string aliases must be rejected with ``TypeError``."""
    parse_alias = PandasDtype.from_str_alias
    with pytest.raises(TypeError):
        parse_alias(str_alias)
# pylint: disable=W0212 """Unit tests for inferring statistics of pandas objects.""" import pandas as pd import pytest import pandera as pa from pandera import schema_statistics from pandera import dtypes, PandasDtype DEFAULT_INT = PandasDtype.from_str_alias(dtypes._DEFAULT_PANDAS_INT_TYPE) DEFAULT_FLOAT = PandasDtype.from_str_alias(dtypes._DEFAULT_PANDAS_FLOAT_TYPE) def _create_dataframe(multi_index=False, nullable=False): if multi_index: index = pd.MultiIndex.from_arrays( [[1, 1, 2], ["a", "b", "c"]], names=["int_index", "str_index"], ) else: index = pd.Index([10, 11, 12], name="int_index") df = pd.DataFrame( data={ "int": [1, 2, 3], "float": [1., 2., 3.], "boolean": [True, False, True], "string": ["a", "b", "c"], "datetime": pd.to_datetime(["20180101", "20180102", "20180103"]),