Example #1
0
def test_invalid_numpy_type_conversions(type1, type2):
    """Check that an incompatible pair of numpy dtypes fails validation.

    ``helper_type_validation(type1, type2)`` must raise ``SchemaError``.
    Afterwards ``PandasDtype.from_numpy_type`` is exercised directly: it must
    accept a numpy float type and raise ``TypeError`` for
    ``pd.DatetimeIndex``.

    NOTE(review): ``type1``/``type2`` are presumably supplied by a
    ``pytest.mark.parametrize`` decorator that is not visible here.
    """
    with pytest.raises(SchemaError):
        helper_type_validation(type1, type2)

    # ``np.float_`` was removed in NumPy 2.0; ``np.float64`` is the type it
    # aliased, so the call is unchanged on older NumPy releases.
    PandasDtype.from_numpy_type(np.float64)
    with pytest.raises(TypeError):
        PandasDtype.from_numpy_type(pd.DatetimeIndex)
Example #2
0
def test_numpy_type():
    """Check how numpy dtypes are matched by ``helper_type_validation``.

    Three things are exercised:

    * type pairs that are expected to validate without error,
    * type pairs that are expected to raise ``SchemaError``,
    * ``PandasDtype.from_numpy_type``, which must accept ``float`` and raise
      ``TypeError`` for ``pd.DatetimeIndex``.

    The builtins ``complex``, ``float``, ``int``, ``bool`` and ``str`` are
    used where the removed NumPy aliases ``np.complex``, ``np.float``,
    ``np.int``, ``np.bool`` and ``np.str`` used to be; those aliases were the
    very same objects as the builtins.
    """
    # Test correct conversions
    valid_types = (
        (complex, complex),
        (complex, np.complex128),
        (np.complex128, complex),
        (np.complex64, np.complex128),  # Pandas converts complex numbers always to np.complex128
        (np.complex128, np.complex128),
        (float, float),
        (float, np.float64),
        (np.float16, np.float64),  # Pandas converts float numbers always to np.float64
        (np.float32, np.float64),
        (np.float64, np.float64),
        (int, int),
        (int, np.int64),
        (np.int8, np.int64),   # Pandas converts int numbers always to np.int64
        (np.int16, np.int64),
        (np.int32, np.int64),
        (np.int64, np.int64),
        (np.uint, np.int64),  # Pandas converts int numbers always to np.int64
        (np.uint8, np.int64),
        (np.uint16, np.int64),
        (np.uint32, np.int64),
        (np.uint64, np.int64),
        (bool, bool),
        (str, str),
        # object, np.void and bytes are not tested
    )

    for type_pair in valid_types:
        try:
            helper_type_validation(*type_pair)
        except Exception:
            # Report the offending pair and rerun with the third positional
            # argument set to True (the debug switch, going by the original
            # comment) so the failure comes with extra information.
            print(f"Error on types: {type_pair}")
            helper_type_validation(*type_pair, True)
            # The rerun is diagnostic only: if it happens to succeed, the
            # original failure must still fail the test instead of being
            # silently swallowed.
            raise

    # Examples of types comparisons, which shall fail
    invalid_types = (
        (complex, int),
        (int, complex),
        (float, complex),
        (complex, float),
        (int, float),
        (np.uint8, float),
        (complex, str),
    )
    for type_pair in invalid_types:
        with pytest.raises(SchemaError):
            helper_type_validation(*type_pair)

    # ``np.float`` was an alias of the builtin ``float``; passing the builtin
    # keeps the call identical while working on NumPy >= 1.24.
    PandasDtype.from_numpy_type(float)
    with pytest.raises(TypeError):
        PandasDtype.from_numpy_type(pd.DatetimeIndex)
Example #3
0
def test_schema_coerce_inplace_validation(inplace, from_dtype, to_dtype):
    """Check dtype coercion during validation for both ``inplace`` settings.

    A single-column frame is built with the string alias of ``from_dtype``
    and validated against a schema whose column coerces to the string alias
    of ``to_dtype``. The validated frame must always carry the target dtype;
    the input frame must carry it only when ``inplace`` is truthy.
    """
    target_alias = PandasDtype.from_python_type(to_dtype).str_alias
    source_alias = PandasDtype.from_python_type(from_dtype).str_alias

    values = pd.Series([1, 2, 6], dtype=source_alias)
    original = pd.DataFrame({"column": values})
    coercing_schema = DataFrameSchema(
        {"column": Column(target_alias, coerce=True)}
    )
    result = coercing_schema.validate(original, inplace=inplace)

    assert result["column"].dtype == target_alias

    # inplace mutates the original dataframe; otherwise its dtype is preserved
    expected_original_dtype = target_alias if inplace else source_alias
    assert original["column"].dtype == expected_original_dtype
Example #4
0
def test_check_types_coerce():
    """Check that ``check_types`` hands back the result of validation.

    Both an annotated argument (``InSchema``) and an annotated return value
    (``OutSchema``) are fed a string column; afterwards the column dtype must
    equal the dtype declared by the schema, which in turn must equal
    ``PandasDtype("int")``.
    """
    int_dtype_alias = "int"

    @check_types()
    def transform_in(df: DataFrame[InSchema]):
        return df

    in_df = transform_in(pd.DataFrame({"a": ["1"]}, index=["1"]))
    in_expected = InSchema.to_schema().columns["a"].pandas_dtype
    in_actual = PandasDtype(str(in_df["a"].dtype))
    assert in_actual == in_expected
    assert in_expected == PandasDtype(int_dtype_alias)

    @check_types()
    def transform_out() -> DataFrame[OutSchema]:
        # OutSchema.b should be coerced to an integer.
        return pd.DataFrame({"b": ["1"]})

    out_df = transform_out()
    out_expected = OutSchema.to_schema().columns["b"].pandas_dtype
    out_actual = PandasDtype(str(out_df["b"].dtype))
    assert out_actual == out_expected
    assert out_expected == PandasDtype(int_dtype_alias)
Example #5
0
def test_pandas_api_type_exception(invalid_pandas_api_type):
    """Check that an unsupported pandas API type is rejected.

    ``PandasDtype.from_pandas_api_type`` must raise ``TypeError`` for the
    given value.
    """
    convert = PandasDtype.from_pandas_api_type
    with pytest.raises(TypeError):
        convert(invalid_pandas_api_type)
Example #6
0
def test_pandas_api_types(pandas_api_type, pandas_dtype):
    """Check pandas API type conversion.

    ``PandasDtype.from_pandas_api_type(pandas_api_type)`` must return the
    very same object as ``pandas_dtype`` (identity, not just equality).
    """
    converted = PandasDtype.from_pandas_api_type(pandas_api_type)
    assert converted is pandas_dtype
Example #7
0
def test_unrecognized_str_aliases(str_alias):
    """Check that an unrecognized string alias is rejected.

    ``PandasDtype.from_str_alias`` must raise ``TypeError`` for the given
    alias.
    """
    parse_alias = PandasDtype.from_str_alias
    with pytest.raises(TypeError):
        parse_alias(str_alias)
Example #8
0
# pylint: disable=W0212
"""Unit tests for inferring statistics of pandas objects."""

import pandas as pd
import pytest

import pandera as pa
from pandera import schema_statistics
from pandera import dtypes, PandasDtype


# Default integer and float dtypes, resolved from the private default string
# aliases in ``pandera.dtypes`` (that private access is why pylint's W0212 is
# disabled at the top of this module).
DEFAULT_INT = PandasDtype.from_str_alias(dtypes._DEFAULT_PANDAS_INT_TYPE)
DEFAULT_FLOAT = PandasDtype.from_str_alias(dtypes._DEFAULT_PANDAS_FLOAT_TYPE)


def _create_dataframe(multi_index=False, nullable=False):
    if multi_index:
        index = pd.MultiIndex.from_arrays(
            [[1, 1, 2], ["a", "b", "c"]],
            names=["int_index", "str_index"],
        )
    else:
        index = pd.Index([10, 11, 12], name="int_index")

    df = pd.DataFrame(
        data={
            "int": [1, 2, 3],
            "float": [1., 2., 3.],
            "boolean": [True, False, True],
            "string": ["a", "b", "c"],
            "datetime": pd.to_datetime(["20180101", "20180102", "20180103"]),