Python PandasDtype Examples

Programming Language: Python

Namespace/Package Name: pandera

Class/Type: PandasDtype

Examples at hotexamples.com: 8

Python PandasDtype - 8 examples found. These are the top-rated real-world Python examples of pandera.PandasDtype, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

from_pandas_api_type(2)

from_numpy_type(2)

from_str_alias(2)

PandasDtype(1)

from_python_type(1)

Example #1

Show file

File: test_dtypes.py Project: lkadin/pandera

def test_invalid_numpy_type_conversions(type1, type2):
    """Check that mismatched numpy dtypes are rejected.

    ``type1`` and ``type2`` are supplied by the caller (presumably a pytest
    parametrization defined outside this snippet) and must form a pair for
    which ``helper_type_validation`` raises ``SchemaError``.

    The test also checks that ``PandasDtype.from_numpy_type`` accepts a numpy
    float type and raises ``TypeError`` for ``pd.DatetimeIndex``.
    """
    with pytest.raises(SchemaError):
        helper_type_validation(type1, type2)

    # ``np.float_`` was removed in NumPy 2.0; ``np.float64`` is the very same
    # type object on older releases, so this works on every NumPy version.
    PandasDtype.from_numpy_type(np.float64)
    with pytest.raises(TypeError):
        PandasDtype.from_numpy_type(pd.DatetimeIndex)

Example #2

Show file

def test_numpy_type():
    """Check dtype validation for valid and invalid numpy type pairs.

    Every pair in ``valid_types`` is passed to ``helper_type_validation`` and
    is expected to validate; every pair in ``invalid_types`` is expected to
    raise ``SchemaError``. Finally ``PandasDtype.from_numpy_type`` is checked
    to accept a float type and to raise ``TypeError`` for ``pd.DatetimeIndex``.

    The builtin ``complex``, ``float``, ``int``, ``bool`` and ``str`` replace
    the former ``np.complex``, ``np.float``, ``np.int``, ``np.bool`` and
    ``np.str`` names. Those were plain aliases of the builtins, deprecated in
    NumPy 1.20 and removed in NumPy 1.24, so the objects under test are
    unchanged while the test keeps working on current NumPy.
    """
    # Test correct conversions
    valid_types = (
        (complex, complex),  # Pandas converts complex numbers always to np.complex128
        (complex, np.complex128),
        (np.complex128, complex),
        (np.complex64, np.complex128),  # Pandas converts complex numbers always to np.complex128
        (np.complex128, np.complex128),
        (float, float),
        (float, np.float64),
        (np.float16, np.float64),  # Pandas converts float numbers always to np.float64
        (np.float32, np.float64),
        (np.float64, np.float64),
        (int, int),
        (int, np.int64),
        (np.int8, np.int64),  # Pandas converts int numbers always to np.int64
        (np.int16, np.int64),
        (np.int32, np.int64),
        (np.int64, np.int64),
        (np.uint, np.int64),  # Pandas converts int numbers always to np.int64
        (np.uint8, np.int64),
        (np.uint16, np.int64),
        (np.uint32, np.int64),
        (np.uint64, np.int64),
        (bool, bool),
        (str, str),
        # object, np.void and bytes are not tested
    )

    for valid_type in valid_types:
        try:
            helper_type_validation(valid_type[0], valid_type[1])
        except Exception:  # pylint: disable=broad-except
            # Deliberately broad: on any failure, report the offending pair
            # and rerun the validation with the third argument set to True
            # (presumably a debug flag) to get more information.
            # NOTE(review): the failure is only reported if the rerun raises
            # again - confirm that helper_type_validation does so.
            print(f"Error on types: {valid_type}")
            helper_type_validation(valid_type[0], valid_type[1], True)

    # Examples of types comparisons, which shall fail
    invalid_types = (
        (complex, int),
        (int, complex),
        (float, complex),
        (complex, float),
        (int, float),
        (np.uint8, float),
        (complex, str),
    )
    for invalid_type in invalid_types:
        with pytest.raises(SchemaError):
            helper_type_validation(invalid_type[0], invalid_type[1])

    # ``float`` is exactly what the removed ``np.float`` alias resolved to.
    PandasDtype.from_numpy_type(float)
    with pytest.raises(TypeError):
        PandasDtype.from_numpy_type(pd.DatetimeIndex)

Example #3

Show file

def test_schema_coerce_inplace_validation(inplace, from_dtype, to_dtype):
    """Check dtype coercion during validation, in place and not in place.

    A single-column frame is built with the string alias of ``from_dtype``
    and validated against a schema that coerces the column to the string
    alias of ``to_dtype``. The validated frame must always carry the target
    dtype; the input frame carries it only when ``inplace`` is true.
    """
    target_alias = PandasDtype.from_python_type(to_dtype).str_alias
    source_alias = PandasDtype.from_python_type(from_dtype).str_alias

    values = pd.Series([1, 2, 6], dtype=source_alias)
    df = pd.DataFrame({"column": values})
    coercing_column = Column(target_alias, coerce=True)
    schema = DataFrameSchema({"column": coercing_column})
    validated_df = schema.validate(df, inplace=inplace)

    assert validated_df["column"].dtype == target_alias

    # In-place validation mutates the original frame; otherwise the original
    # frame must keep the dtype it was created with.
    expected_input_alias = target_alias if inplace else source_alias
    assert df["column"].dtype == expected_input_alias

Example #4

Show file

def test_check_types_coerce():
    """Check that ``check_types`` hands back the validated (coerced) frame.

    Both the annotated argument of ``transform_in`` and the annotated return
    value of ``transform_out`` start out as string data; after passing through
    ``check_types`` the column dtype must equal the schema's declared dtype,
    which in turn must be ``PandasDtype("int")``.
    """
    @check_types()
    def transform_in(df: DataFrame[InSchema]):
        return df

    in_df = transform_in(pd.DataFrame({"a": ["1"]}, index=["1"]))
    in_expected = InSchema.to_schema().columns["a"].pandas_dtype
    in_actual = PandasDtype(str(in_df["a"].dtype))
    assert in_actual == in_expected
    assert in_expected == PandasDtype("int")

    @check_types()
    def transform_out() -> DataFrame[OutSchema]:
        # The string column "b" is expected to come back coerced to integer.
        return pd.DataFrame({"b": ["1"]})

    out_df = transform_out()
    out_expected = OutSchema.to_schema().columns["b"].pandas_dtype
    out_actual = PandasDtype(str(out_df["b"].dtype))
    assert out_actual == out_expected
    assert out_expected == PandasDtype("int")

Example #5

Show file

File: test_dtypes.py Project: baskervilski/pandera

def test_pandas_api_type_exception(invalid_pandas_api_type):
    """Check that an unsupported pandas api type raises ``TypeError``."""
    # Callable form of pytest.raises: invokes the converter with the given
    # argument and fails the test unless TypeError is raised.
    pytest.raises(
        TypeError,
        PandasDtype.from_pandas_api_type,
        invalid_pandas_api_type,
    )

Example #6

Show file

File: test_dtypes.py Project: baskervilski/pandera

def test_pandas_api_types(pandas_api_type, pandas_dtype):
    """Check that a pandas api type converts to the expected PandasDtype."""
    converted = PandasDtype.from_pandas_api_type(pandas_api_type)
    # Identity, not equality: the very same ``pandas_dtype`` object is expected.
    assert converted is pandas_dtype

Example #7

Show file

File: test_dtypes.py Project: baskervilski/pandera

def test_unrecognized_str_aliases(str_alias):
    """Check that an unrecognized string alias raises ``TypeError``."""
    # Callable form of pytest.raises: invokes the converter with the given
    # alias and fails the test unless TypeError is raised.
    pytest.raises(TypeError, PandasDtype.from_str_alias, str_alias)

Example #8

Show file

# pylint: disable=W0212
"""Unit tests for inferring statistics of pandas objects."""

import pandas as pd
import pytest

import pandera as pa
from pandera import schema_statistics
from pandera import dtypes, PandasDtype


# PandasDtype values resolved from the default int/float string aliases that
# the ``dtypes`` module keeps in underscore-prefixed constants. Reading those
# private names is presumably why pylint's W0212 check is disabled at the top
# of this file.
DEFAULT_INT = PandasDtype.from_str_alias(dtypes._DEFAULT_PANDAS_INT_TYPE)
DEFAULT_FLOAT = PandasDtype.from_str_alias(dtypes._DEFAULT_PANDAS_FLOAT_TYPE)


def _create_dataframe(multi_index=False, nullable=False):
    if multi_index:
        index = pd.MultiIndex.from_arrays(
            [[1, 1, 2], ["a", "b", "c"]],
            names=["int_index", "str_index"],
        )
    else:
        index = pd.Index([10, 11, 12], name="int_index")

    df = pd.DataFrame(
        data={
            "int": [1, 2, 3],
            "float": [1., 2., 3.],
            "boolean": [True, False, True],
            "string": ["a", "b", "c"],
            "datetime": pd.to_datetime(["20180101", "20180102", "20180103"]),