예제 #1
0
def test_dataframe_strategy_with_indexes(pdtype, data):
    """Draw frames from index-only schemas and feed them back to the schema.

    Covers a schema with a single Index and one with a three-level
    MultiIndex, both built from ``pdtype``.
    """
    single_index_schema = pa.DataFrameSchema(index=pa.Index(pdtype))

    index_levels = [pa.Index(pdtype, name=f"index{i}") for i in range(3)]
    multi_index_schema = pa.DataFrameSchema(index=pa.MultiIndex(index_levels))

    # Each drawn frame is passed straight back through the schema that
    # produced its strategy.
    single_index_frame = data.draw(single_index_schema.strategy(size=10))
    single_index_schema(single_index_frame)

    multi_index_frame = data.draw(multi_index_schema.strategy(size=10))
    multi_index_schema(multi_index_frame)
예제 #2
0
def test_dataframeschema():
    """Check a pydantic model accepts DataFrameSchema and MultiIndex fields."""
    model = DataFrameSchemaPydantic(
        pa_schema=pa.DataFrameSchema(),
        pa_mi=pa.MultiIndex([pa.Index(str), pa.Index(int)]),
    )
    assert isinstance(model, DataFrameSchemaPydantic)
예제 #3
0
def _create_schema(index="single"):
    """Build a DataFrameSchema fixture with checked columns of several dtypes.

    :param index: ``"multi"`` attaches a three-level MultiIndex, ``"single"``
        attaches one Index named ``"int_index"``; any other value leaves the
        schema without an index component.
    :returns: a ``pa.DataFrameSchema`` created with ``coerce=False`` and
        ``strict=True``.
    """
    if index == "multi":
        schema_index = pa.MultiIndex([
            pa.Index(pa.Int, name=level_name)
            for level_name in ("int_index0", "int_index1", "int_index2")
        ])
    elif index == "single":
        schema_index = pa.Index(pa.Int, name="int_index")
    else:
        schema_index = None

    int_column = pa.Column(
        pa.Int,
        checks=[
            pa.Check.greater_than(0),
            pa.Check.less_than(10),
            pa.Check.in_range(0, 10),
        ],
    )
    float_column = pa.Column(
        pa.Float,
        checks=[
            pa.Check.greater_than(-10),
            pa.Check.less_than(20),
            pa.Check.in_range(-10, 20),
        ],
    )
    str_column = pa.Column(
        pa.String,
        checks=[
            pa.Check.isin(["foo", "bar", "x", "xy"]),
            pa.Check.str_length(1, 3),
        ],
    )
    datetime_column = pa.Column(
        pa.DateTime,
        checks=[
            pa.Check.greater_than(pd.Timestamp("20100101")),
            pa.Check.less_than(pd.Timestamp("20200101")),
        ],
    )
    timedelta_column = pa.Column(
        pa.Timedelta,
        checks=[
            pa.Check.greater_than(pd.Timedelta(1000, unit="ns")),
            pa.Check.less_than(pd.Timedelta(10000, unit="ns")),
        ],
    )

    columns = {
        "int_column": int_column,
        "float_column": float_column,
        "str_column": str_column,
        "datetime_column": datetime_column,
        "timedelta_column": timedelta_column,
    }
    return pa.DataFrameSchema(
        columns=columns,
        index=schema_index,
        coerce=False,
        strict=True,
    )
예제 #4
0
def test_multiindex():
    """Two Index-annotated fields on a model should produce a MultiIndex."""
    class Schema(pa.SchemaModel):
        a: Index[int] = pa.Field(gt=0)
        b: Index[str]

    # One expected level per annotated field, in declaration order.
    expected_levels = [
        pa.Index(int, name="a", checks=pa.Check.gt(0)),
        pa.Index(str, name="b"),
    ]
    expected = pa.DataFrameSchema(index=pa.MultiIndex(expected_levels))

    assert expected == Schema.to_schema()
예제 #5
0
def test_multiindex_example():
    """
    Check that examples generated by a MultiIndex component can be passed
    back through that same component inside an empty-column DataFrame.
    """
    float_dtype = pa.PandasDtype.Float
    levels = [
        pa.Index(float_dtype, allow_duplicates=False, name="level_0"),
        pa.Index(float_dtype, nullable=True),
        pa.Index(float_dtype),
    ]
    multiindex = pa.MultiIndex(indexes=levels)

    # Repeat so that several independently generated examples are exercised.
    for _attempt in range(10):
        frame = pd.DataFrame(index=multiindex.example())
        multiindex(frame)
예제 #6
0
def test_multiindex_example() -> None:
    """
    Check that examples generated by a MultiIndex component can be passed
    back through that same component inside an empty-column DataFrame.
    """
    float_type = pa.Float()
    levels = [
        pa.Index(float_type, unique=True, name="level_0"),
        pa.Index(float_type, nullable=True),
        pa.Index(float_type),
    ]
    multiindex = pa.MultiIndex(indexes=levels)

    # Repeat so that several independently generated examples are exercised.
    for _attempt in range(10):
        frame = pd.DataFrame(index=multiindex.example())
        multiindex(frame)
예제 #7
0
def test_multiindex_strategy(data):
    """Check level dtypes drawn from a MultiIndex strategy, and chaining."""
    pdtype = pa.PandasDtype.Float
    levels = [
        pa.Index(pdtype, allow_duplicates=False, name="level_0"),
        pa.Index(pdtype, nullable=True),
        pa.Index(pdtype),
    ]
    multiindex = pa.MultiIndex(indexes=levels)

    drawn_index = data.draw(multiindex.strategy(size=10))
    for level in range(drawn_index.nlevels):
        level_dtype = drawn_index.get_level_values(level).dtype
        assert level_dtype == pdtype.str_alias

    # Passing an existing strategy to multiindex_strategy is expected to be
    # rejected with BaseStrategyOnlyError.
    with pytest.raises(pa.errors.BaseStrategyOnlyError):
        chained_strategy = strategies.pandas_dtype_strategy(pdtype)
        strategies.multiindex_strategy(pdtype, chained_strategy)
예제 #8
0
def test_config() -> None:
    """Check that Config options are inherited and reach the DataFrameSchema."""
    class Base(pa.SchemaModel):
        a: Series[int]
        idx_1: Index[str]
        idx_2: Index[str]

        class Config:
            name = "Base schema"
            coerce = True
            ordered = True
            multiindex_coerce = True
            multiindex_strict = True
            multiindex_name: Optional[str] = "mi"

    class Child(Base):
        b: Series[int]

        class Config:
            name = "Child schema"
            strict = True
            multiindex_strict = False
            description = "foo"
            title = "bar"

    # The expectation mixes options set only on Base (coerce, ordered,
    # multiindex_coerce, multiindex_name) with those set on Child.
    expected_columns = {
        "a": pa.Column(int),
        "b": pa.Column(int),
    }
    expected_index = pa.MultiIndex(
        [
            pa.Index(str, name="idx_1"),
            pa.Index(str, name="idx_2"),
        ],
        coerce=True,
        strict=False,
        name="mi",
    )
    expected = pa.DataFrameSchema(
        columns=expected_columns,
        index=expected_index,
        name="Child schema",
        coerce=True,
        strict=True,
        ordered=True,
        description="foo",
        title="bar",
    )

    assert expected == Child.to_schema()
예제 #9
0
def test_multiindex_strategy(data) -> None:
    """Check level dtypes drawn from a MultiIndex strategy, and chaining."""
    data_type = pa.Float()
    levels = [
        pa.Index(data_type, unique=True, name="level_0"),
        pa.Index(data_type, nullable=True),
        pa.Index(data_type),
    ]
    multiindex = pa.MultiIndex(indexes=levels)

    drawn_index = data.draw(multiindex.strategy(size=10))
    for level in range(drawn_index.nlevels):
        # Convert the level's pandas dtype via the engine before checking it
        # against the declared data type.
        level_dtype = drawn_index.get_level_values(level).dtype
        actual_data_type = pandas_engine.Engine.dtype(level_dtype)
        assert data_type.check(actual_data_type)

    # Passing an existing strategy to multiindex_strategy is expected to be
    # rejected with BaseStrategyOnlyError.
    with pytest.raises(pa.errors.BaseStrategyOnlyError):
        chained_strategy = strategies.pandas_dtype_strategy(data_type)
        strategies.multiindex_strategy(data_type, chained_strategy)
예제 #10
0
def test_field_name_access_inherit():
    """Check schemas and class-level field names for an inheriting model.

    Child overrides some Base fields (changing dtype and/or alias) and adds
    new ones; both the generated schemas and the names exposed as class
    attributes on Child are compared with the expected values.
    """
    class Base(pa.SchemaModel):
        a: Series[int]
        b: Series[int] = pa.Field()
        c: Series[int] = pa.Field(alias="_c")
        d: Series[int] = pa.Field(alias=123)
        i1: Index[int]
        i2: Index[int] = pa.Field()

    class Child(Base):
        b: Series[str] = pa.Field(alias="_b")
        c: Series[str]
        d: Series[str] = pa.Field()
        extra1: Series[int]
        extra2: Series[int] = pa.Field()
        extra3: Series[int] = pa.Field(alias="_extra3")
        i1: Index[str]
        i3: Index[int] = pa.Field(alias="_i3")

    base_columns = {
        "a": pa.Column(int),
        "b": pa.Column(int),
        "_c": pa.Column(int),
        123: pa.Column(int),
    }
    base_index = pa.MultiIndex([
        pa.Index(int, name="i1"),
        pa.Index(int, name="i2"),
    ])
    expected_base = pa.DataFrameSchema(columns=base_columns, index=base_index)

    child_columns = {
        "a": pa.Column(int),
        "_b": pa.Column(str),
        "c": pa.Column(str),
        "d": pa.Column(str),
        "extra1": pa.Column(int),
        "extra2": pa.Column(int),
        "_extra3": pa.Column(int),
    }
    child_index = pa.MultiIndex([
        pa.Index(str, name="i1"),
        pa.Index(int, name="i2"),
        pa.Index(int, name="_i3"),
    ])
    expected_child = pa.DataFrameSchema(
        columns=child_columns,
        index=child_index,
    )

    assert expected_base == Base.to_schema()
    assert expected_child == Child.to_schema()

    # Attribute on Child -> name it is expected to evaluate to.
    expected_names = {
        "a": "a",
        "b": "_b",
        "c": "c",
        "d": "d",
        "extra1": "extra1",
        "extra2": "extra2",
        "extra3": "_extra3",
        "i1": "i1",
        "i2": "i2",
        "i3": "_i3",
    }
    for attribute, expected_name in expected_names.items():
        assert getattr(Child, attribute) == expected_name
예제 #11
0
def _create_schema(index="single"):
    """Build a DataFrameSchema fixture with checked columns of several dtypes.

    :param index: ``"multi"`` attaches a three-level MultiIndex, ``"single"``
        attaches one unnamed Index; any other value leaves the schema without
        an index component.
    :returns: a ``pa.DataFrameSchema`` created with ``coerce=False`` and
        ``strict=True``.
    """
    if index == "multi":
        schema_index = pa.MultiIndex([
            pa.Index(pa.Int, name=level_name)
            for level_name in ("int_index0", "int_index1", "int_index2")
        ])
    elif index == "single":
        # make sure io modules can handle case when index name is None
        schema_index = pa.Index(pa.Int, name=None)
    else:
        schema_index = None

    int_column = pa.Column(
        pa.Int,
        checks=[
            pa.Check.greater_than(0),
            pa.Check.less_than(10),
            pa.Check.in_range(0, 10),
        ],
    )
    float_column = pa.Column(
        pa.Float,
        checks=[
            pa.Check.greater_than(-10),
            pa.Check.less_than(20),
            pa.Check.in_range(-10, 20),
        ],
    )
    str_column = pa.Column(
        pa.String,
        checks=[
            pa.Check.isin(["foo", "bar", "x", "xy"]),
            pa.Check.str_length(1, 3),
        ],
    )
    datetime_column = pa.Column(
        pa.DateTime,
        checks=[
            pa.Check.greater_than(pd.Timestamp("20100101")),
            pa.Check.less_than(pd.Timestamp("20200101")),
        ],
    )
    timedelta_column = pa.Column(
        pa.Timedelta,
        checks=[
            pa.Check.greater_than(pd.Timedelta(1000, unit="ns")),
            pa.Check.less_than(pd.Timedelta(10000, unit="ns")),
        ],
    )
    # Column that sets every optional Column keyword explicitly.
    optional_props_column = pa.Column(
        pa.String,
        nullable=True,
        allow_duplicates=True,
        coerce=True,
        required=False,
        regex=True,
        checks=[pa.Check.str_length(1, 3)],
    )

    columns = {
        "int_column": int_column,
        "float_column": float_column,
        "str_column": str_column,
        "datetime_column": datetime_column,
        "timedelta_column": timedelta_column,
        "optional_props_column": optional_props_column,
    }
    return pa.DataFrameSchema(
        columns=columns,
        index=schema_index,
        coerce=False,
        strict=True,
    )