Example 1
def _create_schema_python_types():
    return pa.DataFrameSchema({
        "int_column": pa.Column(int),
        "float_column": pa.Column(float),
        "str_column": pa.Column(str),
        "object_column": pa.Column(object),
    })
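A minimal usage sketch for the schema above; the sample values are illustrative, and pa/pd are assumed to be the usual pandera/pandas imports:

import pandas as pd

df = pd.DataFrame({
    "int_column": [1, 2],
    "float_column": [1.0, 2.0],
    "str_column": ["a", "b"],
    "object_column": [object(), object()],
})
# Every column dtype matches the schema, so validation returns the dataframe.
assert isinstance(_create_schema_python_types().validate(df), pd.DataFrame)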
Example 2
def test_unique():
    """Test uniqueness checks on modin dataframes."""
    schema = pa.DataFrameSchema({"field": pa.Column(int)}, unique=["field"])
    column_schema = pa.Column(int, unique=True, name="field")
    series_schema = pa.SeriesSchema(int, unique=True, name="field")

    data_unique = mpd.DataFrame({"field": [1, 2, 3]})
    data_non_unique = mpd.DataFrame({"field": [1, 1, 1]})

    assert isinstance(schema(data_unique), mpd.DataFrame)
    assert isinstance(column_schema(data_unique), mpd.DataFrame)
    assert isinstance(series_schema(data_unique["field"]), mpd.Series)

    with pytest.raises(pa.errors.SchemaError, match="columns .+ not unique"):
        schema(data_non_unique)
    with pytest.raises(pa.errors.SchemaError,
                       match="series .+ contains duplicate values"):
        column_schema(data_non_unique)
    with pytest.raises(pa.errors.SchemaError,
                       match="series .+ contains duplicate values"):
        series_schema(data_non_unique["field"])

    schema.unique = None
    column_schema.unique = False
    series_schema.unique = False

    assert isinstance(schema(data_non_unique), mpd.DataFrame)
    assert isinstance(column_schema(data_non_unique), mpd.DataFrame)
    assert isinstance(series_schema(data_non_unique["field"]), mpd.Series)
Example 3
def create_invoice_stats_schema(coerce: bool = True,
                                strict: bool = True,
                                nullable: bool = True):
    """Function to validate that invoice stats schema is correct, it also does value checks in
    runtime (really nice stuff, right here).

    Args:
        coerce (bool): Flag given to determine whether to coerce series to specified type
        strict (bool): Flag given to determine whether or not to accept columns in the
            dataframe that are not in the DataFrame
        nullable (bool): If columns should be nullable or not

    Returns: A pandas DataFrame schema that validates that the types are correct

    """
    return pa.DataFrameSchema(
        {
            INVOICE_STATS_COLUMN_NAMES.get("invoice_median"):
            pa.Column(pa.Float64, nullable=nullable),
            INVOICE_STATS_COLUMN_NAMES.get("invoice_mean"):
            pa.Column(pa.Float64, nullable=nullable),
        },
        index=pa.Index(pa.Int),
        strict=strict,
        coerce=coerce,
    )
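A hedged usage sketch for the factory above. INVOICE_STATS_COLUMN_NAMES is project-specific and not shown here, so the mapping below is a hypothetical stand-in:

import pandas as pd

# Hypothetical mapping; the real one lives elsewhere in the project.
INVOICE_STATS_COLUMN_NAMES = {
    "invoice_median": "invoice_median",
    "invoice_mean": "invoice_mean",
}

schema = create_invoice_stats_schema()
stats = pd.DataFrame({"invoice_median": [10.5], "invoice_mean": [12.3]})
validated = schema.validate(stats)  # raises SchemaError on a type mismatch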
Example 4
def sample_dataframe_schema(**kwargs):
    return pa.DataFrameSchema(
        {
            "a":
            pa.Column(int, checks=pa.Check.le(10), description="a desc"),
            "b":
            pa.Column(float, checks=pa.Check.lt(-1.2), description="b desc"),
            "c":
            pa.Column(
                str,
                description="c desc",
                checks=[
                    pa.Check.str_startswith("value_"),
                    pa.Check(
                        lambda s: s.str.split("_", expand=True).shape[1] == 2,
                        description="Two words separated by underscore",
                    ),
                ],
            ),
        },
        checks=[
            pa.Check(lambda df: df["a"].sum() > df["b"].sum(),
                     description="sum(a) > sum(b)"),
        ],
        **kwargs,
    )
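A short sketch of data that satisfies every column-level and dataframe-level check above (values are illustrative):

import pandas as pd

df = pd.DataFrame({
    "a": [1, 2],                  # each value <= 10
    "b": [-2.0, -3.0],            # each value < -1.2
    "c": ["value_a", "value_b"],  # "value_" prefix, two underscore-separated words
})
# sum(a) == 3 > sum(b) == -5.0, so the dataframe-level check passes too.
validated = sample_dataframe_schema().validate(df)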
Example 5
def create_event_schema(
    coerce: bool = True,
    strict: bool = True,
    nullable: bool = False,
):
    """Function to validate that event schema is correct, it also does value checks in runtime
    (really nice stuff, right here). If this fails, then write to dead letter queue.

    Args:
        coerce (bool): Flag given to determine whether to coerce series to specified type
        strict (bool): Flag given to determine whether or not to accept columns in the
            dataframe that are not in the DataFrame
        nullable (bool): If columns should be nullable or not

    Returns: A pandas DataFrame schema that validates that the types are correct, and that the
    values inserted are correct.

    """
    return pa.DataFrameSchema(
        {
            "id": pa.Column(pa.String, nullable=nullable),
            "timestamp": pa.Column(pa.DateTime, nullable=nullable),
            "version": pa.Column(pa.String, nullable=nullable),
        },
        index=pa.Index(pa.Int),
        strict=strict,
        coerce=coerce,
    )
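A minimal sketch of validating one event against the schema above (field values are illustrative):

import pandas as pd

schema = create_event_schema()
event = pd.DataFrame({
    "id": ["evt-001"],
    "timestamp": [pd.Timestamp("2021-01-01T00:00:00")],
    "version": ["1.0.0"],
})
# strict=True means an extra, undeclared column would raise a SchemaError.
validated = schema.validate(event)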
Example 6
def test_to_script_lambda_check():
    """Test writing DataFrameSchema to a script with lambda check."""
    schema1 = pandera.DataFrameSchema(
        {
            "a": pandera.Column(
                pandera.Int,
                checks=pandera.Check(
                    lambda s: s.mean() > 5, element_wise=False
                ),
            ),
        }
    )

    with pytest.warns(UserWarning):
        pandera.io.to_script(schema1)

    schema2 = pandera.DataFrameSchema(
        {
            "a": pandera.Column(
                pandera.Int,
            ),
        },
        checks=pandera.Check(lambda s: s.mean() > 5, element_wise=False),
    )

    with pytest.warns(UserWarning, match=".*registered checks.*"):
        pandera.io.to_script(schema2)
Example 7
def test_serialize_deserialize_custom_datetime_checks():
    """
    Test that custom checks for datetime columns can be serialized and
    deserialized
    """

    # pylint: disable=unused-variable,unused-argument
    @pandera.extensions.register_check_method(statistics=["stat"])
    def datetime_check(pandas_obj, *, stat):
        ...

    schema = pandera.DataFrameSchema(
        {
            "dt_col": pandera.Column(
                pandera.DateTime,
                checks=pandera.Check.datetime_check("foobar"),
            ),
            "td_col": pandera.Column(
                pandera.Timedelta,
                checks=pandera.Check.datetime_check("foobar"),
            ),
        }
    )
    yaml_schema = schema.to_yaml()
    schema_from_yaml = schema.from_yaml(yaml_schema)
    assert schema_from_yaml == schema
Example 8
def test_register_custom_groupby_check(custom_check_teardown: None) -> None:
    """Test registering a custom groupby check."""
    @extensions.register_check_method(
        statistics=["group_a", "group_b"],
        supported_types=(pd.Series, pd.DataFrame),
        check_type="groupby",
    )
    def custom_check(dict_groups, *, group_a, group_b):
        """
        Test that the mean value in group A is larger than that in group B.

        Note that this function can handle groups of both dataframes and
        series.
        """
        return (dict_groups[group_a].values.mean() >
                dict_groups[group_b].values.mean())

    # column groupby check
    data_column_check = pd.DataFrame({
        "col1": [20, 20, 10, 10],
        "col2": list("aabb"),
    })

    schema_column_check = pa.DataFrameSchema({
        "col1":
        pa.Column(
            int,
            Check.custom_check(group_a="a", group_b="b", groupby="col2"),
        ),
        "col2":
        pa.Column(str),
    })
    assert isinstance(schema_column_check(data_column_check), pd.DataFrame)

    # dataframe groupby check
    data_df_check = pd.DataFrame(
        {
            "col1": [20, 20, 10, 10],
            "col2": [30, 30, 5, 5],
            "col3": [10, 10, 1, 1],
        },
        index=pd.Index(list("aabb"), name="my_index"),
    )
    schema_df_check = pa.DataFrameSchema(
        columns={
            "col1": pa.Column(int),
            "col2": pa.Column(int),
            "col3": pa.Column(int),
        },
        index=pa.Index(str, name="my_index"),
        checks=Check.custom_check(group_a="a", group_b="b",
                                  groupby="my_index"),
    )
    assert isinstance(schema_df_check(data_df_check), pd.DataFrame)

    for kwargs in [{"element_wise": True}, {"element_wise": False}]:
        with pytest.warns(UserWarning):
            Check.custom_check(val=10, **kwargs)
Example 9
def _create_schema(index="single"):

    if index == "multi":
        index = pa.MultiIndex([
            pa.Index(pa.Int, name="int_index0"),
            pa.Index(pa.Int, name="int_index1"),
            pa.Index(pa.Int, name="int_index2"),
        ])
    elif index == "single":
        index = pa.Index(pa.Int, name="int_index")
    else:
        index = None

    return pa.DataFrameSchema(columns={
        "int_column":
        pa.Column(
            pa.Int,
            checks=[
                pa.Check.greater_than(0),
                pa.Check.less_than(10),
                pa.Check.in_range(0, 10),
            ],
        ),
        "float_column":
        pa.Column(
            pa.Float,
            checks=[
                pa.Check.greater_than(-10),
                pa.Check.less_than(20),
                pa.Check.in_range(-10, 20),
            ],
        ),
        "str_column":
        pa.Column(
            pa.String,
            checks=[
                pa.Check.isin(["foo", "bar", "x", "xy"]),
                pa.Check.str_length(1, 3)
            ],
        ),
        "datetime_column":
        pa.Column(pa.DateTime,
                  checks=[
                      pa.Check.greater_than(pd.Timestamp("20100101")),
                      pa.Check.less_than(pd.Timestamp("20200101")),
                  ]),
        "timedelta_column":
        pa.Column(pa.Timedelta,
                  checks=[
                      pa.Check.greater_than(pd.Timedelta(1000, unit="ns")),
                      pa.Check.less_than(pd.Timedelta(10000, unit="ns")),
                  ]),
    },
                              index=index,
                              coerce=False,
                              strict=True)
Example 10
def test_schema_selector(df, attrs, expected):
    schema = pa.DataFrameSchema({
        "a":
        pa.Column(int, regex=True, nullable=False),
        "b":
        pa.Column(int, required=False, nullable=True),
    })
    df = schema.validate(df)
    selector = SchemaSelector(**attrs)
    assert_col_indexer(df, selector, expected)
Example 11
def validate_movement_annotations(mvmt_tbl: pd.DataFrame) -> pd.DataFrame:
    mvmt_tbl_schema = pa.DataFrameSchema(
        {
            "animal": pa.Column(pa.Int, pa.Check.greater_than_or_equal_to(0)),
            "timepoint": pa.Column(pa.Int, pa.Check.greater_than_or_equal_to(0)),
            "pair": pa.Column(pa.Int, pa.Check.greater_than_or_equal_to(0)),
            "mvmt-*": pa.Column(pa.Int, regex=True),
        },
        strict=True,
    )

    return mvmt_tbl_schema.validate(mvmt_tbl)
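A hedged sketch of a frame that passes this schema; since "mvmt-*" is a regex column, any number of columns whose names start with "mvmt-" are matched (the column names below are illustrative):

import pandas as pd

mvmt = pd.DataFrame({
    "animal": [0, 1],
    "timepoint": [0, 0],
    "pair": [0, 1],
    "mvmt-head": [1, 0],  # matched by the "mvmt-*" regex column
    "mvmt-tail": [0, 1],  # also matched
})
validated = validate_movement_annotations(mvmt)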
Example 12
def test_dataframe_unique(size, data) -> None:
    """Test that DataFrameSchemas with unique columns are actually unique."""
    schema = pa.DataFrameSchema(
        {
            "col1": pa.Column(int),
            "col2": pa.Column(float),
            "col3": pa.Column(str),
            "col4": pa.Column(int),
        },
        unique=["col1", "col2", "col3"],
    )
    df_sample = data.draw(schema.strategy(size=size))
    schema(df_sample)
Example 13
    def test_csv_download(self):
        df = data_import.download_csv()
        schema_csv_download = pa.DataFrameSchema({
            'name':
            pa.Column(pa.String),
            'energy,calculated (kJ)':
            pa.Column(
                pa.Int,
                pa.Check(lambda x: 0 <= x <= 4000,
                         element_wise=True,
                         error="kJ range checker [0, 4000]")),
            'fat, total (g)':
            pa.Column(pa.String),
            'carbohydrate, available (g)':
            pa.Column(pa.String),
            'protein, total (g)':
            pa.Column(pa.String),
            # 'fibre, total (g)': pa.Column(), # can have NaN values
            'sugars, total (g)':
            pa.Column(pa.String),
            'alcohol (g)':
            pa.Column(pa.String),
            # 'sodium (mg)': pa.Column(), # can have NaN values
            'salt (mg)':
            pa.Column(pa.String),
        })
        df_valid = schema_csv_download.validate(df)
        self.assertTrue(1000 in df_valid.index)
Example 14
def test_schema_selector_multi_index(df_mi, attrs, expected):
    schema = pa.DataFrameSchema({
        ("int", "number"):
        pa.Column(int, nullable=True),
        ("float", "number"):
        pa.Column(float, nullable=True),
        ("category", "nominal"):
        pa.Column(str, required=False),
        ("string", "nominal"):
        pa.Column(str, required=False, nullable=True),
    })
    df = schema.validate(df_mi)
    selector = SchemaSelector(**attrs)
    assert_col_indexer(df, selector, expected)
Example 15
def test_required_column():
    """Test the required column raises error."""
    required_schema = pa.DataFrameSchema(
        {"field": pa.Column(int, required=True)})
    schema = pa.DataFrameSchema({"field_": pa.Column(int, required=False)})

    data = mpd.DataFrame({"field": [1, 2, 3]})

    assert isinstance(required_schema(data), mpd.DataFrame)
    assert isinstance(schema(data), mpd.DataFrame)

    with pytest.raises(pa.errors.SchemaError):
        required_schema(mpd.DataFrame({"another_field": [1, 2, 3]}))
    schema(mpd.DataFrame({"another_field": [1, 2, 3]}))
Example 16
def test_empty_dtype() -> None:
    expected = pa.DataFrameSchema({"empty_column": pa.Column()})

    class EmptyDtypeSchema(pa.SchemaModel):
        empty_column: pa.typing.Series

    assert EmptyDtypeSchema.to_schema() == expected
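Because empty_column is annotated without a dtype, the generated column performs no dtype check; a quick sketch:

import pandas as pd

# Mixed-type values pass, since the column declares no dtype.
df = pd.DataFrame({"empty_column": ["a", 1, 2.5]})
EmptyDtypeSchema.validate(df)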
Example 17
def test_dataframe_schema_case(coerce):
    """Test a simple schema case."""
    schema = pa.DataFrameSchema(
        {
            "int_column": pa.Column(int, pa.Check.ge(0)),
            "float_column": pa.Column(float, pa.Check.le(0)),
            "str_column": pa.Column(str, pa.Check.isin(list("abcde"))),
        },
        coerce=coerce,
    )
    mdf = mpd.DataFrame({
        "int_column": range(10),
        "float_column": [float(-x) for x in range(10)],
        "str_column": list("aabbcceedd"),
    })
    assert isinstance(schema.validate(mdf), mpd.DataFrame)
Example 18
def test_strict_schema():
    """Test schema strictness."""
    strict_schema = pa.DataFrameSchema({"field": pa.Column()}, strict=True)
    non_strict_schema = pa.DataFrameSchema({"field": pa.Column()})

    strict_df = mpd.DataFrame({"field": [1]})
    non_strict_df = mpd.DataFrame({"field": [1], "foo": [2]})

    strict_schema(strict_df)
    non_strict_schema(strict_df)

    with pytest.raises(pa.errors.SchemaError,
                       match="column 'foo' not in DataFrameSchema"):
        strict_schema(non_strict_df)

    non_strict_schema(non_strict_df)
Example 19
def test_infer_dataframe_schema(multi_index):
    """Test dataframe schema is correctly inferred."""
    dataframe = _create_dataframe(multi_index=multi_index)
    schema = schema_inference.infer_dataframe_schema(dataframe)
    assert isinstance(schema, pa.DataFrameSchema)

    if multi_index:
        assert isinstance(schema.index, pa.MultiIndex)
    else:
        assert isinstance(schema.index, pa.Index)

    with pytest.warns(
            UserWarning,
            match="^This .+ is an inferred schema that hasn't been modified"):
        schema.validate(dataframe)

    # modifying an inferred schema should set _is_inferred to False
    schema_with_added_cols = schema.add_columns(
        {"foo": pa.Column(pa.String)})
    assert schema._is_inferred
    assert not schema_with_added_cols._is_inferred
    assert isinstance(
        schema_with_added_cols.validate(dataframe.assign(foo="a")),
        pd.DataFrame)

    schema_with_removed_cols = schema.remove_columns(["int"])
    assert schema._is_inferred
    assert not schema_with_removed_cols._is_inferred
    assert isinstance(
        schema_with_removed_cols.validate(dataframe.drop("int", axis=1)),
        pd.DataFrame)
Example 20
    def __init__(self, raw_data: pd.DataFrame):

        # Limit word usage to their specific pos.
        schema = pa.DataFrameSchema({
            "word": pa.Column(pa.String),
            "pos": pa.Column(pa.String)
        })
        schema.validate(raw_data)
        self.raw_data = raw_data
        # self.words = words
        # self.pos = pos
        # Use WordNet to map a word to its synonym set.
        self.synset_map = self.get_synset_map(raw_data=raw_data)
        self.synset_map = self.get_synonyms(synset_map=self.synset_map)
        # self.word_df = pd.DataFrame()  # maps word to synonym, def, pos
        self.syn_to_word = defaultdict(list)  # maps a synonym to word.
Example 21
def test_dataframe_schema():
    """Test that DataFrameSchema works on GeoDataFrames."""
    geo_df = gpd.GeoDataFrame(
        {
            "geometry": [
                Polygon(((0, 0), (0, 1), (1, 1), (1, 0))),
                Polygon(((0, 0), (0, -1), (-1, -1), (-1, 0))),
            ],
        }
    )

    for geo_schema in [
        pa.DataFrameSchema({"geometry": pa.Column("geometry")}),
        pa.DataFrameSchema({"geometry": pa.Column(gpd.array.GeometryDtype)}),
        pa.DataFrameSchema({"geometry": pa.Column(gpd.array.GeometryDtype())}),
    ]:
        assert isinstance(geo_schema.validate(geo_df), gpd.GeoDataFrame)
Example 22
def _test_literal_pandas_dtype(
    model: Type[SchemaModel], pandas_dtype: PandasDtype
):
    schema = model.to_schema()
    assert (
        schema.columns["col"].dtype
        == pa.Column(pandas_dtype, name="col").dtype
    )
Example 23
def test_config() -> None:
    """Test that Config can be inherited and translate into DataFrameSchema options."""
    class Base(pa.SchemaModel):
        a: Series[int]
        idx_1: Index[str]
        idx_2: Index[str]

        class Config:
            name = "Base schema"
            coerce = True
            ordered = True
            multiindex_coerce = True
            multiindex_strict = True
            multiindex_name: Optional[str] = "mi"

    class Child(Base):
        b: Series[int]

        class Config:
            name = "Child schema"
            strict = True
            multiindex_strict = False
            description = "foo"
            title = "bar"

    expected = pa.DataFrameSchema(
        columns={
            "a": pa.Column(int),
            "b": pa.Column(int)
        },
        index=pa.MultiIndex(
            [pa.Index(str, name="idx_1"),
             pa.Index(str, name="idx_2")],
            coerce=True,
            strict=False,
            name="mi",
        ),
        name="Child schema",
        coerce=True,
        strict=True,
        ordered=True,
        description="foo",
        title="bar",
    )

    assert expected == Child.to_schema()
Example 24
def test_nullable(
    dtype: pandas_engine.DataType,
    data: st.DataObject,
):
    """Test nullable checks on koalas dataframes."""
    checks = None
    if dtypes.is_datetime(type(dtype)) and MIN_TIMESTAMP is not None:
        checks = [pa.Check.gt(MIN_TIMESTAMP)]
    nullable_schema = pa.DataFrameSchema(
        {"field": pa.Column(dtype, checks=checks, nullable=True)})
    nonnullable_schema = pa.DataFrameSchema(
        {"field": pa.Column(dtype, checks=checks, nullable=False)})
    null_sample = data.draw(nullable_schema.strategy(size=5))
    nonnull_sample = data.draw(nonnullable_schema.strategy(size=5))

    # for some reason values less than MIN_TIMESTAMP are still sampled.
    if dtype is pandas_engine.DateTime or isinstance(dtype,
                                                     pandas_engine.DateTime):
        if MIN_TIMESTAMP is not None and (null_sample < MIN_TIMESTAMP).any(
                axis=None):
            with pytest.raises(OverflowError,
                               match="mktime argument out of range"):
                ks.DataFrame(null_sample)
            return
        if MIN_TIMESTAMP is not None and (nonnull_sample < MIN_TIMESTAMP).any(
                axis=None):
            with pytest.raises(OverflowError,
                               match="mktime argument out of range"):
                ks.DataFrame(nonnull_sample)
            return
    else:
        try:
            ks_null_sample = ks.DataFrame(null_sample)
        except TypeError as exc:
            if "can not accept object <NA> in type" not in exc.args[0]:
                raise
            pytest.skip("koalas cannot handle native pd.NA type with dtype "
                        f"{dtype.type}")
        ks_nonnull_sample = ks.DataFrame(nonnull_sample)
        n_nulls = ks_null_sample.isna().sum().item()
        assert ks_nonnull_sample.notna().all().item()
        assert n_nulls >= 0
        if n_nulls > 0:
            with pytest.raises(pa.errors.SchemaError):
                nonnullable_schema(ks_null_sample)
Example 25
def _create_schema_null_index():

    return pa.DataFrameSchema(columns={
        "float_column":
        pa.Column(pa.Float,
                  checks=[
                      pa.Check.greater_than(-10),
                      pa.Check.less_than(20),
                      pa.Check.in_range(-10, 20),
                  ]),
        "str_column":
        pa.Column(pa.String,
                  checks=[
                      pa.Check.isin(["foo", "bar", "x", "xy"]),
                      pa.Check.str_length(1, 3)
                  ]),
    },
                              index=None)
Example 26
def test_unsatisfiable_checks():
    """Test that unsatisfiable checks raise an exception."""
    schema = pa.DataFrameSchema(columns={
        "col1":
        pa.Column(int, checks=[pa.Check.gt(0), pa.Check.lt(0)])
    })
    for _ in range(5):
        with pytest.raises(hypothesis.errors.Unsatisfiable):
            schema.example(size=10)
Example 27
def helper_type_validation(dataframe_type, schema_type, debugging=False):
    """Helper function for using same or different dtypes for the dataframe and the schema_type"""
    df = pd.DataFrame({"column1": [dataframe_type(1)]})
    if debugging:
        print(dataframe_type, df.column1)
    schema = pa.DataFrameSchema({"column1": pa.Column(schema_type)})
    if debugging:
        print(schema)
    schema(df)
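For example (a sketch; numpy is only needed for the numpy-dtype case):

import numpy as np
import pytest

helper_type_validation(int, int)            # same dtype: validates
helper_type_validation(np.int64, np.int64)  # numpy dtype: validates
with pytest.raises(pa.errors.SchemaError):
    helper_type_validation(float, int)      # dtype mismatch: fails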
Example 28
def test_dtype_coercion(from_dtype, to_dtype, data):
    """Test the datatype coercion provides informative errors."""
    from_schema = pa.DataFrameSchema({"field": pa.Column(from_dtype)})
    to_schema = pa.DataFrameSchema({"field": pa.Column(to_dtype, coerce=True)})

    pd_sample = data.draw(from_schema.strategy(size=3))
    sample = ks.DataFrame(pd_sample)
    if from_dtype is to_dtype:
        assert isinstance(to_schema(sample), ks.DataFrame)
        return

    # strings that can't be interpreted as numbers are converted to NA
    if from_dtype is str and to_dtype in {int, float}:
        with pytest.raises(pa.errors.SchemaError, match="non-nullable series"):
            to_schema(sample)
        return

    assert isinstance(to_schema(sample), ks.DataFrame)
Example 29
def create_invoice_schema(
    max_invoice_value: Decimal,
    min_invoice_value: Decimal,
    coerce: bool = True,
    strict: bool = True,
    nullable: bool = False,
):
    """Function to validate that invoice schema is correct, it also does value checks in runtime
    (really nice stuff, right here).

    Args:
        max_invoice_value (Decimal): Given max invoice value
        min_invoice_value (Decimal): Given min invoice value
        coerce (bool): Flag given to determine whether to coerce series to specified type
        strict (bool): Flag given to determine whether or not to accept columns in the
            dataframe that are not in the DataFrame
        nullable (bool): If columns should be nullable or not

    Returns: A pandas DataFrame schema that validates that the types are correct, and that the
    values inserted are correct. If a row is inserted that does not follow:

    0 < invoice_value < 200000000.00

    An error will be thrown in runtime.

    """
    return pa.DataFrameSchema(
        {
            INVOICE_COLUMN_NAMES.get("invoice_name"):
            pa.Column(pa.String, nullable=nullable),
            INVOICE_COLUMN_NAMES.get("invoice_value"):
            pa.Column(
                pa.Float64,
                checks=[
                    pa.Check.less_than_or_equal_to(max_invoice_value),
                    pa.Check.greater_than_or_equal_to(min_invoice_value),
                ],
                nullable=nullable,
            ),
        },
        index=pa.Index(pa.Int),
        strict=strict,
        coerce=coerce,
    )
Example 30
def test_inherit_schemamodel_fields_alias():
    """Test that columns and index aliases are inherited."""
    class Base(pa.SchemaModel):
        a: Series[int]
        idx: Index[str]

    class Mid(Base):
        b: Series[str] = pa.Field(alias="_b")
        idx: Index[str]

    class ChildOverrideAttr(Mid):
        b: Series[int]

    class ChildOverrideAlias(Mid):
        b: Series[str] = pa.Field(alias="new_b")

    class ChildNewAttr(Mid):
        c: Series[int]

    class ChildEmpty(Mid):
        pass

    expected_mid = pa.DataFrameSchema(
        columns={
            "a": pa.Column(int),
            "_b": pa.Column(str)
        },
        index=pa.Index(str),
    )
    expected_child_override_attr = expected_mid.rename_columns({
        "_b": "b"
    }).update_column("b", pandas_dtype=int)
    expected_child_override_alias = expected_mid.rename_columns(
        {"_b": "new_b"})
    expected_child_new_attr = expected_mid.add_columns({
        "c": pa.Column(int),
    })

    assert expected_mid == Mid.to_schema()
    assert expected_child_override_attr == ChildOverrideAttr.to_schema()
    assert expected_child_override_alias == ChildOverrideAlias.to_schema()
    assert expected_child_new_attr == ChildNewAttr.to_schema()
    assert expected_mid == ChildEmpty.to_schema()