Esempio n. 1
0
def test_one_sample_hypothesis():
    """Run a one-sample t-test check on a full column and on one group.

    Two schemas are validated against the same dataframe: one applies the
    hypothesis to the whole ``height_in_feet`` column, the other restricts
    it to sample ``"A"`` of the ``group`` column.
    """
    heights = pd.DataFrame({
        "height_in_feet": [8.1, 7, 6.5, 6.7, 5.1],
        "group": ["A", "A", "B", "B", "A"],
    })

    # Arguments shared by both hypothesis checks.
    ttest_kwargs = {
        "popmean": 5,
        "relationship": "greater_than",
        "alpha": 0.1,
    }

    whole_column_check = Hypothesis.one_sample_ttest(**ttest_kwargs)
    schema = DataFrameSchema({
        "height_in_feet": Column(Float, [whole_column_check]),
    })

    group_a_check = Hypothesis.one_sample_ttest(
        sample="A",
        groupby="group",
        **ttest_kwargs,
    )
    subset_schema = DataFrameSchema({
        "group": Column(String),
        "height_in_feet": Column(Float, [group_a_check]),
    })

    schema.validate(heights)
    subset_schema.validate(heights)
Esempio n. 2
0
# Mean of the `disposition_accidental` column, rendered as Markdown after
# scaling by 100 and formatting with two decimals plus a "%" suffix.
# NOTE(review): presumably the column is a boolean flag, so the mean is the
# fraction of rows marked accidental — confirm against the cleaning step.
percent_accidental = fatal_encounters_clean.disposition_accidental.mean()
display(Markdown(f"{percent_accidental * 100:0.02f}%"))

# %% [markdown] slideshow={"slide_type": "subslide"}
# **Hypothesis**: "the `disposition_accidental` target has a
# class balance of ~2.75%"

# %%
from pandera import Hypothesis

# use the Column object as a stand-alone schema object
# The attached check is a one-sample t-test with popmean=0.0275 (the ~2.75%
# class balance stated in the hypothesis above), relationship "equal" and
# alpha=0.01.
target_schema = Column(
    pa.Bool,
    name="disposition_accidental",
    checks=Hypothesis.one_sample_ttest(
        popmean=0.0275, relationship="equal", alpha=0.01
    )
)

# Apply the stand-alone column schema to the cleaned dataframe.
# NOTE(review): the trailing semicolon presumably suppresses the cell's
# output in the notebook — confirm.
target_schema(fatal_encounters_clean);

# %% [markdown] slideshow={"slide_type": "skip"}
# What's the age distribution in the dataset?

# %% tags=["hide_input"] slideshow={"slide_type": "skip"}
# Histogram of the `age` column (8x5 figure) with the x-axis labelled "age".
fatal_encounters_clean.age.plot.hist(figsize=(8, 5)).set_xlabel("age");

# %% [markdown] slideshow={"slide_type": "skip"}
# **Hypothesis** check: "the `age` column is right-skewed"

# %% slideshow={"slide_type": "skip"}