def test_one_sample_hypothesis():
    """Run one-sample t-test hypothesis checks through schema validation.

    Two schemas are built over the ``height_in_feet`` column, both checking
    the same hypothesis (``popmean=5``, ``relationship="greater_than"``,
    ``alpha=0.1``):

    * one attaches the check to the column with no grouping arguments;
    * one additionally passes ``sample="A"`` and ``groupby="group"``.

    Both schemas validate the same small frame. The return values of
    ``validate`` are not inspected.
    """
    # Hypothesis parameters common to both checks.
    ttest_kwargs = {
        "popmean": 5,
        "relationship": "greater_than",
        "alpha": 0.1,
    }

    whole_column_schema = DataFrameSchema({
        "height_in_feet": Column(
            Float,
            [Hypothesis.one_sample_ttest(**ttest_kwargs)],
        ),
    })

    group_a_schema = DataFrameSchema({
        "group": Column(String),
        "height_in_feet": Column(
            Float,
            [
                Hypothesis.one_sample_ttest(
                    sample="A",
                    groupby="group",
                    **ttest_kwargs,
                ),
            ],
        ),
    })

    heights = pd.DataFrame({
        "height_in_feet": [8.1, 7, 6.5, 6.7, 5.1],
        "group": ["A", "A", "B", "B", "A"],
    })

    whole_column_schema.validate(heights)
    group_a_schema.validate(heights)
# Mean of the `disposition_accidental` column, rendered as a percentage
# with two decimal places.
percent_accidental = fatal_encounters_clean["disposition_accidental"].mean()
display(Markdown(f"{percent_accidental * 100:0.02f}%"))

# %% [markdown] slideshow={"slide_type": "subslide"}
# **Hypothesis**: "the `disposition_accidental` target has a
# class balance of ~2.75%"

# %%
from pandera import Hypothesis

# use the Column object as a stand-alone schema object
target_schema = Column(
    pa.Bool,
    name="disposition_accidental",
    checks=Hypothesis.one_sample_ttest(
        popmean=0.0275,
        relationship="equal",
        alpha=0.01,
    ),
)

# Trailing semicolon keeps the notebook from echoing the call's result.
target_schema(fatal_encounters_clean);

# %% [markdown] slideshow={"slide_type": "skip"}
# What's the age distribution in the dataset?

# %% tags=["hide_input"] slideshow={"slide_type": "skip"}
fatal_encounters_clean["age"].plot.hist(figsize=(8, 5)).set_xlabel("age");

# %% [markdown] slideshow={"slide_type": "skip"}
# **Hypothesis** check: "the `age` column is right-skewed"

# %% slideshow={"slide_type": "skip"}