Ejemplo n.º 1
0
def plot_roc_auc(y_true, y_pred, label, ax=None):
    """Draw a ROC curve (true-positive rate vs. false-positive rate).

    The curve points are computed with ``roc_curve``, collected into a
    two-column DataFrame, and validated before plotting: both ``fpr`` and
    ``tpr`` must be floats within the closed interval [0, 1].

    Note that, despite the name, this function only plots the curve; it
    does not compute an AUC value.

    Args:
        y_true: Ground-truth labels, passed straight to ``roc_curve``.
        y_pred: Predicted scores, passed straight to ``roc_curve``.
        label: Legend label for the plotted line.
        ax: Optional axes to draw on; forwarded to ``DataFrame.plot.line``.

    Returns:
        Whatever ``DataFrame.plot.line`` returns for the validated frame.
    """
    # The third value returned by roc_curve is not needed for the plot.
    false_pos_rate, true_pos_rate, _unused = roc_curve(y_true, y_pred)

    # Both rates are expected to be floats in [0, 1].
    rate_schema = pa.DataFrameSchema(
        {
            "fpr": Column(pa.Float, Check.in_range(0, 1)),
            "tpr": Column(pa.Float, Check.in_range(0, 1)),
        }
    )

    curve_points = pd.DataFrame({"fpr": false_pos_rate, "tpr": true_pos_rate})
    validated_points = rate_schema.validate(curve_points)

    return validated_points.plot.line(x="fpr", y="tpr", label=label, ax=ax)
Ejemplo n.º 2
0
    "european_american_white", "hispanic_latino", "middle_eastern",
    "native_american_alaskan", "race_unspecified",
]
# Allowed values for the "cause_of_death" column, one category per line.
causes_of_death = [
    "asphyxiated_restrained",
    "beaten_bludgeoned_with_instrument",
    "burned_smoke_inhalation",
    "chemical_agent_pepper_spray",
    "drowned",
    "drug_overdose",
    "fell_from_a_height",
    "gunshot",
    "medical_emergency",
    "other",
    "stabbed",
    "tasered",
    "undetermined",
    "unknown",
    "vehicle",
]

# %%
# Validation schema for the training data: five feature columns that may
# contain missing values, plus one boolean target column that may not.
training_data_schema = pa.DataFrameSchema(
    columns={
        # --- features (nulls allowed) ---
        "age": Column(
            pa.Float, checks=Check.in_range(0, 120), nullable=True
        ),
        "gender": Column(
            pa.String, checks=Check.isin(genders), nullable=True
        ),
        "race": Column(
            pa.String, checks=Check.isin(races), nullable=True
        ),
        "cause_of_death": Column(
            pa.String, checks=Check.isin(causes_of_death), nullable=True
        ),
        "symptoms_of_mental_illness": Column(pa.Bool, nullable=True),
        # --- target (nulls rejected) ---
        "disposition_accidental": Column(pa.Bool, nullable=False),
    },
    # Coerce each column to the dtype declared above.
    # NOTE(review): confirm how missing values in the nullable Bool column
    # behave under coercion with the pandera version in use.
    coerce=True,
)

# %% [markdown] slideshow={"slide_type": "subslide"}
# #### Serialize schema to YAML format:

# %%
# Display the schema's YAML serialization (the value returned by to_yaml()).
print(training_data_schema.to_yaml())