Ejemplo n.º 1
0
def test_disparate_impact():
    """Check the refactored disparate_impact_ratio against the legacy ClassificationMetric value."""
    ratio = disparate_impact_ratio(
        y, y_pred, prot_attr='sex', sample_weight=sample_weight)
    assert ratio == cm.disparate_impact()
Ejemplo n.º 2
0
def __binary_group_fairness_measures(X,
                                     prtc_attr,
                                     y_true,
                                     y_pred,
                                     y_prob=None,
                                     priv_grp=1):
    """Compute group-fairness measures for a binary prediction task.

    Args:
        X (pandas DataFrame): Sample features
        prtc_attr (named array-like): values for the protected attribute
            (note: protected attribute may also be present in X)
        y_true (pandas DataFrame): Sample targets
        y_pred (pandas DataFrame): Sample target predictions
        y_prob (pandas DataFrame, optional): Sample target probabilities.
            Defaults to None. When provided, an AUC difference measure is
            added to the results.
        priv_grp (int, optional): Value of the protected attribute that
            identifies the privileged group. Defaults to 1.

    Returns:
        tuple: ('Group Fairness', dict mapping measure name to its value)
    """
    pa_names = prtc_attr.columns.tolist()
    gf_vals = {}
    gf_key = 'Group Fairness'
    # The fairlearn metrics below accept only a single sensitive feature and
    # are skipped while the tutorial is running; evaluate that gate once
    # instead of calling helper.is_tutorial_running() twice.
    use_fairlearn = not helper.is_tutorial_running() and not len(pa_names) > 1
    gf_vals['Statistical Parity Difference'] = \
        aif_mtrc.statistical_parity_difference(y_true, y_pred, prot_attr=pa_names)
    gf_vals['Disparate Impact Ratio'] = \
        aif_mtrc.disparate_impact_ratio(y_true, y_pred, prot_attr=pa_names)
    if use_fairlearn:
        gf_vals['Demographic Parity Difference'] = \
            fl_mtrc.demographic_parity_difference(y_true, y_pred,
                                                  sensitive_features=prtc_attr)
        gf_vals['Demographic Parity Ratio'] = \
            fl_mtrc.demographic_parity_ratio(y_true, y_pred,
                                             sensitive_features=prtc_attr)
    gf_vals['Average Odds Difference'] = \
        aif_mtrc.average_odds_difference(y_true, y_pred, prot_attr=pa_names)
    gf_vals['Equal Opportunity Difference'] = \
        aif_mtrc.equal_opportunity_difference(y_true, y_pred, prot_attr=pa_names)
    if use_fairlearn:
        gf_vals['Equalized Odds Difference'] = \
            fl_mtrc.equalized_odds_difference(y_true, y_pred,
                                              sensitive_features=prtc_attr)
        gf_vals['Equalized Odds Ratio'] = \
            fl_mtrc.equalized_odds_ratio(y_true, y_pred,
                                         sensitive_features=prtc_attr)
    gf_vals['Positive Predictive Parity Difference'] = \
        aif_mtrc.difference(sk_metric.precision_score, y_true,
                            y_pred, prot_attr=pa_names, priv_group=priv_grp)
    gf_vals['Balanced Accuracy Difference'] = \
        aif_mtrc.difference(sk_metric.balanced_accuracy_score, y_true,
                            y_pred, prot_attr=pa_names, priv_group=priv_grp)
    # AUC requires scores rather than hard predictions, so it is optional.
    if y_prob is not None:
        gf_vals['AUC Difference'] = \
            aif_mtrc.difference(sk_metric.roc_auc_score, y_true, y_prob,
                                prot_attr=pa_names, priv_group=priv_grp)
    return (gf_key, gf_vals)
Ejemplo n.º 3
0
def bias_table(Y, prot_attr=None, instance_type=None):
    """Summarize disparate impact per protected group across stages/outcomes.

    Args:
        Y (pandas DataFrame): indexed by the protected attribute (an index
            level named ``prot_attr``); each column is a stage whose non-null
            values are the outcomes observed at that stage.
        prot_attr (str, optional): name of the index level that holds the
            protected-attribute values. Defaults to None.
        instance_type (str, optional): label for the proportion column
            header; when None the header is 'proportion at first stage'.

    Returns:
        pandas Styler: one row per group — the group's proportion of all
        rows, then 1 / disparate_impact_ratio for each (stage, outcome)
        pair, colored red when below 0.8 and blue when above 1.25.
    """
    groups = Y.index.unique(prot_attr)
    # Small groups can produce x/0 or 0/0 inside the ratio computation;
    # suppress numpy's warnings and let inf/NaN flow into the table.
    with np.errstate(divide='ignore', invalid='ignore'):
        # Fraction of all rows that belong to each group.
        pct = [Y.xs(g, level=prot_attr).shape[0]/Y.shape[0] for g in groups]
        # One row per group: the reciprocal of the disparate impact ratio
        # with that group as the privileged group, for every non-null
        # (stage, outcome) pair. Iteration order here must match the
        # column labels built below.
        data = [[np.divide(1, disparate_impact_ratio(Y[stage].dropna() == outcome, prot_attr=prot_attr, priv_group=g))
                 for stage in Y.columns for outcome in Y[stage].unique() if not pd.isna(outcome)]
                for g in groups]
    pct_name = 'proportion at first stage' if instance_type is None else f'proportion of {instance_type}'
    num_stages = len(data[0])
    # Two-level columns: the proportion column, then one 'stage -> outcome'
    # column per pair, all grouped under 'disparate impact'. The inner
    # comprehension repeats the iteration used for `data` above so labels
    # and values stay aligned.
    col = pd.MultiIndex.from_tuples([(pct_name, '')]
            + list(zip(['disparate impact']*num_stages, [f'{stage} -> {outcome}' for stage in Y.columns for outcome in Y[stage].unique() if not pd.isna(outcome)])))
    table = pd.DataFrame(np.c_[pct, data], columns=col, index=groups).sort_index()
    table = filter_bias(table)
    def colorize(v):
        # Thresholds correspond to the four-fifths rule (0.8) and its
        # reciprocal (1.25).
        if v < 0.8:
            return 'color: red'
        elif v > 1.25:
            return 'color: blue'
        return ''
    # Format ratios to 3 decimals, the proportion as a percentage, draw a
    # bar chart in the proportion column, and colorize the ratio cells.
    return table.style.format('{:.3f}').format({(pct_name, ''): '{:.1%}'}
            ).bar(subset=pct_name, align='left', vmin=0, vmax=1, color='#5fba7d'
            ).applymap(colorize, subset='disparate impact')