コード例 #1
0
def audit(df, configs, model_id=1, preprocessed=False):
    """
    Run the audit pipeline: group crosstabs, bias disparities, fairness.

    :param df: input dataframe; run through ``preprocess_input_df`` first
        unless ``preprocessed`` is true.
    :param configs: configuration object. Attributes read here:
        ``attr_cols`` (filled in from preprocessing when empty),
        ``score_thresholds``, ``fair_measures_requested``,
        ``ref_groups_method``, ``ref_groups``, ``fairness_threshold`` and
        ``report``.
    :param model_id: forwarded to ``Group.get_crosstabs``.
    :param preprocessed: when true, ``df`` is used as-is.
    :return: ``(group_value_df, report)``; ``report`` is ``None`` unless
        ``configs.report is True``.
    """
    if not preprocessed:
        df, detected_attr_cols = preprocess_input_df(df)
        # Only fall back to the detected columns when none were configured.
        if not configs.attr_cols:
            configs.attr_cols = detected_attr_cols

    group = Group()
    print('Welcome to Aequitas-Audit')
    print('Fairness measures requested:',
          ','.join(configs.fair_measures_requested))
    crosstabs, _ = group.get_crosstabs(df,
                                       score_thresholds=configs.score_thresholds,
                                       model_id=model_id,
                                       attr_cols=configs.attr_cols)
    print('audit: df shape from the crosstabs:', crosstabs.shape)

    bias = Bias()
    # Pick the disparity computation from the configured reference-group
    # method; anything other than 'majority' or a usable 'predefined'
    # setting falls through to the min-metric variant.
    method = configs.ref_groups_method
    if method == 'majority':
        bias_df = bias.get_disparity_major_group(crosstabs)
    elif method == 'predefined' and configs.ref_groups:
        bias_df = bias.get_disparity_predefined_groups(crosstabs,
                                                       configs.ref_groups)
    else:
        bias_df = bias.get_disparity_min_metric(crosstabs)
    print('Any NaN?: ', bias_df.isnull().values.any())
    print('bias_df shape:', bias_df.shape)

    fairness = Fairness(tau=configs.fairness_threshold)
    print('Fairness Threshold:', configs.fairness_threshold)
    print('Fairness Measures:', configs.fair_measures_requested)
    requested = configs.fair_measures_requested
    group_value_df = fairness.get_group_value_fairness(
        bias_df, fair_measures_requested=requested)
    group_attribute_df = fairness.get_group_attribute_fairness(
        group_value_df, fair_measures_requested=requested)
    fair_results = fairness.get_overall_fairness(group_attribute_df)
    print(fair_results)

    # The markdown report is only built on an explicit ``True``.
    if configs.report is True:
        report = audit_report_markdown(configs, group_value_df,
                                       fairness.fair_measures_depend,
                                       fair_results)
    else:
        report = None
    return group_value_df, report
コード例 #2
0
    def get_model_fairness(self, level='model'):
        """Compute fairness results for ``self.df`` at the requested level.

        Disparities are computed against the majority group with
        ``mask_significance=True``.

        :param level: ``'model'`` returns the overall fairness result,
            ``'attribute'`` the per-attribute fairness table; any other
            value returns the group-value fairness table unchanged.
        :return: the fairness result for the chosen level.
        """
        crosstab, _ = Group().get_crosstabs(self.df)

        disparities = Bias().get_disparity_major_group(
            crosstab, original_df=self.df, mask_significance=True)

        fairness = Fairness()
        group_value_fairness = fairness.get_group_value_fairness(disparities)

        if level == 'model':
            return fairness.get_overall_fairness(group_value_fairness)
        if level == 'attribute':
            return fairness.get_group_attribute_fairness(group_value_fairness)
        # Unrecognised level: hand back the most detailed table.
        return group_value_fairness
コード例 #3
0
def tabla_medidas_equidad(data, attr_ref, tau=0.8):
    """Build the per-attribute fairness table for ``data``.

    Steps: group crosstabs -> disparities against the predefined reference
    groups -> group-value fairness at threshold ``tau`` -> attribute-level
    fairness.

    :param data: dataframe handed to ``Group.get_crosstabs`` and used as
        ``original_df`` for the disparity computation.
    :param attr_ref: reference-group dictionary passed as
        ``ref_groups_dict``.
    :param tau: fairness threshold forwarded to
        ``Fairness.get_group_value_fairness``.
    :return: result of ``Fairness.get_group_attribute_fairness``.
    """
    # Group metrics
    crosstab, _ = Group().get_crosstabs(data)

    # Bias metrics relative to the caller-supplied reference groups
    disparity_options = {
        'original_df': data,
        'ref_groups_dict': attr_ref,
        'alpha': 0.05,
        'mask_significance': True,
    }
    disparities = Bias().get_disparity_predefined_groups(
        crosstab, **disparity_options)

    # Fairness determinations derived from the bias table, using tau as the
    # threshold
    fairness = Fairness()
    group_value_fairness = fairness.get_group_value_fairness(disparities,
                                                             tau=tau)

    # One row per attribute stating whether each fairness measure holds
    return fairness.get_group_attribute_fairness(group_value_fairness)
コード例 #4
0
ファイル: bias_fairness.py プロジェクト: santibatte/dpa_2021
def fairnessf(bdf, absolute_metrics, bias):
    """
     Derive fairness tables from a bias (disparity) dataframe.

     Side effect: stores three values taken from row label 0 of the
     group-value fairness table in the ``aq_metadata`` mapping, which is
     defined outside this function.

     args:
         bdf (dataframe): disparity table passed to
             ``Fairness.get_group_value_fairness``.
         absolute_metrics (list): column names of the absolute group
             metrics to include in the summary table.
         bias: object whose ``list_disparities`` method is used to pick the
             disparity columns.
     returns:
         tuple: (rounded summary table, attribute-level fairness,
         overall fairness)
    """
    fair = Fairness()
    fdf = fair.get_group_value_fairness(bdf)

    # Summary: identifiers, absolute metrics, disparities and parity flags
    parity_columns = fair.list_parities(fdf)
    disparity_columns = bias.list_disparities(fdf)
    summary_columns = (['attribute_name', 'attribute_value'] +
                       absolute_metrics + disparity_columns + parity_columns)
    fairness = fdf[summary_columns].round(2)

    ## Storing metadata
    metadata_sources = (
        ("v_group", "attribute_value"),
        ("FOR_p", "FOR Parity"),
        ("FNR_p", "FNR Parity"),
    )
    for metadata_key, column in metadata_sources:
        aq_metadata[metadata_key] = str(fdf.loc[0, column])

    gaf = fair.get_group_attribute_fairness(fdf)
    gof = fair.get_overall_fairness(fdf)

    return fairness, gaf, gof
コード例 #5
0
    # NOTE(review): this is the tail of a longer function whose beginning is
    # not visible here; `b`, `bdf`, `calculated_disparities` and `aqp` are
    # assumed to be defined earlier in that function -- confirm.
    disparity_significance = b.list_significance(bdf)
    # Show the bias-metrics table
    print(bdf[['attribute_name', 'attribute_value'] + calculated_disparities +
              disparity_significance])

    # Disparity plots
    #aqp.plot_disparity(bdf, group_metric='fpr_disparity', attribute_name='race', significance_alpha=0.05)
    #j = aqp.plot_disparity_all(bdf, metrics=['precision_disparity', 'fpr_disparity'], attributes=['age_cat'], significance_alpha=0.05)

    # Derive the fairness measures from the bias-metrics table
    f = Fairness()
    # Set the threshold value through the tau argument
    fdf = f.get_group_value_fairness(bdf, tau=0.8)
    #parity_detrminations = f.list_parities(fdf)
    # Table stating whether the fairness measures hold for each attribute
    gaf = f.get_group_attribute_fairness(fdf)
    #print(gaf['Equalized Odds'])

    # Group and bias metrics once the fairness thresholds have been applied
    fg = aqp.plot_fairness_group_all(
        fdf, ncols=2, metrics=['ppr', 'pprev', 'fdr', 'fpr', 'for', 'fnr'])
    fg.savefig('./figures/LAW_DATA/disparity_group_law.png')
    # FOR/FNR fairness-disparity plots, one figure per attribute
    m = aqp.plot_fairness_disparity_all(fdf,
                                        metrics=['for', 'fnr'],
                                        attributes=['race'])
    m.savefig('./figures/LAW_DATA/disparity_law_race.png')
    m = aqp.plot_fairness_disparity_all(fdf,
                                        metrics=['for', 'fnr'],
                                        attributes=['sex'])
    m.savefig('./figures/LAW_DATA/disparity_law_sex.png')
コード例 #6
0
def run_aequitas(predictions_data_path):
    '''
    Audit model predictions for group bias and fairness with Aequitas.

    The predictions CSV is reshaped into the layout Aequitas expects
    (``entity_id``, ``score``, ``label_value`` plus the ``race`` and
    ``income`` attribute columns), then group metrics, disparities relative
    to the White / rich reference groups, and fairness determinations are
    plotted.

    ``race`` is the race column with the largest value in each row;
    ``income`` is the tercile of the scaled median household income, labelled
    in ascending order ("poor" = lowest third, "rich" = highest third).

    Input: path to the predictions CSV of the selected model (unzip the
        selected file first).
    Output: ``None``. Plots are saved under ``charts/``; this function does
        not create that directory.
    '''

    best_model_pred = pd.read_csv(predictions_data_path)

    # Transform data for Aequitas module compliance
    aqc = [
        'Other', 'White', 'African American', 'Asian', 'Hispanic',
        'American Indian'
    ]
    aqcol = [
        'White alone_scale', 'Black/AfAmer alone_scale',
        'AmInd/Alaskn alone_scale', 'Asian alone_scale', 'HI alone_scale',
        'Some other race alone_scale', 'Hispanic or Latino_scale'
    ]
    income_col = 'Median household income (1999 dollars)_scale'
    display(aqcol)
    aqcol_label = ['no_renew_nextpd', 'pred_class_10%', income_col] + aqcol
    # Work on an explicit copy: the frame is modified below, and assigning
    # into a bare column selection triggers pandas' SettingWithCopy warning.
    aqus = best_model_pred[aqcol_label].copy()
    print('Creating classes for racial and income distribution', '\n')

    # Convert label and prediction to 0/1
    for var in ('no_renew_nextpd', 'pred_class_10%'):
        # Element-wise comparison against True, hence `==` rather than `is`.
        aqus[var] = np.where(aqus[var] == True, 1, 0)  # noqa: E712
    aqus = aqus.rename(columns={
        'no_renew_nextpd': 'label_value',
        'pred_class_10%': 'score'
    })

    print('Define majority rule defined on relative proportion of the class',
          '\n')
    aqus['race'] = aqus[aqcol].idxmax(axis=1)
    # Income terciles. qcut assigns labels in ascending bin order, so the
    # labels must run from the lowest-income bin to the highest.
    aqus['income'] = pd.qcut(aqus[income_col],
                             3,
                             labels=["poor", "median", "rich"])

    # Final form: drop the raw inputs, expose the row index as entity_id
    aqus = aqus.drop(columns=aqcol + [income_col])
    aq = aqus.reset_index().rename(columns={'index': 'entity_id'})
    # NOTE(review): 'Hispanic or Latino_scale' has no entry in this mapping,
    # so such rows keep the raw column name and are filtered out of the race
    # count plots below by `isin(aqc)`; 'HI alone_scale' is the one mapped to
    # 'Hispanic' -- confirm that this is the intended meaning of "HI".
    aq['race'] = aq['race'].replace({
        'Some other race alone_scale': 'Other',
        'White alone_scale': 'White',
        'Black/AfAmer alone_scale': 'African American',
        'Asian alone_scale': 'Asian',
        'HI alone_scale': 'Hispanic',
        'AmInd/Alaskn alone_scale': 'American Indian'
    })

    # Consolidate types
    for col in ('income', 'entity_id', 'score', 'label_value'):
        aq[col] = aq[col].astype(object)

    aq_palette = sns.diverging_palette(225, 35, n=2)
    known_races = aq[aq.race.isin(aqc)]

    # Each count plot gets its own figure; otherwise seaborn draws onto the
    # current axes and every saved chart also contains the previous plots.

    # Distribution of categories
    plt.figure()
    by_race = sns.countplot(x="race", data=known_races)
    by_race.set_xticklabels(by_race.get_xticklabels(), rotation=40, ha="right")
    plt.savefig('charts/Racial distribution in data.png')

    # Race distribution against score
    plt.figure()
    by_race = sns.countplot(x="race",
                            hue="score",
                            data=known_races,
                            palette=aq_palette)
    by_race.set_xticklabels(by_race.get_xticklabels(), rotation=40, ha="right")
    plt.savefig('charts/race_score.png')

    # Income distribution against score
    plt.figure()
    sns.countplot(x="income", hue="score", data=aq, palette=aq_palette)
    plt.savefig('charts/income_score.png')

    # Group metrics
    g = Group()
    xtab, _ = g.get_crosstabs(aq)

    # False negative rates (this first plot is not saved to disk here)
    aqp = Plot()
    aqp.plot_group_metric(xtab, 'fnr', min_group_size=0.05)
    p = aqp.plot_group_metric_all(xtab,
                                  metrics=['ppr', 'pprev', 'fnr', 'fpr'],
                                  ncols=4)
    p.savefig('charts/eth_metrics.png')

    # Bias with respect to the White / rich reference groups
    b = Bias()
    bdf = b.get_disparity_predefined_groups(xtab,
                                            original_df=aq,
                                            ref_groups_dict={
                                                'race': 'White',
                                                'income': 'rich'
                                            },
                                            alpha=0.05,
                                            mask_significance=True)
    aqp.plot_disparity(bdf,
                       group_metric='fpr_disparity',
                       attribute_name='race',
                       significance_alpha=0.05)
    plt.savefig('charts/disparity.png')

    # Fairness determinations on the reference-group disparities
    f = Fairness()
    fdf = f.get_group_value_fairness(bdf)
    aqp.plot_fairness_group(fdf, group_metric='ppr')
    plt.savefig('charts/fairness_overall.png')
    # Checking for False Omission Rate and False Negative Rates
    fg = aqp.plot_fairness_group_all(fdf, metrics=['for', 'fnr'], ncols=2)
    fg.savefig('charts/fairness_metrics.png')

    return None
コード例 #7
0
def fun_bias_fair(a_zip, a_type, fea_eng, model):
    """Build the FOR/FNR bias-and-fairness summary table for ``model``.

    Predictions are made on ``fea_eng`` (minus its descriptive and target
    columns), joined with the zone of each zip code and the group of each
    facility type, and audited with Aequitas against the reference groups
    ``zone == 'West'`` and ``facility_group == 'grocery'``.

    :param a_zip: dataframe with a ``zip`` column, merged on ``zip``; after
        the merge a ``zone`` column is expected. Not modified.
    :param a_type: dataframe merged on ``facility_type``.
        NOTE(review): presumably supplies ``facility_group`` -- confirm.
    :param fea_eng: feature dataframe containing at least the dropped
        columns listed below plus ``zip``.
    :param model: fitted estimator exposing ``predict``.
    :return: dataframe with columns ``attribute_name``, ``group_name``,
        ``for_``, ``fnr``, ``for_disparity``, ``fnr_disparity``,
        ``for_parity`` and ``fnr_parity``; numeric values rounded to two
        decimals. The table is also printed.
    """
    from aequitas.group import Group
    from aequitas.bias import Bias
    from aequitas.fairness import Fairness

    X = fea_eng.drop([
        'aka_name', 'facility_type', 'address', 'inspection_date',
        'inspection_type', 'violations', 'results', 'pass'
    ],
                     axis=1)
    y_pred = model.predict(X)

    # One row per input: zip, facility type, true label and prediction.
    # y_pred carries no name, so pandas labels its column 'Unnamed 0'.
    xt = pd.DataFrame([
        fea_eng['zip'].astype(float), fea_eng['facility_type'],
        fea_eng['pass'], y_pred
    ]).transpose()

    # Cast the join key on a copy so the caller's frame is left untouched.
    a_zip = a_zip.assign(zip=a_zip['zip'].astype(float))
    compas = pd.merge(left=xt, right=a_zip, how='left', on='zip')
    compas = pd.merge(left=compas,
                      right=a_type,
                      how='left',
                      on='facility_type')

    # Aequitas expects the prediction in 'score' and the ground truth in
    # 'label_value'; both are cast right below.
    compas = compas.rename(columns={
        'Unnamed 0': 'score',
        'pass': 'label_value'
    })
    compas = compas.drop(columns=['zip', 'facility_type'])

    compas['zone'] = compas['zone'].astype(str)
    compas['score'] = compas['score'].astype(int)
    compas['label_value'] = compas['label_value'].astype(int)

    # Group metrics
    xtab, _ = Group().get_crosstabs(compas)

    # Disparities relative to the predefined reference groups
    bdf = Bias().get_disparity_predefined_groups(xtab,
                                                 original_df=compas,
                                                 ref_groups_dict={
                                                     'zone': 'West',
                                                     'facility_group': 'grocery'
                                                 },
                                                 alpha=0.05)

    # Fairness determinations
    fdf = Fairness().get_group_value_fairness(bdf)

    summary_columns = [
        'attribute_name', 'attribute_value', 'for', 'fnr', 'for_disparity',
        'fnr_disparity', 'FOR Parity', 'FNR Parity'
    ]
    # rename() returns a new frame, avoiding an in-place edit of a slice.
    tab_bias_fair = fdf[summary_columns].round(2).rename(
        columns={
            'attribute_value': 'group_name',
            'FOR Parity': 'for_parity',
            'FNR Parity': 'fnr_parity',
            'for': 'for_'
        })

    print(tab_bias_fair)

    return tab_bias_fair
コード例 #8
0
def audit(df, configs, preprocessed=False):
    """
    Run the audit pipeline and, when configured, draw the related plots.

    :param df: input dataframe; run through ``preprocess_input_df`` first
        unless ``preprocessed`` is true.
    :param configs: configuration object. Attributes read here:
        ``attr_cols`` (filled in from preprocessing when empty),
        ``score_thresholds``, ``fair_measures_requested``,
        ``ref_groups_method``, ``ref_groups``, ``check_significance``,
        ``alpha``, ``selected_significance``, ``mask_significance``,
        ``plot_bias_metrics``, ``plot_bias_disparities``,
        ``plot_disparity_attributes``, ``fairness_threshold`` and ``report``.
        Plots are drawn only when ``plot_bias_metrics`` is non-empty.
    :param preprocessed: when true, ``df`` is used as-is.
    :return: ``(group_value_df, report)``; ``report`` is ``None`` unless
        ``configs.report is True``.
    """
    if not preprocessed:
        df, attr_cols_input = preprocess_input_df(df)
        if not configs.attr_cols:
            configs.attr_cols = attr_cols_input
    g = Group()
    print('Welcome to Aequitas-Audit')
    print('Fairness measures requested:',
          ','.join(configs.fair_measures_requested))
    groups_model, _ = g.get_crosstabs(
        df,
        score_thresholds=configs.score_thresholds,
        attr_cols=configs.attr_cols)
    print('audit: df shape from the crosstabs:', groups_model.shape)
    b = Bias()

    # Significance options shared by every disparity method.
    significance_options = {
        'check_significance': configs.check_significance,
        'alpha': configs.alpha,
        'selected_significance': configs.selected_significance,
        'mask_significance': configs.mask_significance,
    }
    # todo move this to the new configs object / the attr_cols now are passed through the configs object...
    ref_groups_method = configs.ref_groups_method
    if ref_groups_method == 'predefined' and configs.ref_groups:
        bias_df = b.get_disparity_predefined_groups(groups_model, df,
                                                    configs.ref_groups,
                                                    **significance_options)
    elif ref_groups_method == 'majority':
        bias_df = b.get_disparity_major_group(groups_model, df,
                                              **significance_options)
    else:
        bias_df = b.get_disparity_min_metric(df=groups_model,
                                             original_df=df,
                                             label_score_ref='fpr',
                                             **significance_options)

    print('Any NaN?: ', bias_df.isnull().values.any())
    print('bias_df shape:', bias_df.shape)

    aqp = Plot()

    def _count(values):
        # An unset (None) or empty plot option counts as zero entries
        # instead of raising on len(None).
        return len(values) if values else 0

    # Decide once which plot variants apply; the same decisions drive both
    # the bias plots and the fairness plots further down.
    plot_metrics = configs.plot_bias_metrics
    single_disparity = many_disparities = False
    if plot_metrics:
        n_disparities = _count(configs.plot_bias_disparities)
        n_attributes = _count(configs.plot_disparity_attributes)
        single_disparity = n_disparities == 1 and n_attributes == 1
        many_disparities = n_disparities > 1 or n_attributes > 1

        if len(plot_metrics) == 1:
            aqp.plot_group_metric(bias_df, group_metric=plot_metrics[0])
        elif len(plot_metrics) > 1:
            # The metrics to plot are the configured bias metrics (the
            # disparity attributes are attribute names, not metrics).
            aqp.plot_group_metric_all(bias_df, metrics=plot_metrics)

        if single_disparity:
            aqp.plot_disparity(
                bias_df,
                group_metric=configs.plot_bias_disparities[0],
                attribute_name=configs.plot_disparity_attributes[0])
        elif many_disparities:
            aqp.plot_disparity_all(
                bias_df,
                metrics=configs.plot_bias_disparities,
                attributes=configs.plot_disparity_attributes)

    f = Fairness(tau=configs.fairness_threshold)
    print('Fairness Threshold:', configs.fairness_threshold)
    print('Fairness Measures:', configs.fair_measures_requested)
    group_value_df = f.get_group_value_fairness(
        bias_df, fair_measures_requested=configs.fair_measures_requested)
    group_attribute_df = f.get_group_attribute_fairness(
        group_value_df,
        fair_measures_requested=configs.fair_measures_requested)
    fair_results = f.get_overall_fairness(group_attribute_df)

    if plot_metrics:
        if len(plot_metrics) == 1:
            aqp.plot_fairness_group(group_value_df,
                                    group_metric=plot_metrics[0])
        elif len(plot_metrics) > 1:
            aqp.plot_fairness_group_all(group_value_df, metrics=plot_metrics)

        if single_disparity:
            aqp.plot_fairness_disparity(
                group_value_df,
                group_metric=configs.plot_bias_disparities[0],
                attribute_name=configs.plot_disparity_attributes[0])
        elif many_disparities:
            aqp.plot_fairness_disparity_all(
                group_value_df,
                metrics=configs.plot_bias_disparities,
                attributes=configs.plot_disparity_attributes)

    print(fair_results)
    report = None
    if configs.report is True:
        report = audit_report_markdown(configs, group_value_df,
                                       f.fair_measures_depend, fair_results)
    return group_value_df, report