예제 #1
0
def audit(df, configs, model_id=1, preprocessed=False):
    """
    Run the audit pipeline: crosstabs -> disparities -> plots -> fairness.

    Progress and intermediate shapes are printed to stdout along the way.

    :param df: input dataframe. Unless ``preprocessed`` is true it is first
        passed through ``preprocess_input_df``.
    :param configs: configuration object. The attributes read here are
        ``attr_cols``, ``fair_measures_requested``, ``score_thresholds``,
        ``ref_groups_method``, ``ref_groups``, ``plot_bias_metrics``,
        ``plot_bias_disparities``, ``fairness_threshold`` and ``report``.
        ``configs.attr_cols`` is overwritten in place with the columns
        returned by preprocessing when it is empty and preprocessing runs.
    :param model_id: forwarded to ``Group.get_crosstabs``.
    :param preprocessed: when true, ``df`` is used as-is and
        ``preprocess_input_df`` is skipped.
    :return: tuple ``(group_value_df, report)``; ``report`` is ``None``
        unless ``configs.report is True``.
    """
    if not preprocessed:
        df, attr_cols_input = preprocess_input_df(df)
        # Only fall back to the detected columns when none were configured.
        if not configs.attr_cols:
            configs.attr_cols = attr_cols_input

    g = Group()
    print('Welcome to Aequitas-Audit')
    print('Fairness measures requested:',
          ','.join(configs.fair_measures_requested))
    # get_crosstabs returns a pair; the second element is not needed here.
    groups_model, _ = g.get_crosstabs(
        df,
        score_thresholds=configs.score_thresholds,
        model_id=model_id,
        attr_cols=configs.attr_cols)
    print('audit: df shape from the crosstabs:', groups_model.shape)

    b = Bias()
    # todo move this to the new configs object / the attr_cols now are passed through the configs object...
    ref_groups_method = configs.ref_groups_method
    if ref_groups_method == 'predefined' and configs.ref_groups:
        bias_df = b.get_disparity_predefined_groups(groups_model, df,
                                                    configs.ref_groups)
    elif ref_groups_method == 'majority':
        bias_df = b.get_disparity_major_group(groups_model, df)
    else:
        # Also reached for 'predefined' when no ref_groups were supplied.
        bias_df = b.get_disparity_min_metric(groups_model, df)
    print('Any NaN?: ', bias_df.isnull().values.any())
    print('bias_df shape:', bias_df.shape)

    aqp = Plot()

    # The plotting calls' return values are not used by this function.
    # NOTE(review): the plot_disparity* calls receive plot_bias_metrics while
    # the plot_group_metric* calls receive plot_bias_disparities; going by the
    # names this looks swapped. Left unchanged -- confirm against the Plot API.
    if len(configs.plot_bias_metrics) == 1:
        aqp.plot_disparity(bias_df, metrics=configs.plot_bias_metrics)
    elif len(configs.plot_bias_metrics) > 1:
        aqp.plot_disparity_all(bias_df, metrics=configs.plot_bias_metrics)
    if len(configs.plot_bias_disparities) == 1:
        aqp.plot_group_metric(bias_df, metrics=configs.plot_bias_disparities)
    elif len(configs.plot_bias_disparities) > 1:
        aqp.plot_group_metric_all(bias_df,
                                  metrics=configs.plot_bias_disparities)

    f = Fairness(tau=configs.fairness_threshold)
    print('Fairness Threshold:', configs.fairness_threshold)
    print('Fairness Measures:', configs.fair_measures_requested)
    group_value_df = f.get_group_value_fairness(
        bias_df, fair_measures_requested=configs.fair_measures_requested)
    group_attribute_df = f.get_group_attribute_fairness(
        group_value_df,
        fair_measures_requested=configs.fair_measures_requested)
    fair_results = f.get_overall_fairness(group_attribute_df)

    if len(configs.plot_bias_metrics) == 1:
        aqp.plot_fairness_group(group_value_df,
                                metrics=configs.plot_bias_metrics)
    elif len(configs.plot_bias_metrics) > 1:
        aqp.plot_fairness_group_all(group_value_df,
                                    metrics=configs.plot_bias_metrics)

    if len(configs.plot_bias_disparities) == 1:
        aqp.plot_fairness_disparity(
            group_value_df, metrics=configs.plot_bias_disparities)
    # Fix: this branch used to test plot_bias_metrics, so the multi-disparity
    # plot was gated on the wrong list.
    elif len(configs.plot_bias_disparities) > 1:
        aqp.plot_fairness_disparity_all(
            group_value_df, metrics=configs.plot_bias_disparities)

    print(fair_results)
    report = None
    # Identity check kept on purpose: only the literal True enables the report.
    if configs.report is True:
        report = audit_report_markdown(configs, group_value_df,
                                       f.fair_measures_depend, fair_results)
    return group_value_df, report
예제 #2
0
# Disparities against explicitly chosen reference groups ('WHITE' for race,
# 'MALE' for gender), with significance masking switched off.
hbdf = b.get_disparity_predefined_groups(
    xtab,
    original_df=df,
    ref_groups_dict=dict(race='WHITE', gender='MALE'),
    alpha=0.5,
    mask_significance=False,
)

# Same crosstab, but using the major-group disparity method.
majority_bdf = b.get_disparity_major_group(
    xtab, original_df=df, mask_significance=False)

# Bare expression: selects the identifying columns plus the disparity and
# significance columns. The result is not bound to a name (presumably shown
# as notebook cell output -- confirm).
majority_bdf[
    ['attribute_name', 'attribute_value']
    + calculated_disparities
    + disparity_significance
]

# Plot every disparity metric for the gender and race attributes.
tm_capped = aqp.plot_disparity_all(
    hbdf,
    attributes=['gender', 'race'],
    metrics='all',
    significance_alpha=0.05,
)

f = Fairness()
# NOTE(review): `bdf` is not defined anywhere in this excerpt (only `hbdf`
# and `majority_bdf` are); presumably it comes from earlier code -- confirm.
fdf = f.get_group_value_fairness(bdf)
parity_detrminations = f.list_parities(fdf)

# Bare expression: column subset with `.style` accessed; the result is not
# bound to a name.
fdf[
    ['attribute_name', 'attribute_value']
    + absolute_metrics
    + calculated_disparities
    + parity_detrminations
].style

fg = aqp.plot_fairness_group_all(fdf, ncols=5, metrics="all")
n_tm = aqp.plot_fairness_disparity_all(
    fdf,
    attributes=['race', 'gender'],
    significance_alpha=0.05,
)