Ejemplo n.º 1
0
def odds_diff(random_data, predicted_data, target_variable, protected_variable, unprivileged_input):
    """Print the average absolute odds difference and a plain-language verdict.

    Compares `predicted_data` (the classifier's output) against `random_data`,
    to which a random Bernoulli(0.5) 'Pred' column is attached, and reports
    whether the |average abs odds difference| is below the 0.2 threshold.

    Parameters:
    random_data (pandas.DataFrame): baseline data; NOTE: mutated in place — a
        'Pred' column of random 0/1 draws is added.
    predicted_data (pandas.DataFrame): data containing the model predictions.
    target_variable (str): name of the binary label column.
    protected_variable (str): name of the protected attribute column.
    unprivileged_input: protected-attribute value defining the unprivileged group.
    """
    # Bug fix: size the random draw to the dataframe instead of a hard-coded
    # 1000, which raised a length-mismatch error for any other row count.
    random_data['Pred'] = np.random.binomial(1, .5, len(random_data))
    dataset = BinaryLabelDataset(df=random_data, label_names=[target_variable], protected_attribute_names=[protected_variable])
    classified_dataset = BinaryLabelDataset(df=predicted_data, label_names=[target_variable], protected_attribute_names=[protected_variable])
    # Every protected-attribute value other than the unprivileged one is
    # treated as privileged.
    unique_values = predicted_data[protected_variable].unique()
    privileged_group = [{protected_variable: v}
                        for v in unique_values[unique_values != unprivileged_input]]
    unprivileged_group = [{protected_variable: unprivileged_input}]  # e.g. female=0
    metric = ClassificationMetric(dataset, classified_dataset, unprivileged_group, privileged_group)
    print(metric.average_abs_odds_difference())
    # 0.2 is the (arbitrary) tolerance used by this example.
    if abs(metric.average_abs_odds_difference().round(3)) < 0.2:
        print('The algorithm can be considered to be not biased')
    else:
        print('There is a potential bias')
Ejemplo n.º 2
0
def calculate_bias_measures(data_orig_train, data_orig_vt, unprivileged_groups,
                            privileged_groups):
    """Fit a random forest on the training split and print fairness metrics.

    Trains with instance weights, predicts on the validation/test split, and
    prints mean difference, disparate impact, equal opportunity difference,
    average absolute odds difference, and the Theil index.
    """
    clf = RandomForestClassifier()
    clf.fit(data_orig_train.features,
            data_orig_train.labels.ravel(),
            sample_weight=data_orig_train.instance_weights)

    # Copy the evaluation dataset and overwrite its labels with predictions.
    predicted = data_orig_vt.copy()
    predicted.labels = clf.predict(data_orig_vt.features)

    # Metrics that compare truth vs. prediction.
    clf_metric = ClassificationMetric(
        data_orig_vt,
        predicted,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
    # Metrics computed on the predicted dataset alone.
    pred_metric = BinaryLabelDatasetMetric(
        predicted,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)

    print("Mean difference {}".format(pred_metric.mean_difference()))
    print("Disparate Metric {}".format(pred_metric.disparate_impact()))
    print("Equal Opportunity Difference {}".format(
        clf_metric.equal_opportunity_difference()))
    print("Average Abs Odds Difference {}".format(
        clf_metric.average_abs_odds_difference()))
    print("Theil index {}".format(clf_metric.theil_index()))
Ejemplo n.º 3
0
def fair_metrics(dataset, pred, pred_is_dataset=False):
    """Build a table of fairness metrics, one row per protected attribute.

    Parameters:
    dataset: AIF360 BinaryLabelDataset holding the true labels.
    pred: either a prediction array, or a BinaryLabelDataset when
        `pred_is_dataset` is True.
    pred_is_dataset (bool): True if `pred` is already a dataset.

    Returns:
    pandas.DataFrame indexed by ['objective', *attribute names] with columns
    for statistical parity difference, equal opportunity difference, average
    abs odds difference, disparate impact, and Theil index; infinities are
    clipped to 2.
    """
    if pred_is_dataset:
        dataset_pred = pred
    else:
        dataset_pred = dataset.copy()
        dataset_pred.labels = pred

    cols = [
        'statistical_parity_difference', 'equal_opportunity_difference',
        'average_abs_odds_difference', 'disparate_impact', 'theil_index'
    ]
    # Ideal (perfectly fair) values for each metric, used as a reference row.
    obj_fairness = [[0, 0, 0, 1, 0]]

    rows = [pd.DataFrame(data=obj_fairness, index=['objective'], columns=cols)]

    for idx, attr in enumerate(dataset_pred.protected_attribute_names):
        # First listed value on each side defines the (un)privileged group.
        privileged_groups = [{
            attr: dataset_pred.privileged_protected_attributes[idx][0]
        }]
        unprivileged_groups = [{
            attr: dataset_pred.unprivileged_protected_attributes[idx][0]
        }]

        classified_metric = ClassificationMetric(
            dataset,
            dataset_pred,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)

        metric_pred = BinaryLabelDatasetMetric(
            dataset_pred,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)

        rows.append(pd.DataFrame([[
            metric_pred.mean_difference(),
            classified_metric.equal_opportunity_difference(),
            classified_metric.average_abs_odds_difference(),
            metric_pred.disparate_impact(),
            classified_metric.theil_index()
        ]],
                                 columns=cols,
                                 index=[attr]))

    # Bug fix: DataFrame.append was removed in pandas 2.0 — accumulate rows
    # and concatenate once instead.
    table = pd.concat(rows)

    # +/- inf (e.g. disparate impact with a zero denominator) is clipped to 2.
    return table.replace([-np.inf, np.inf], 2)
Ejemplo n.º 4
0
def test_eqodds():
    """Equalized-odds post-processing: small accuracy cost, near-equal odds."""
    groups = dict(unprivileged_groups=[{'sex': 0}],
                  privileged_groups=[{'sex': 1}])
    postprocessor = EqOddsPostprocessing(seed=1234567, **groups)
    postprocessed = postprocessor.fit(val, val_pred).predict(pred)
    metrics = ClassificationMetric(test, postprocessed, **groups)
    # accuracy drop should be less than 10% (arbitrary)
    assert (cm_lr.accuracy() - metrics.accuracy()) / cm_lr.accuracy() < 0.1
    # approximately equal odds
    assert metrics.average_abs_odds_difference() < 0.1
Ejemplo n.º 5
0
def get_fair_metrics(dataset, pred, pred_is_dataset=False):
    """
    Measure fairness metrics.

    Parameters:
    dataset: AIF360 BinaryLabelDataset holding the true labels.
    pred (array): Model predictions, or a BinaryLabelDataset when
        `pred_is_dataset` is True.
    pred_is_dataset, optional (bool): True if prediction is already part of the dataset, column name 'labels'.

    Returns:
    fair_metrics: pandas.DataFrame of fairness metrics, indexed by
        ['objective', *protected attribute names]; infinities clipped to 2.
    """
    if pred_is_dataset:
        dataset_pred = pred
    else:
        dataset_pred = dataset.copy()
        dataset_pred.labels = pred

    cols = [
        'statistical_parity_difference', 'equal_opportunity_difference',
        'average_abs_odds_difference', 'disparate_impact', 'theil_index'
    ]
    # Ideal (perfectly fair) values for each metric, used as a reference row.
    obj_fairness = [[0, 0, 0, 1, 0]]

    rows = [pd.DataFrame(data=obj_fairness, index=['objective'], columns=cols)]

    for idx, attr in enumerate(dataset_pred.protected_attribute_names):
        # First listed value on each side defines the (un)privileged group.
        privileged_groups = [{
            attr: dataset_pred.privileged_protected_attributes[idx][0]
        }]
        unprivileged_groups = [{
            attr: dataset_pred.unprivileged_protected_attributes[idx][0]
        }]

        classified_metric = ClassificationMetric(
            dataset,
            dataset_pred,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)

        metric_pred = BinaryLabelDatasetMetric(
            dataset_pred,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)

        rows.append(pd.DataFrame([[
            metric_pred.mean_difference(),
            classified_metric.equal_opportunity_difference(),
            classified_metric.average_abs_odds_difference(),
            metric_pred.disparate_impact(),
            classified_metric.theil_index()
        ]],
                                 columns=cols,
                                 index=[attr]))

    # Bug fix: DataFrame.append was removed in pandas 2.0 — accumulate rows
    # and concatenate once instead.
    fair_metrics = pd.concat(rows)

    # +/- inf (e.g. disparate impact with a zero denominator) is clipped to 2.
    fair_metrics = fair_metrics.replace([-np.inf, np.inf], 2)

    return fair_metrics
Ejemplo n.º 6
0
# =============================================================================
# FAIRNESS METRICS (FOR REAL THIS TIME)
# =============================================================================
# Notebook-style cell: computes race-based fairness metrics from a prebuilt
# `fairness_metrics` object (ClassificationMetric, defined outside this view).
# TODO: (1) Redo the previous cell for gender bias and recompute the corresponding
# fairness metrics
# (2)collect these values in a table
# (3) think about a way to visualize these values

# Statistical Parity difference (SPD)
spd_pre_race = fairness_metrics.statistical_parity_difference()

# Disparate Impact Ratio
dir_pre_race = fairness_metrics.disparate_impact()

# Average Odds Difference and Average absolute odds difference
aod_pre_race = fairness_metrics.average_odds_difference()
aaod_pre_race = fairness_metrics.average_abs_odds_difference()

# Equal Opportunity Difference aka true positive rate difference
eod_pre_race = fairness_metrics.equal_opportunity_difference()

# Generalized entropy index with various alpha's
# NOTE(review): result is not assigned — presumably a notebook cell where the
# value is displayed inline; verify this is intentional.
fairness_metrics.between_all_groups_generalized_entropy_index(alpha=2)

# False positive rate over the whole dataset (groups=None means ungrouped);
# bld_true/bld_pred are BinaryLabelDatasets defined outside this view.
ClassificationMetric(dataset=bld_true,
                     classified_dataset=bld_pred,
                     unprivileged_groups=None,
                     privileged_groups=None).false_positive_rate()

df_fm.head()

# TO DELETE