Example #1
def test_between_group():
    data = np.array([[0, 0, 1], [0, 1, 0], [1, 1, 0], [1, 1, 1], [1, 0, 0],
                     [1, 0, 0]])
    pred = data.copy()
    pred[[0, 3], -1] = 0
    pred[[4, 5], -1] = 1
    df = pd.DataFrame(data, columns=['feat', 'feat2', 'label'])
    df2 = pd.DataFrame(pred, columns=['feat', 'feat2', 'label'])
    bld = BinaryLabelDataset(df=df,
                             label_names=['label'],
                             protected_attribute_names=['feat', 'feat2'])
    bld2 = BinaryLabelDataset(df=df2,
                              label_names=['label'],
                              protected_attribute_names=['feat', 'feat2'])
    cm = ClassificationMetric(bld,
                              bld2,
                              unprivileged_groups=[{
                                  'feat': 0
                              }],
                              privileged_groups=[{
                                  'feat': 1
                              }])

    b = np.array([0.5, 0.5, 1.25, 1.25, 1.25, 1.25])
    assert cm.between_group_generalized_entropy_index() == 1 / 12 * np.sum(b**2 - 1)
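None of the snippets on this page include their imports. A minimal preamble that makes Example #1 runnable, assuming a recent aif360 release:

# Shared imports assumed by most examples on this page.
from collections import defaultdict

import numpy as np
import pandas as pd

from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric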
Example #2
def show_classifier_metrics(test_list, prediction_list):
    privileged_groups = [{'sex': 1}]
    unprivileged_groups = [{'sex': 0}]

    counter = 1
    for test_, pred_ in zip(test_list, prediction_list):

        display(Markdown("#### Model {} dataset metrics".format(counter)))

        model_metric = ClassificationMetric(
            test_,
            pred_,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)

        ex_model_metric = MetricTextExplainer(model_metric)
        print(ex_model_metric.average_odds_difference())

        print(
            'Difference in Recall between Unprivileged and Privileged: {:.3f}'.
            format(model_metric.equal_opportunity_difference()))

        print(
            'Difference in Precision between Unprivileged and Privileged: {:.3f}.'
            .format(
                model_metric.precision(privileged=False) -
                model_metric.precision(privileged=True)))
        counter += 1
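Example #2 additionally depends on notebook display helpers and aif360's text explainer. A sketch of the extra imports it assumes (it is meant to run inside a Jupyter notebook):

# Extra imports assumed by Example #2.
from IPython.display import Markdown, display
from aif360.explainers import MetricTextExplainer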
Example #3
def test(dataset, model, thresh_arr, privileged_groups, unprivileged_groups):
    try:
        # sklearn classifier
        y_val_pred_prob = model.predict_proba(dataset.features)
        pos_ind = np.where(model.classes_ == dataset.favorable_label)[0][0]
    except AttributeError:
        # aif360 inprocessing algorithm
        y_val_pred_prob = model.predict(dataset).scores
        pos_ind = 0

    metric_arrs = defaultdict(list)
    for thresh in thresh_arr:
        y_val_pred = (y_val_pred_prob[:, pos_ind] > thresh).astype(np.float64)

        dataset_pred = dataset.copy()
        dataset_pred.labels = y_val_pred
        metric = ClassificationMetric(dataset,
                                      dataset_pred,
                                      unprivileged_groups=unprivileged_groups,
                                      privileged_groups=privileged_groups)

        metric_arrs['bal_acc'].append(
            (metric.true_positive_rate() + metric.true_negative_rate()) / 2)

    return dataset_pred, metric_arrs
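A typical way to drive this function is to sweep thresholds on a validation set and keep the one with the best balanced accuracy. A minimal sketch, where `model` and `val` are hypothetical names for a fitted classifier and an aif360 validation dataset:

# Hypothetical usage: sweep 50 thresholds, pick the best by balanced accuracy.
thresh_arr = np.linspace(0.01, 0.5, 50)
val_pred, val_metrics = test(val, model, thresh_arr,
                             privileged_groups=[{'sex': 1}],
                             unprivileged_groups=[{'sex': 0}])
best_thresh = thresh_arr[np.argmax(val_metrics['bal_acc'])]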
Example #4
def calculate_bias_measures(data_orig_train, data_orig_vt, unprivileged_groups,
                            privileged_groups):
    model = RandomForestClassifier().fit(
        data_orig_train.features,
        data_orig_train.labels.ravel(),
        sample_weight=data_orig_train.instance_weights)
    dataset = data_orig_vt
    dataset_pred = dataset.copy()
    dataset_pred.labels = model.predict(data_orig_vt.features)
    classified_metric_race = ClassificationMetric(
        dataset,
        dataset_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
    metric_pred_race = BinaryLabelDatasetMetric(
        dataset_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
    print("Mean difference {}".format(metric_pred_race.mean_difference()))
    print("Disparate Metric {}".format(metric_pred_race.disparate_impact()))
    print("Equal Opportunity Difference {}".format(
        classified_metric_race.equal_opportunity_difference()))
    print("Average Abs Odds Difference {}".format(
        classified_metric_race.average_abs_odds_difference()))
    print("Theil index {}".format(classified_metric_race.theil_index()))
Example #5
def compute_metrics(dataset_true,
                    dataset_pred,
                    unprivileged_groups,
                    privileged_groups,
                    disp=True):
    """ Compute the key metrics """
    classified_metric_pred = ClassificationMetric(
        dataset_true,
        dataset_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
    metrics = OrderedDict()
    metrics["Balanced accuracy"] = \
        0.5*(classified_metric_pred.true_positive_rate()+
            classified_metric_pred.true_negative_rate())
    metrics["Statistical parity difference"] = \
        classified_metric_pred.statistical_parity_difference()
    metrics["Mean difference"] = \
        classified_metric_pred.statistical_parity_difference()
    metrics["Disparate impact"] = \
        classified_metric_pred.disparate_impact()
    metrics["Average odds difference"] = \
        classified_metric_pred.average_odds_difference()
    metrics["Equal opportunity difference"] = \
        classified_metric_pred.equal_opportunity_difference()
    metrics["Theil index"] = classified_metric_pred.theil_index()

    if disp:
        for k in metrics:
            print("%s = %.4f" % (k, metrics[k]))

    return metrics
Example #6
def equal_ops_values(random_data, predicted_data, target_variable, protected_variable, unprivileged_input):
    random_data['Pred'] = np.random.binomial(1, .5, 1000)
    dataset = BinaryLabelDataset(df=random_data, label_names=[target_variable], protected_attribute_names=[protected_variable])
    classified_dataset = BinaryLabelDataset(df=predicted_data, label_names=[target_variable], protected_attribute_names=[protected_variable])
    privileged_group = []
    for v in predicted_data[protected_variable].unique()[predicted_data[protected_variable].unique() != unprivileged_input]:
        privileged_group.append({protected_variable: v})
    unprivileged_group = [{protected_variable: unprivileged_input}] #female=0
    metric = ClassificationMetric(dataset, classified_dataset, unprivileged_group, privileged_group)
    return abs(metric.equal_opportunity_difference())
Example #7
def get_classifier_metrics(test_list, prediction_list):
    privileged_groups = [{'sex': 1}]
    unprivileged_groups = [{'sex': 0}]
    acc_list = []
    bal_acc_list = []
    avg_odds_list = []
    recall_diff_list = []
    precision_diff_list = []
    for test_, pred_ in zip(test_list, prediction_list):
        model_metric = ClassificationMetric(
            test_,
            pred_,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)

        acc_list.append(model_metric.accuracy().round(3))
        bal_acc_list.append(((model_metric.true_positive_rate() +
                              model_metric.true_negative_rate()) / 2).round(3))
        avg_odds_list.append(model_metric.average_odds_difference().round(3))
        recall_diff_list.append(
            model_metric.equal_opportunity_difference().round(3))
        precision_diff_list.append(
            (model_metric.precision(privileged=False) -
             model_metric.precision(privileged=True)).round(3))
    return acc_list, bal_acc_list, avg_odds_list, recall_diff_list, precision_diff_list
Example #8
def nondebiased_classifier(train, test, privileged_groups,
                           unprivileged_groups):
    sess = tf.Session()
    # AdversarialDebiasing takes unprivileged_groups first; pass both by
    # keyword so the two group lists cannot be swapped.
    NN_model = AdversarialDebiasing(unprivileged_groups=unprivileged_groups,
                                    privileged_groups=privileged_groups,
                                    scope_name='nondebiased_classifier',
                                    debias=False,
                                    sess=sess)
    NN_model.fit(train)

    # predict outcome using the test set
    pred_NNmodel = NN_model.predict(test)
    sess.close()
    tf.reset_default_graph()

    # calculate accuracy
    accuracy = accuracy_score(y_true=test.labels, y_pred=pred_NNmodel.labels)

    # calculate fairness metrics
    metric_test = BinaryLabelDatasetMetric(
        pred_NNmodel,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
    acc_test = ClassificationMetric(test,
                                    pred_NNmodel,
                                    unprivileged_groups=unprivileged_groups,
                                    privileged_groups=privileged_groups)
    equal_opportunity_difference = equal_opp_diff(test,
                                                  pred_NNmodel,
                                                  'sex',
                                                  privileged=1,
                                                  unprivileged=0,
                                                  favourable=1,
                                                  unfavourable=0)
    average_odds_difference = avg_odds_diff(test,
                                            pred_NNmodel,
                                            'sex',
                                            privileged=1,
                                            unprivileged=0,
                                            favourable=1,
                                            unfavourable=0)

    metrics = [
        metric_test.mean_difference(),
        acc_test.disparate_impact(), equal_opportunity_difference,
        average_odds_difference,
        acc_test.theil_index()
    ]

    return pred_NNmodel, accuracy, metrics
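`AdversarialDebiasing` is written against the TensorFlow 1.x session API (`tf.Session`, `tf.reset_default_graph`). On TensorFlow 2.x the usual workaround is the compat layer; a sketch of the setup this example assumes:

# TF1-style session API via the TF2 compat layer.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

from aif360.algorithms.inprocessing import AdversarialDebiasing
from sklearn.metrics import accuracy_score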
Example #9
def odds_diff(random_data, predicted_data, target_variable, protected_variable, unprivileged_input):
    random_data['Pred'] = np.random.binomial(1, .5, 1000)
    dataset = BinaryLabelDataset(df=random_data, label_names=[target_variable], protected_attribute_names=[protected_variable])
    classified_dataset = BinaryLabelDataset(df=predicted_data, label_names=[target_variable], protected_attribute_names=[protected_variable])
    privileged_group = []
    for v in predicted_data[protected_variable].unique()[predicted_data[protected_variable].unique() != unprivileged_input]:
        privileged_group.append({protected_variable: v})
    unprivileged_group = [{protected_variable: unprivileged_input}] #female=0
    metric = ClassificationMetric(dataset, classified_dataset, unprivileged_group, privileged_group)
    print(metric.average_abs_odds_difference())
    # average_abs_odds_difference() is already non-negative, so abs() is redundant.
    if metric.average_abs_odds_difference().round(3) < 0.2:
        print('The algorithm can be considered unbiased')
    else:
        print('There is potential bias')
Example #10
def reject_option(dataset_orig_valid, dataset_orig_valid_pred,
                  dataset_orig_test, dataset_orig_test_pred, privileged_groups,
                  unprivileged_groups):

    num_thresh = 100
    ba_arr = np.zeros(num_thresh)
    class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)
    for idx, class_thresh in enumerate(class_thresh_arr):

        fav_inds = dataset_orig_valid_pred.scores > class_thresh
        dataset_orig_valid_pred.labels[
            fav_inds] = dataset_orig_valid_pred.favorable_label
        dataset_orig_valid_pred.labels[
            ~fav_inds] = dataset_orig_valid_pred.unfavorable_label

        classified_metric_orig_valid = ClassificationMetric(
            dataset_orig_valid,
            dataset_orig_valid_pred,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)

        ba_arr[idx] = 0.5*(classified_metric_orig_valid.true_positive_rate()\
                       +classified_metric_orig_valid.true_negative_rate())

    best_ind = np.where(ba_arr == np.max(ba_arr))[0][0]
    best_class_thresh = class_thresh_arr[best_ind]

    ROC = RejectOptionClassification(
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
        low_class_thresh=0.01,
        high_class_thresh=0.99,
        num_class_thresh=100,
        num_ROC_margin=50,
        metric_name="Statistical parity difference",
        metric_ub=metric_ub,
        metric_lb=metric_lb)

    ROC = ROC.fit(dataset_orig_valid, dataset_orig_valid_pred)

    fav_inds = dataset_orig_test_pred.scores > best_class_thresh
    dataset_orig_test_pred.labels[
        fav_inds] = dataset_orig_test_pred.favorable_label
    dataset_orig_test_pred.labels[
        ~fav_inds] = dataset_orig_test_pred.unfavorable_label

    dataset_transf_test_pred = ROC.predict(dataset_orig_test_pred)

    return dataset_transf_test_pred
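`metric_ub` and `metric_lb` are not defined in this snippet; they must come from the enclosing scope as bounds on the constrained fairness metric. Plausible values in the spirit of the aif360 demo notebooks (the exact numbers here are an assumption):

# Allowed band for the statistical parity difference (illustrative values).
metric_ub = 0.05
metric_lb = -0.05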
Example #11
def get_confusion_matrix(test_list, prediction_list):
    privileged_groups = [{'sex': 1}]
    unprivileged_groups = [{'sex': 0}]

    model_metric = ClassificationMetric(
        test_list,
        prediction_list,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)

    priv_conf_mat = model_metric.binary_confusion_matrix(privileged=True)
    unpriv_conf_mat = model_metric.binary_confusion_matrix(privileged=False)
    print('Confusion Matrix for Men {}'.format(priv_conf_mat))
    print('Confusion Matrix for Women {}'.format(unpriv_conf_mat))
    return priv_conf_mat, unpriv_conf_mat
Example #12
    def __classifier_score__aif360_(self, scoring_df, output_index, nn):

        original_df = scoring_df
        predictions_df = original_df.copy()  # copy, so the ground-truth frame keeps its labels
        predictions = nn.predict(original_df.drop(columns=[self.output_label]).values)
        predictions_df[self.output_label] = clip_at_threshold(predictions, self.threshold)

        orig_dataset = BinaryLabelDataset(df=original_df,
                                          favorable_label=1.0,
                                          unfavorable_label=0.0,
                                          label_names=[self.output_label],
                                          protected_attribute_names=[self.protected_feature],
                                          privileged_protected_attributes=[self.privileged_value])

        transformed_dataset = BinaryLabelDataset(df=predictions_df,
                                                 favorable_label=1.0,
                                                 unfavorable_label=0.0,
                                                 label_names=[self.output_label],
                                                 protected_attribute_names=[self.protected_feature],
                                                 privileged_protected_attributes=[self.privileged_value])

        privileged_groups = {}
        privileged_groups[self.protected_feature] = self.privileged_value
        unprivileged_groups = {}
        unprivileged_groups[self.protected_feature] = 1 - self.privileged_value

        transformed_dataset.scores = predictions
        classification_dataset = ClassificationMetric(orig_dataset,
                                                      transformed_dataset,
                                                      privileged_groups=[privileged_groups],
                                                      unprivileged_groups=[unprivileged_groups])
        return np.asanyarray([np.abs(getattr(classification_dataset, metric)()) for metric in self.metrics])
Example #13
    def __init__(
        self,
        raw_dataset: BinaryLabelDataset,
        predicted_dataset: BinaryLabelDataset,
        privileged_groups=None,
        unprivileged_groups=None,
    ):
        """
        Args:
            raw_dataset (BinaryLabelDataset): Dataset with ground-truth labels.
            predicted_dataset (BinaryLabelDataset): Dataset after predictions.
            privileged_groups (list(dict)): Privileged groups. Format is a list
                of `dicts` where the keys are `protected_attribute_names` and
                the values are values in `protected_attributes`. Each `dict`
                element describes a single group.
            unprivileged_groups (list(dict)): Unprivileged groups. Same format
                as privileged_groups.
        """
        self._raw_dataset = raw_dataset
        self._predicted_dataset = predicted_dataset

        if privileged_groups is None:
            privileged_groups = [
                dict(
                    zip(
                        predicted_dataset.protected_attribute_names,
                        predicted_dataset.privileged_protected_attributes,
                    )
                )
            ]

        if unprivileged_groups is None:
            unprivileged_groups = [
                dict(
                    zip(
                        predicted_dataset.protected_attribute_names,
                        predicted_dataset.unprivileged_protected_attributes,
                    )
                )
            ]

        self._classification_metric = ClassificationMetric(
            raw_dataset,
            predicted_dataset,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups,
        )
Example #14
def test_classifier(classifier, scale, test_data, fairness_metric,
                    accuracy_metric, keep_features, privileged_threshold,
                    unprivileged_threshold, privileged_groups,
                    unprivileged_groups):
    """
    Test the provided classifier on specified data set, and calculate fitness scores.

    :param classifier: The classifier to test
    :param scale: Scaler to transform the test set
    :param test_data: The test data set to test the classifier on
    :param fairness_metric: The fairness metric to calculate
    :param accuracy_metric: The accuracy metric to calculate
    :param keep_features: The features to keep for SVC
    :param privileged_threshold: The classification threshold to be used for the privileged group
    :param unprivileged_threshold: The classification threshold to be used for the unprivileged group
    :param privileged_groups: The privileged group in the data set
    :param unprivileged_groups: The unprivileged group in the data set
    :return:
    """
    dataset_orig_test = test_data

    # Prepare data
    dataset_test_pred = dataset_orig_test.copy(deepcopy=True)
    X_test = scale.transform(dataset_test_pred.features)
    if len(keep_features) > 0:  # If keep_features empty, use all features
        X_test = X_test[:, keep_features]

    # Test
    pos_ind = np.where(classifier.classes_ == dataset_orig_test.favorable_label
                       )[0][0]  # positive class index
    dataset_test_pred.scores = classifier.predict_proba(
        X_test)[:, pos_ind].reshape(-1, 1)
    # Assign labels using the classification thresholds
    for i in range(len(dataset_test_pred.labels)):
        # 4 = index of the sensitive attr, 1 = privileged value
        if dataset_test_pred.features[i][4] == 1.:  # Privileged,
            if dataset_test_pred.scores[
                    i] > privileged_threshold:  # Above threshold
                dataset_test_pred.labels[i] = dataset_test_pred.favorable_label
            else:
                dataset_test_pred.labels[
                    i] = dataset_test_pred.unfavorable_label
        else:  # Unprivileged
            if dataset_test_pred.scores[
                    i] > unprivileged_threshold:  # Above threshold
                dataset_test_pred.labels[i] = dataset_test_pred.favorable_label
            else:
                dataset_test_pred.labels[
                    i] = dataset_test_pred.unfavorable_label

    # Calculate metrics
    cm = ClassificationMetric(dataset_orig_test,
                              dataset_test_pred,
                              unprivileged_groups=unprivileged_groups,
                              privileged_groups=privileged_groups)

    accuracy_score = accuracy_metric(cm)
    fairness_score = fairness_metric(cm)
    return accuracy_score, fairness_score
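`fairness_metric` and `accuracy_metric` are callables applied to the resulting `ClassificationMetric`. A hypothetical invocation with group-specific thresholds (`clf`, `scaler`, and `test_bld` are assumed names):

# Hypothetical call: recall parity as fitness, plain accuracy, all features.
acc, fair = test_classifier(
    classifier=clf, scale=scaler, test_data=test_bld,
    fairness_metric=lambda cm: abs(cm.equal_opportunity_difference()),
    accuracy_metric=lambda cm: cm.accuracy(),
    keep_features=[],
    privileged_threshold=0.6, unprivileged_threshold=0.4,
    privileged_groups=[{'sex': 1}], unprivileged_groups=[{'sex': 0}])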
Example #15
def test(dataset, model, x_test, thresh_arr, unprivileged_groups,
         privileged_groups):

    bld = BinaryLabelDataset(df=dataset,
                             label_names=['labels'],
                             protected_attribute_names=['age'])

    # `k` and `model_AIF` (and `y_test`, `A_test` below) come from the
    # enclosing scope of the original project.
    if np.isin(k, model_AIF):
        y_val_pred_prob = model.predict_proba(bld)
    else:
        y_val_pred_prob, A_val_pred_prob = model.predict_proba(x_test)

    metric_arrs = np.empty([0, 8])
    for thresh in thresh_arr:
        if np.isin(k, model_AIF):
            y_val_pred = (y_val_pred_prob > thresh).astype(np.float64)
        else:
            y_val_pred = (y_val_pred_prob.numpy() > thresh).astype(np.float64)

        metric_arrs = np.append(metric_arrs,
                                roc_auc_score(y_test, y_val_pred_prob))

        if np.isin(k, model_AIF):
            metric_arrs = np.append(metric_arrs, 0)
        else:
            metric_arrs = np.append(metric_arrs,
                                    roc_auc_score(A_test, A_val_pred_prob))

        dataset_pred = dataset.copy()
        dataset_pred.labels = y_val_pred
        bld2 = BinaryLabelDataset(df=dataset_pred,
                                  label_names=['labels'],
                                  protected_attribute_names=['age'])

        metric = ClassificationMetric(bld,
                                      bld2,
                                      unprivileged_groups=unprivileged_groups,
                                      privileged_groups=privileged_groups)

        metric_arrs = np.append(
            metric_arrs,
            ((metric.true_positive_rate() + metric.true_negative_rate()) / 2))
        metric_arrs = np.append(metric_arrs, metric.average_odds_difference())
        metric_arrs = np.append(metric_arrs, metric.disparate_impact())
        metric_arrs = np.append(metric_arrs,
                                metric.statistical_parity_difference())
        metric_arrs = np.append(metric_arrs,
                                metric.equal_opportunity_difference())
        metric_arrs = np.append(metric_arrs, metric.theil_index())

    return metric_arrs
Example #16
def prejudice(train, test, unprivileged_groups, privileged_groups):
    prejudice_model = PrejudiceRemover(eta=100, sensitive_attr='sex')
    prejudice_model.fit(train)

    # predict outcome using the test set
    pred_prejudice = prejudice_model.predict(test)

    # calculate accuracy
    accuracy = accuracy_score(y_true=test.labels, y_pred=pred_prejudice.labels)

    # calculate fairness metrics
    metric_test = BinaryLabelDatasetMetric(
        pred_prejudice,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
    acc_test = ClassificationMetric(test,
                                    pred_prejudice,
                                    unprivileged_groups=unprivileged_groups,
                                    privileged_groups=privileged_groups)
    equal_opportunity_difference = equal_opp_diff(test,
                                                  pred_prejudice,
                                                  'sex',
                                                  privileged=1,
                                                  unprivileged=0,
                                                  favourable=1,
                                                  unfavourable=0)
    average_odds_difference = avg_odds_diff(test,
                                            pred_prejudice,
                                            'sex',
                                            privileged=1,
                                            unprivileged=0,
                                            favourable=1,
                                            unfavourable=0)

    if acc_test.disparate_impact() == math.inf:
        disparate_impact = 5.0
    else:
        disparate_impact = acc_test.disparate_impact()

    metrics = [
        metric_test.mean_difference(), disparate_impact,
        equal_opportunity_difference, average_odds_difference,
        acc_test.theil_index()
    ]

    return pred_prejudice, accuracy, metrics
Example #17
def ensemble(test, pred_adversarial, pred_prejudice, pred_nondebiased,
             unprivileged_groups, privileged_groups):
    pred_labels = []
    for i in range(0, len(test.features)):
        arr = mode([
            pred_adversarial.labels[i], pred_prejudice.labels[i],
            pred_nondebiased.labels[i]
        ])
        pred_labels.append(arr[0][0])

    pred_ensemble = test.copy()
    pred_ensemble.labels = np.array(pred_labels)

    accuracy = accuracy_score(y_true=test.labels, y_pred=pred_ensemble.labels)

    metric_test = BinaryLabelDatasetMetric(
        pred_ensemble,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
    acc_test = ClassificationMetric(test,
                                    pred_ensemble,
                                    unprivileged_groups=unprivileged_groups,
                                    privileged_groups=privileged_groups)
    equal_opportunity_difference = equal_opp_diff(test,
                                                  pred_ensemble,
                                                  'sex',
                                                  privileged=1,
                                                  unprivileged=0,
                                                  favourable=1,
                                                  unfavourable=0)
    average_odds_difference = avg_odds_diff(test,
                                            pred_ensemble,
                                            'sex',
                                            privileged=1,
                                            unprivileged=0,
                                            favourable=1,
                                            unfavourable=0)

    metrics = [
        metric_test.mean_difference(),
        acc_test.disparate_impact(), equal_opportunity_difference,
        average_odds_difference,
        acc_test.theil_index()
    ]

    return accuracy, metrics
Example #18
def test_theil_index():
    data = np.array([[0, 1], [0, 0], [1, 0], [1, 1], [1, 0], [1, 0], [2, 1],
                     [2, 0], [2, 1], [2, 1]])
    pred = data.copy()
    pred[[3, 9], -1] = 0
    pred[[4, 5], -1] = 1
    df = pd.DataFrame(data, columns=['feat', 'label'])
    df2 = pd.DataFrame(pred, columns=['feat', 'label'])
    bld = BinaryLabelDataset(df=df,
                             label_names=['label'],
                             protected_attribute_names=['feat'])
    bld2 = BinaryLabelDataset(df=df2,
                              label_names=['label'],
                              protected_attribute_names=['feat'])
    cm = ClassificationMetric(bld, bld2)

    assert cm.theil_index() == 4 * np.log(2) / 10
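The expected value follows from aif360's definition of the Theil index: the generalized entropy index with alpha = 1 over benefits b_i = y_hat_i - y_i + 1. Two 1-to-0 flips give b = 0, two 0-to-1 flips give b = 2, and the remaining six samples give b = 1, so the mean benefit is 1 and only the b = 2 terms contribute. A quick check of the arithmetic:

# Benefits per sample, in row order (0 * log 0 is taken as 0).
b = np.array([1, 1, 1, 0, 2, 2, 1, 1, 1, 0])
mu = b.mean()  # = 1.0
nz = b > 0
theil = ((b[nz] / mu) * np.log(b[nz] / mu)).sum() / len(b)
assert np.isclose(theil, 4 * np.log(2) / 10)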
Example #19
def test_multiclass_confusion_matrix():
    data = np.array([[0, 1], [0, 0], [1, 0], [1, 1], [1, 0], [1, 2], [2, 1],
                     [2, 0], [2, 2], [2, 1]])
    pred = data.copy()
    pred[3, 1] = 0
    pred[4, 1] = 2

    df = pd.DataFrame(data, columns=['feat', 'label'])
    df2 = pd.DataFrame(pred, columns=['feat', 'label'])

    favorable_values = [0, 1]
    unfavorable_values = [2]
    mcld = MulticlassLabelDataset(favorable_label=favorable_values,
                                  unfavorable_label=unfavorable_values,
                                  df=df,
                                  label_names=['label'],
                                  protected_attribute_names=['feat'])
    mcld2 = MulticlassLabelDataset(favorable_label=favorable_values,
                                   unfavorable_label=unfavorable_values,
                                   df=df2,
                                   label_names=['label'],
                                   protected_attribute_names=['feat'])
    cm = ClassificationMetric(mcld,
                              mcld2,
                              unprivileged_groups=[{
                                  'feat': 2
                              }],
                              privileged_groups=[{
                                  'feat': 0
                              }, {
                                  'feat': 1
                              }])
    confusion_matrix = cm.binary_confusion_matrix()

    actual_labels_df = df[['label']].values
    actual_labels_df2 = df2[['label']].values

    assert np.all(actual_labels_df == mcld.labels)
    assert np.all(actual_labels_df2 == mcld2.labels)

    assert confusion_matrix == {'TP': 7.0, 'FN': 1.0, 'TN': 2.0, 'FP': 0.0}

    fnr = cm.false_negative_rate_difference()
    assert fnr == -0.2
Example #20
def get_metrics(dataset_orig, preds):
    '''
    Description: This code computes accuracy, balanced accuracy, max gap and gap rms for race and gender
    Input: dataset_orig: a BinaryLabelDataset (from the aif360 module)
            preds: predictions
    '''
    dataset_learned_model = dataset_orig.copy()
    dataset_learned_model.labels = preds

    # wrt gender
    privileged_groups = [{'sex_ Male': 1}]
    unprivileged_groups = [{'sex_ Male': 0}]

    classified_metric = ClassificationMetric(
        dataset_orig,
        dataset_learned_model,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)

    bal_acc = compute_balanced_accuracy(classified_metric)

    gender_gap_rms, gender_max_gap = compute_gap_RMS_and_gap_max(
        classified_metric)
    print("Test set: gender gap rms = %f" % gender_gap_rms)
    print("Test set: gender max gap rms = %f" % gender_max_gap)
    print("Test set: Balanced TPR = %f" % bal_acc)

    # wrt race
    privileged_groups = [{'race_ White': 1}]
    unprivileged_groups = [{'race_ White': 0}]

    classified_metric = ClassificationMetric(
        dataset_orig,
        dataset_learned_model,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)

    race_gap_rms, race_max_gap = compute_gap_RMS_and_gap_max(classified_metric)
    print("Test set: race gap rms = %f" % race_gap_rms)
    print("Test set: race max gap rms = %f" % race_max_gap)

    return (classified_metric.accuracy(), bal_acc, race_gap_rms, race_max_gap,
            gender_gap_rms, gender_max_gap)
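`compute_balanced_accuracy` and `compute_gap_RMS_and_gap_max` are project helpers not shown here. Plausible implementations consistent with how they are used above (sketches under that assumption, not the original code):

def compute_balanced_accuracy(classified_metric):
    # Mean of TPR and TNR on the full test set.
    return 0.5 * (classified_metric.true_positive_rate() +
                  classified_metric.true_negative_rate())

def compute_gap_RMS_and_gap_max(classified_metric):
    # Group gaps in TPR and TNR; report their RMS and the larger magnitude.
    tpr_gap = classified_metric.true_positive_rate_difference()
    tnr_gap = (classified_metric.true_negative_rate(privileged=False) -
               classified_metric.true_negative_rate(privileged=True))
    gaps = np.array([tpr_gap, tnr_gap])
    return np.sqrt(np.mean(gaps ** 2)), np.abs(gaps).max()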
Example #21
def get_classification_metric_object(dataset_true, dataset_pred,
                                     unprivileged_groups, privileged_groups):
    """Build the ClassificationMetric object"""
    classified_metric_pred = ClassificationMetric(
        dataset_true,
        dataset_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)

    return classified_metric_pred
Example #22
def test_between_all_groups():
    data = np.array([[0, 1], [0, 0], [1, 0], [1, 1], [1, 0], [1, 0], [2, 1],
                     [2, 0], [2, 1], [2, 1]])
    pred = data.copy()
    pred[[3, 9], -1] = 0
    pred[[4, 5], -1] = 1
    df = pd.DataFrame(data, columns=['feat', 'label'])
    df2 = pd.DataFrame(pred, columns=['feat', 'label'])
    bld = BinaryLabelDataset(df=df,
                             label_names=['label'],
                             protected_attribute_names=['feat'])
    bld2 = BinaryLabelDataset(df=df2,
                              label_names=['label'],
                              protected_attribute_names=['feat'])
    cm = ClassificationMetric(bld, bld2)

    b = np.array([1, 1, 1.25, 1.25, 1.25, 1.25, 0.75, 0.75, 0.75, 0.75])
    assert cm.between_all_groups_generalized_entropy_index(
    ) == 1 / 20 * np.sum(b**2 - 1)
Example #23
def _preprocess_data(
    data, protected_attribute_name, protected_attribute_index, label_name, required_fairness
):
    from pandas import DataFrame
    from aif360.datasets import BinaryLabelDataset

    dataset = BinaryLabelDataset(
        df=DataFrame(data),
        # These arguments expect lists, not sets.
        protected_attribute_names=[protected_attribute_name],
        label_names=[label_name],
        favorable_label=2,
        unfavorable_label=1,
    )
    train, test = dataset.split([0.8])

    from aif360.algorithms.inprocessing import AdversarialDebiasing

    sess = tf.compat.v1.Session()
    debiaser = AdversarialDebiasing(
        unprivileged_groups=({protected_attribute_name: 0},),
        privileged_groups=({protected_attribute_name: 1},),
        scope_name="debiaser",
        debias=True,
        sess=sess,
    )
    debiaser.fit(train)

    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier(class_weight="balanced")

    X_tr = np.delete(train.features, protected_attribute_index, axis=1)
    y_tr = train.labels.ravel()
    model.fit(X_tr, y_tr)

    test_pred = test.copy(deepcopy=True)
    # Reshape to a column vector so the elementwise comparison below does not broadcast.
    test_pred.scores = model.predict(
        np.delete(debiaser.predict(test).features, protected_attribute_index, axis=1)
    ).reshape(-1, 1)

    accuracy = np.mean(np.equal(test.labels, test_pred.scores))

    from aif360.metrics import ClassificationMetric
    disparate_impact = ClassificationMetric(
        test,
        test_pred,
        unprivileged_groups=({protected_attribute_name: 0},),
        privileged_groups=({protected_attribute_name: 1},),
    ).disparate_impact()

    print(f"Accuracy: {accuracy}")
    print(f"Disparate impact: {disparate_impact}")
    if disparate_impact > float(required_fairness):
        raise ValueError(
            f"Too unfair! Disparate impact was {disparate_impact} but must be less than {required_fairness}"
        )
Example #24
def compute_metrics(data, predictions, unpriv_group, priv_group):
    transformed_data = BinaryLabelDataset(df=data,
                                          label_names=["two_year_recid"],
                                          protected_attribute_names=["race"],
                                          favorable_label=0,
                                          unfavorable_label=1) if isinstance(
                                              data, pd.DataFrame) else data
    t_data_train_true = transformed_data.copy(deepcopy=True)
    t_data_train_pred = transformed_data.copy(deepcopy=True)
    t_data_train_pred.labels = predictions.reshape(-1, 1)
    metric_test_data = ClassificationMetric(
        t_data_train_true,
        t_data_train_pred,
        unprivileged_groups=unpriv_group,
        privileged_groups=priv_group,
    )
    tpr_difference = metric_test_data.true_positive_rate_difference()
    tpr_privileged = metric_test_data.true_positive_rate(privileged=True)
    tpr_unprivileged = metric_test_data.true_positive_rate(privileged=False)
    return tpr_difference, tpr_privileged, tpr_unprivileged
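A hypothetical call on COMPAS-style data, treating `race = 1` as privileged; `compas_df` and `preds` are assumed names for the input frame and the model's label predictions:

# Hypothetical usage of the COMPAS-flavoured compute_metrics above.
diff, tpr_priv, tpr_unpriv = compute_metrics(
    compas_df, preds,
    unpriv_group=[{'race': 0}],
    priv_group=[{'race': 1}])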
Example #25
def compute_aif_metrics(dataset_true, dataset_pred, unprivileged_groups,
                        privileged_groups, ret_eval_dict=True):

    metrics_cls = ClassificationMetric(dataset_true, dataset_pred,
                                       unprivileged_groups=unprivileged_groups,
                                       privileged_groups=privileged_groups)
    metrics_dict = {}
    metrics_dict["BA"] = 0.5 * (metrics_cls.true_positive_rate() +
                                metrics_cls.true_negative_rate())
    metrics_dict["SPD"] = metrics_cls.statistical_parity_difference()
    metrics_dict["DI"] = metrics_cls.disparate_impact()
    metrics_dict["AOD"] = metrics_cls.average_odds_difference()
    metrics_dict["EOD"] = metrics_cls.equal_opportunity_difference()
    metrics_dict["DFBA"] = metrics_cls.differential_fairness_bias_amplification()
    metrics_dict["TI"] = metrics_cls.theil_index()
    
    if ret_eval_dict:
        return metrics_dict, metrics_cls
    else:
        return metrics_cls
Example #26
def fairness_IBM(y_pred, Ztr, ytr, verbose=0):
    from aif360.datasets import BinaryLabelDataset
    from aif360.metrics import ClassificationMetric

    assert np.array_equal(np.unique(Ztr),
                          np.array([0, 1])), "Z must contain either 0 or 1"
    # if len(ytr.shape) == 1:
    # ytr = np.expand_dims(ytr, -1)

    Ztr = np.squeeze(Ztr)
    if verbose:
        print(ytr.shape)
        print(Ztr.shape)
    unprivileged_groups = [{"zs": [0]}]
    privileged_groups = [{"zs": [1]}]
    metric_arrs = defaultdict(list)
    dict_ = {"y_true": ytr, "zs": Ztr}
    df = pd.DataFrame(dict_)
    dataset = BinaryLabelDataset(df=df,
                                 label_names=["y_true"],
                                 protected_attribute_names=["zs"],
                                 unprivileged_protected_attributes=[[0]],
                                 privileged_protected_attributes=[[1]])

    dataset_pred = dataset.copy()
    dataset_pred.labels = y_pred
    metric = ClassificationMetric(dataset,
                                  dataset_pred,
                                  unprivileged_groups=unprivileged_groups,
                                  privileged_groups=privileged_groups)

    # metric_arrs['bal_acc'].append((metric.true_positive_rate()
    #                              + metric.true_negative_rate()) / 2)
    metric_arrs["EA"].append(
        metric.accuracy(privileged=False) - metric.accuracy(privileged=True))
    # ASSUMING ALL OTHER METRICS RETURN U - P
    metric_arrs['EO'].append(metric.average_odds_difference())
    # The ideal value of this metric is 1.0.
    # A value < 1 implies a higher benefit for the privileged group,
    # and a value > 1 implies a higher benefit for the unprivileged group.
    metric_arrs['DI'].append(metric.disparate_impact() - 1)
    metric_arrs['DP'].append(metric.statistical_parity_difference())
    metric_arrs['EQ'].append(metric.equal_opportunity_difference())
    metric_arrs['TH'].append(metric.between_group_theil_index() * 10)
    results = pd.DataFrame(metric_arrs)
    return results
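A hypothetical smoke test on random data; note the function also needs `defaultdict` from `collections` in scope, and `y_pred` should be a column vector so the label assignment matches aif360's (n, 1) layout:

# Hypothetical smoke test with random labels and a random protected attribute.
rng = np.random.default_rng(0)
Ztr = rng.integers(0, 2, size=200)
ytr = rng.integers(0, 2, size=200)
y_pred = rng.integers(0, 2, size=200).reshape(-1, 1).astype(float)
print(fairness_IBM(y_pred, Ztr, ytr, verbose=1))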
Example #27
def test_generalized_binary_confusion_matrix():
    data = np.array([[0, 1], [0, 0], [1, 0], [1, 1], [1, 0], [1, 0], [1, 2],
                     [0, 0], [0, 0], [1, 2]])

    pred = np.array([[0, 1, 0.8], [0, 0, 0.6], [1, 0, 0.7], [1, 1, 0.8],
                     [1, 2, 0.36], [1, 0, 0.82], [1, 1, 0.79], [0, 2, 0.42],
                     [0, 1, 0.81], [1, 2, 0.3]])
    df = pd.DataFrame(data, columns=['feat', 'label'])
    df2 = pd.DataFrame(pred, columns=['feat', 'label', 'score'])

    favorable_values = [0, 1]
    unfavorable_values = [2]

    mcld = MulticlassLabelDataset(df=df,
                                  label_names=['label'],
                                  protected_attribute_names=['feat'],
                                  favorable_label=favorable_values,
                                  unfavorable_label=unfavorable_values)

    mcld2 = MulticlassLabelDataset(df=df2,
                                   label_names=['label'],
                                   scores_names=['score'],
                                   protected_attribute_names=['feat'],
                                   favorable_label=favorable_values,
                                   unfavorable_label=unfavorable_values)

    cm = ClassificationMetric(mcld,
                              mcld2,
                              unprivileged_groups=[{
                                  'feat': 0
                              }],
                              privileged_groups=[{
                                  'feat': 1
                              }])

    gen_confusion_matrix = cm.generalized_binary_confusion_matrix()

    gtp = cm.num_generalized_true_positives()
    assert round(gtp, 2) == 5.31
    gfp = cm.num_generalized_false_positives()
    assert round(gfp, 2) == 1.09  # avoid exact float comparison
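Both expected numbers can be checked by hand: generalized counts weight each instance by its score, so GTP sums the scores of the rows whose true label is favorable (0 or 1: rows 0-5, 7, 8), and GFP sums the scores of the truly unfavorable rows (label 2: rows 6 and 9):

# Hand check of the generalized counts from the scores above.
assert round(0.8 + 0.6 + 0.7 + 0.8 + 0.36 + 0.82 + 0.42 + 0.81, 2) == 5.31
assert round(0.79 + 0.3, 2) == 1.09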
Example #28
def test_eqodds():
    eqo = EqOddsPostprocessing(unprivileged_groups=[{
        'sex': 0
    }],
                               privileged_groups=[{
                                   'sex': 1
                               }],
                               seed=1234567)
    pred_eqo = eqo.fit(val, val_pred).predict(pred)
    cm_eqo = ClassificationMetric(test,
                                  pred_eqo,
                                  unprivileged_groups=[{
                                      'sex': 0
                                  }],
                                  privileged_groups=[{
                                      'sex': 1
                                  }])
    # accuracy drop should be less than 10% (arbitrary)
    assert (cm_lr.accuracy() - cm_eqo.accuracy()) / cm_lr.accuracy() < 0.1
    # approximately equal odds
    assert cm_eqo.average_abs_odds_difference() < 0.1
Example #29
def test_adult_sr():
    biased_model = MetaFairClassifier(tau=0,
                                      sensitive_attr=protected,
                                      type='sr',
                                      seed=123).fit(train)
    dataset_bias_test = biased_model.predict(test)

    biased_cm = ClassificationMetric(test,
                                     dataset_bias_test,
                                     unprivileged_groups=[{
                                         protected: 0
                                     }],
                                     privileged_groups=[{
                                         protected: 1
                                     }])
    # These are disparate impact ratios, so name them accordingly.
    di1 = biased_cm.disparate_impact()
    di1 = min(di1, 1 / di1)

    debiased_model = MetaFairClassifier(tau=0.9,
                                        sensitive_attr=protected,
                                        type='sr',
                                        seed=123).fit(train)
    dataset_debiasing_test = debiased_model.predict(test)

    debiased_cm = ClassificationMetric(test,
                                       dataset_debiasing_test,
                                       unprivileged_groups=[{
                                           protected: 0
                                       }],
                                       privileged_groups=[{
                                           protected: 1
                                       }])
    di2 = debiased_cm.disparate_impact()
    di2 = min(di2, 1 / di2)
    assert di2 >= di1
Example #30
def test_adult_fdr():
    biased_model = MetaFairClassifier(tau=0,
                                      sensitive_attr=protected,
                                      type='fdr',
                                      seed=123).fit(train)
    dataset_bias_test = biased_model.predict(test)

    biased_cm = ClassificationMetric(test,
                                     dataset_bias_test,
                                     unprivileged_groups=[{
                                         protected: 0
                                     }],
                                     privileged_groups=[{
                                         protected: 1
                                     }])
    fdr1 = biased_cm.false_discovery_rate_ratio()
    fdr1 = min(fdr1, 1 / fdr1)

    debiased_model = MetaFairClassifier(tau=0.9,
                                        sensitive_attr=protected,
                                        type='fdr',
                                        seed=123).fit(train)
    dataset_debiasing_test = debiased_model.predict(test)

    debiased_cm = ClassificationMetric(test,
                                       dataset_debiasing_test,
                                       unprivileged_groups=[{
                                           protected: 0
                                       }],
                                       privileged_groups=[{
                                           protected: 1
                                       }])
    fdr2 = debiased_cm.false_discovery_rate_ratio()
    fdr2 = min(fdr2, 1 / fdr2)
    assert (fdr2 >= fdr1)