def __init__(self, step_name, step, df, sensitive_att, target_col, input_score=True, clf_threshold=0.5):
        """
        :param step_name: str, name of the current input step.
        :param step: object of the initialized class.
        :param df: pandas dataframe, stores the data.
        :param sensitive_att: str, the name of a sensitive attribute.
        :param target_col: str, the name of the target attribute.
        :param input_score: boolean, indicates whether the post-processor takes predicted scores (rather than hard labels) as input. Default is True.
        :param clf_threshold: float in [0, 1], threshold used to derive class labels from predicted scores.
        """
        if "pred_"+target_col not in df.columns:
            print("Require the predictions for ",target_col, " existing in the data!")
            raise ValueError
        super().__init__(step_name=step_name, df=df, sensitive_att=sensitive_att, target_col=target_col)
        # assume the data set has been encoded to numerical values;
        # initialize a BinaryLabelDataset from AIF360 (aif360.datasets)
        aif_true_df = BinaryLabelDataset(df=df.drop(columns=["pred_"+target_col]), label_names=[target_col], protected_attribute_names=[sensitive_att])

        aif_pred_df = aif_true_df.copy()

        if input_score:
            # AIF360 expects scores as a column vector of shape (n, 1)
            aif_pred_df.scores = df["pred_" + target_col].to_numpy().reshape(-1, 1)
        else:
            # binarize predicted scores into labels using the classification threshold
            aif_pred_df.labels = np.array([int(x >= clf_threshold) for x in df["pred_" + target_col]]).reshape(-1, 1)
        self.input_score = input_score
        self.step = step.fit(aif_true_df, aif_pred_df)
        self.clf_threshold = clf_threshold
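
A minimal usage sketch for the constructor above. The enclosing class name (PostProcessor), the dataframe, and the column/group names are assumptions made for illustration; CalibratedEqOddsPostprocessing is one AIF360 post-processor whose fit() accepts the (true, predicted) dataset pair built here and works on predicted scores:

from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing

# hypothetical wrapper class name and column names; df holds encoded features,
# the true label "income", and predicted scores in "pred_income"
calibrated_eq_odds = CalibratedEqOddsPostprocessing(
    unprivileged_groups=[{"sex": 0}],
    privileged_groups=[{"sex": 1}],
    cost_constraint="fnr",
    seed=0)

post_step = PostProcessor(step_name="CalibratedEqOdds",
                          step=calibrated_eq_odds,
                          df=df,
                          sensitive_att="sex",
                          target_col="income",
                          input_score=True)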
Example #2
def fairness_IBM(y_pred, Ztr, ytr, verbose=0):
    from collections import defaultdict

    import numpy as np
    import pandas as pd
    from aif360.datasets import BinaryLabelDataset
    from aif360.metrics import ClassificationMetric

    assert np.array_equal(np.unique(Ztr),
                          np.array([0, 1])), "Z must contain either 0 or 1"
    # if len(ytr.shape) == 1:
    # ytr = np.expand_dims(ytr, -1)

    Ztr = np.squeeze(Ztr)
    if verbose:
        print(ytr.shape)
        print(Ztr.shape)
    unprivileged_groups = [{"zs": [0]}]
    privileged_groups = [{"zs": [1]}]
    metric_arrs = defaultdict(list)
    dict_ = {"y_true": ytr, "zs": Ztr}
    df = pd.DataFrame(dict_)
    dataset = BinaryLabelDataset(df=df,
                                 label_names=["y_true"],
                                 protected_attribute_names=["zs"],
                                 unprivileged_protected_attributes=[[0]],
                                 privileged_protected_attributes=[[1]])

    dataset_pred = dataset.copy()
    # labels must be a column vector (n, 1) to match the dataset's label shape
    dataset_pred.labels = np.asarray(y_pred).reshape(-1, 1)
    metric = ClassificationMetric(dataset,
                                  dataset_pred,
                                  unprivileged_groups=unprivileged_groups,
                                  privileged_groups=privileged_groups)

    # metric_arrs['bal_acc'].append((metric.true_positive_rate()
    #                              + metric.true_negative_rate()) / 2)
    metric_arrs["EA"].append(
        metric.accuracy(privileged=False) - metric.accuracy(privileged=True))
    # ASSUMING ALL OTHER METRICS RETURN U - P
    metric_arrs['EO'].append(metric.average_odds_difference())
    # The ideal value of disparate impact is 1.0; a value < 1 implies higher
    # benefit for the privileged group, a value > 1 higher benefit for the
    # unprivileged group. Subtract 1 so the ideal value is 0, like the others.
    metric_arrs['DI'].append(metric.disparate_impact() - 1)
    metric_arrs['DP'].append(metric.statistical_parity_difference())
    metric_arrs['EQ'].append(metric.equal_opportunity_difference())
    metric_arrs['TH'].append(metric.between_group_theil_index() * 10)
    results = pd.DataFrame(metric_arrs)
    return results
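
A quick sanity check with synthetic data; the arrays below are made up purely to exercise the function and have no real meaning:

import numpy as np

rng = np.random.RandomState(0)
ytr = rng.randint(0, 2, size=200)       # ground-truth binary labels
Ztr = np.tile([0, 1], 100)              # sensitive attribute, both groups present
y_pred = rng.randint(0, 2, size=200)    # binary predictions from some classifier

print(fairness_IBM(y_pred, Ztr, ytr, verbose=1))
# one-row DataFrame with columns EA, EO, DI, DP, EQ, TH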