def get_metric_reports(true_dataset,classfied_dataset,privileged_groups,unprivileged_groups):
    """Collect classification and fairness metrics into an ordered report.

    A deep copy of ``classfied_dataset`` is made and its ``labels`` are
    replaced with a deep copy of ``true_dataset.labels``; that copy is passed
    to ``ClassificationMetric`` as ``dataset`` and the untouched
    ``classfied_dataset`` as ``classified_dataset``.

    Args:
        true_dataset: Object exposing a ``labels`` attribute.
        classfied_dataset: Object exposing ``copy(deepcopy=True)``. The
            parameter name is spelled as callers already use it.
        privileged_groups: Forwarded unchanged to ``ClassificationMetric``.
        unprivileged_groups: Forwarded unchanged to ``ClassificationMetric``.

    Returns:
        OrderedDict: Metric name -> value, in the insertion order used below.
    """
    # (report key, ClassificationMetric method name) pairs, evaluated in order.
    confusion_rates = (
        ('TPR', 'true_positive_rate'),
        ('TNR', 'true_negative_rate'),
        ('FPR', 'false_positive_rate'),
        ('FNR', 'false_negative_rate'),
    )
    summary_metrics = (
        ('Acc', 'accuracy'),
        ("Statistical parity difference", 'statistical_parity_difference'),
        ("Disparate impact", 'disparate_impact'),
        ("Equal opportunity difference", 'equal_opportunity_difference'),
        ("Average odds difference", 'average_odds_difference'),
        ("Theil index", 'theil_index'),
        ("United Fairness", 'generalized_entropy_index'),
    )

    # Same features/attributes as the classified data, but with true labels.
    reference = classfied_dataset.copy(deepcopy=True)
    reference.labels = copy.deepcopy(true_dataset.labels)

    cm = ClassificationMetric(
        dataset=reference,
        classified_dataset=classfied_dataset,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
    )

    report = OrderedDict()
    for key, method_name in confusion_rates:
        report[key] = getattr(cm, method_name)()

    # Balanced accuracy is derived from the two rates already in the report.
    report['Balanced_Acc'] = 0.5 * (report['TPR'] + report['TNR'])

    for key, method_name in summary_metrics:
        report[key] = getattr(cm, method_name)()

    return report
def fit(self, dataset_true, dataset_pred):
    """Compute parameters for equalizing odds using true and predicted labels.

    Builds group-wise classification statistics from the two datasets, sets
    up a linear program with four decision variables (objective ``c``,
    inequality bounds ``A_ub``/``b_ub`` and two equality rows
    ``A_eq``/``b_eq``) and stores whatever ``linprog`` returns in
    ``self.model_params``.

    Args:
        dataset_true (BinaryLabelDataset): Dataset containing true labels.
        dataset_pred (BinaryLabelDataset): Dataset containing predicted
            labels.

    Returns:
        EqOddsPostprocessing: Returns self.
    """
    metric = ClassificationMetric(
        dataset_true, dataset_pred,
        unprivileged_groups=self.unprivileged_groups,
        privileged_groups=self.privileged_groups)

    # compute basic statistics
    # sbr / obr: share of all instances that fall in the privileged /
    # unprivileged group, respectively.
    sbr = metric.num_instances(privileged=True) / metric.num_instances()
    obr = metric.num_instances(privileged=False) / metric.num_instances()

    # Suffix 0 = computed with privileged=True, suffix 1 = privileged=False.
    fpr0 = metric.false_positive_rate(privileged=True)
    fpr1 = metric.false_positive_rate(privileged=False)
    fnr0 = metric.false_negative_rate(privileged=True)
    fnr1 = metric.false_negative_rate(privileged=False)
    tpr0 = metric.true_positive_rate(privileged=True)
    tpr1 = metric.true_positive_rate(privileged=False)
    tnr0 = metric.true_negative_rate(privileged=True)
    tnr1 = metric.true_negative_rate(privileged=False)

    # linear program has 4 decision variables:
    # [Pr[label_tilde = 1 | label_hat = 1, protected_attributes = 0];
    #  Pr[label_tilde = 1 | label_hat = 0, protected_attributes = 0];
    #  Pr[label_tilde = 1 | label_hat = 1, protected_attributes = 1];
    #  Pr[label_tilde = 1 | label_hat = 0, protected_attributes = 1]]
    # Coefficients of the linear objective function to be minimized.
    c = np.array([fpr0 - tpr0, tnr0 - fnr0, fpr1 - tpr1, tnr1 - fnr1])

    # A_ub - 2-D array which, when matrix-multiplied by x, gives the values
    #        of the upper-bound inequality constraints at x
    # b_ub - 1-D array of values representing the upper-bound of each
    #        inequality constraint (row) in A_ub.
    # Just to keep these between zero and one: each variable gets a row pair
    # (x_i <= 1, -x_i <= 0).
    A_ub = np.array(
        [[1, 0, 0, 0],
         [-1, 0, 0, 0],
         [0, 1, 0, 0],
         [0, -1, 0, 0],
         [0, 0, 1, 0],
         [0, 0, -1, 0],
         [0, 0, 0, 1],
         [0, 0, 0, -1]], dtype=np.float64)
    b_ub = np.array([1, 0, 1, 0, 1, 0, 1, 0], dtype=np.float64)

    # Create boolean conditioning vectors for protected groups
    # NOTE(review): only element [0] of the helper's return value is used;
    # confirm the helper's return shape against the `utils` version in use.
    cond_vec_priv = utils.compute_boolean_conditioning_vector(
        dataset_pred.protected_attributes,
        dataset_pred.protected_attribute_names,
        self.privileged_groups)[0]
    cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
        dataset_pred.protected_attributes,
        dataset_pred.protected_attribute_names,
        self.unprivileged_groups)[0]

    # Naming: "s*" arrays are restricted to the privileged rows, "o*" arrays
    # to the unprivileged rows. "const" marks rows whose predicted label is
    # the favorable label, "flip" marks rows predicted unfavorable.
    sconst = np.ravel(
        dataset_pred.labels[cond_vec_priv] == dataset_pred.favorable_label)
    sflip = np.ravel(dataset_pred.labels[cond_vec_priv] == dataset_pred.unfavorable_label)
    oconst = np.ravel(dataset_pred.labels[cond_vec_unpriv] == dataset_pred.favorable_label)
    oflip = np.ravel(dataset_pred.labels[cond_vec_unpriv] == dataset_pred.unfavorable_label)

    y_true = dataset_true.labels.ravel()

    # Per-group confusion-matrix masks: predicted label (const/flip) combined
    # with the true label (favorable/unfavorable).
    # NOTE(review): dtype=np.float64 is passed to np.logical_and as-is; the
    # dtype of the resulting arrays depends on NumPy's ufunc loop resolution
    # -- confirm on the NumPy version in use.
    sm_tn = np.logical_and(
        sflip, y_true[cond_vec_priv] == dataset_true.unfavorable_label,
        dtype=np.float64)
    sm_fn = np.logical_and(
        sflip, y_true[cond_vec_priv] == dataset_true.favorable_label,
        dtype=np.float64)
    sm_fp = np.logical_and(
        sconst, y_true[cond_vec_priv] == dataset_true.unfavorable_label,
        dtype=np.float64)
    sm_tp = np.logical_and(
        sconst, y_true[cond_vec_priv] == dataset_true.favorable_label,
        dtype=np.float64)

    om_tn = np.logical_and(
        oflip, y_true[cond_vec_unpriv] == dataset_true.unfavorable_label,
        dtype=np.float64)
    om_fn = np.logical_and(
        oflip, y_true[cond_vec_unpriv] == dataset_true.favorable_label,
        dtype=np.float64)
    om_fp = np.logical_and(
        oconst, y_true[cond_vec_unpriv] == dataset_true.unfavorable_label,
        dtype=np.float64)
    om_tp = np.logical_and(
        oconst, y_true[cond_vec_unpriv] == dataset_true.favorable_label,
        dtype=np.float64)

    # A_eq - 2-D array which, when matrix-multiplied by x,
    #        gives the values of the equality constraints at x
    # b_eq - 1-D array of values representing the RHS of each equality
    #        constraint (row) in A_eq.
    # Used to impose equality of odds constraint
    # Row 0 uses the tp/fn masks and divides by sbr / obr; row 1 uses the
    # fp/tn masks and divides by (1 - sbr) / (1 - obr).
    # NOTE(review): these denominators are zero when a group holds none or
    # all of the instances -- confirm callers never hit that case.
    A_eq = [
        [(np.mean(sconst * sm_tp) - np.mean(sflip * sm_tp)) / sbr,
         (np.mean(sflip * sm_fn) - np.mean(sconst * sm_fn)) / sbr,
         (np.mean(oflip * om_tp) - np.mean(oconst * om_tp)) / obr,
         (np.mean(oconst * om_fn) - np.mean(oflip * om_fn)) / obr],
        [(np.mean(sconst * sm_fp) - np.mean(sflip * sm_fp)) / (1 - sbr),
         (np.mean(sflip * sm_tn) - np.mean(sconst * sm_tn)) / (1 - sbr),
         (np.mean(oflip * om_fp) - np.mean(oconst * om_fp)) / (1 - obr),
         (np.mean(oconst * om_tn) - np.mean(oflip * om_tn)) / (1 - obr)]
    ]

    b_eq = [
        (np.mean(oflip * om_tp) + np.mean(oconst * om_fn)) / obr
        - (np.mean(sflip * sm_tp) + np.mean(sconst * sm_fn)) / sbr,
        (np.mean(oflip * om_fp) + np.mean(oconst * om_tn)) / (1 - obr)
        - (np.mean(sflip * sm_fp) + np.mean(sconst * sm_tn)) / (1 - sbr)
    ]

    # Linear program
    # The solver's return value is stored unmodified; its status is not
    # inspected here.
    self.model_params = linprog(c, A_ub=A_ub, b_ub=b_ub,
                                A_eq=A_eq, b_eq=b_eq)

    return self
dataset_transf_valid_pred = cpp.predict(dataset_orig_valid_pred) dataset_transf_test_pred = cpp.predict(dataset_orig_test_pred) cm_transf_valid = ClassificationMetric(dataset_orig_valid, dataset_transf_valid_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups) cm_transf_test = ClassificationMetric(dataset_orig_test, dataset_transf_test_pred, unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups) #cm_transf_test.difference for idx,PR in enumerate(privileged_options): gfnr[idx] += cm_transf_test.false_negative_rate(privileged=PR) gfpr[idx] += cm_transf_test.false_positive_rate(privileged=PR) result = cm_transf_test.accuracy(privileged=PR) acc[idx] += float(result) pbar.update(1) fns.append(gfnr/N_reps) fps.append(gfpr/N_reps) accs.append(acc/N_reps) negs.append(neg) collapse = lambda param, idx : [v[idx] for v in param] getnames = {None:"full data", True:"privileged", False:"unprivileged"}