def test_generalized_entropy_index():
    # Ground-truth labels for 10 individuals across three 'feat' groups.
    data = np.array([[0, 1], [0, 0], [1, 0], [1, 1], [1, 0],
                     [1, 0], [2, 1], [2, 0], [2, 1], [2, 1]])
    pred = data.copy()
    # Flip four predictions: two false negatives and two false positives.
    pred[[3, 9], -1] = 0
    pred[[4, 5], -1] = 1
    df = pd.DataFrame(data, columns=['feat', 'label'])
    df2 = pd.DataFrame(pred, columns=['feat', 'label'])
    bld = BinaryLabelDataset(df=df, label_names=['label'],
                             protected_attribute_names=['feat'])
    bld2 = BinaryLabelDataset(df=df2, label_names=['label'],
                              protected_attribute_names=['feat'])
    cm = ClassificationMetric(bld, bld2)
    assert cm.generalized_entropy_index() == 0.2

    # A second prediction vector with six flipped labels.
    pred = data.copy()
    pred[:, -1] = np.array([0, 1, 1, 0, 0, 0, 0, 1, 1, 1])
    df2 = pd.DataFrame(pred, columns=['feat', 'label'])
    bld2 = BinaryLabelDataset(df=df2, label_names=['label'],
                              protected_attribute_names=['feat'])
    cm = ClassificationMetric(bld, bld2)
    assert cm.generalized_entropy_index() == 0.3
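# A hand-computed check of the first assertion above: a minimal sketch using
# only numpy, following the Speicher et al. definition that AIF360 implements,
# with benefit b_i = yhat_i - y_i + 1 and GE(alpha=2) = (1/2n) * sum((b_i/mu)^2 - 1).
import numpy as np

y_true = np.array([1, 0, 0, 1, 0, 0, 1, 0, 1, 1])
y_pred = np.array([1, 0, 0, 0, 1, 1, 1, 0, 1, 0])

b = y_pred - y_true + 1          # per-individual benefit
mu = b.mean()                    # mean benefit (here 1.0)
gei = ((b / mu) ** 2 - 1).sum() / (2 * len(b))
assert np.isclose(gei, 0.2)      # matches cm.generalized_entropy_index()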
def get_metric_reports(true_dataset, classified_dataset,
                       privileged_groups, unprivileged_groups):
    # Build a copy of the classified dataset that carries the true labels, so
    # both datasets passed to ClassificationMetric share the same features,
    # scores, and metadata.
    mirror_dataset = classified_dataset.copy(deepcopy=True)
    mirror_dataset.labels = copy.deepcopy(true_dataset.labels)
    metric = ClassificationMetric(
        dataset=mirror_dataset,
        classified_dataset=classified_dataset,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
    # Measure accuracy and unfairness.
    report = OrderedDict()
    report['TPR'] = metric.true_positive_rate()
    report['TNR'] = metric.true_negative_rate()
    report['FPR'] = metric.false_positive_rate()
    report['FNR'] = metric.false_negative_rate()
    report['Balanced_Acc'] = 0.5 * (report['TPR'] + report['TNR'])
    report['Acc'] = metric.accuracy()
    report["Statistical parity difference"] = metric.statistical_parity_difference()
    report["Disparate impact"] = metric.disparate_impact()
    report["Equal opportunity difference"] = metric.equal_opportunity_difference()
    report["Average odds difference"] = metric.average_odds_difference()
    report["Theil index"] = metric.theil_index()
    report["United Fairness"] = metric.generalized_entropy_index()
    return report
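# Note on the last two report entries: in AIF360, theil_index() is the
# generalized entropy index with alpha=1, while generalized_entropy_index()
# defaults to alpha=2 (the value labeled "United Fairness" above, a
# nonstandard name for that metric). A quick sanity check, assuming `metric`
# is any ClassificationMetric instance:
assert np.isclose(metric.theil_index(),
                  metric.generalized_entropy_index(alpha=1))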
def compute_metrics(dataset_true, dataset_pred,
                    unprivileged_groups, privileged_groups,
                    disp=True):
    """Compute the key classification and fairness metrics."""
    classified_metric_pred = ClassificationMetric(
        dataset_true, dataset_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
    metrics = OrderedDict()
    metrics["Classification accuracy"] = classified_metric_pred.accuracy()
    metrics["Balanced accuracy"] = 0.5 * (
        classified_metric_pred.true_positive_rate()
        + classified_metric_pred.true_negative_rate())
    metrics["Statistical parity difference"] = \
        classified_metric_pred.statistical_parity_difference()
    metrics["Disparate impact"] = classified_metric_pred.disparate_impact()
    metrics["Average odds difference"] = \
        classified_metric_pred.average_odds_difference()
    metrics["Equal opportunity difference"] = \
        classified_metric_pred.equal_opportunity_difference()
    metrics["Theil index"] = classified_metric_pred.theil_index()
    metrics["United Fairness"] = \
        classified_metric_pred.generalized_entropy_index()
    if disp:
        for k in metrics:
            print("%s = %.4f" % (k, metrics[k]))
    return metrics
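# A minimal usage sketch for compute_metrics. The toy DataFrame and the
# column names ('sex', 'label') are illustrative assumptions, not part of
# the original code.
import pandas as pd
from aif360.datasets import BinaryLabelDataset

df_true = pd.DataFrame({'sex':   [0, 0, 0, 1, 1, 1],
                        'label': [1, 0, 1, 0, 0, 1]})
df_pred = df_true.copy()
df_pred['label'] = [1, 0, 0, 1, 0, 1]   # toy model predictions

bld_true = BinaryLabelDataset(df=df_true, label_names=['label'],
                              protected_attribute_names=['sex'])
bld_pred = BinaryLabelDataset(df=df_pred, label_names=['label'],
                              protected_attribute_names=['sex'])

compute_metrics(bld_true, bld_pred,
                unprivileged_groups=[{'sex': 1}],
                privileged_groups=[{'sex': 0}])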
def comb_algorithm(l, m, n, dataset_original1, privileged_groups1,
                   unprivileged_groups1, optim_options1):
    # Work on deep copies so repeated calls never mutate the caller's data.
    dataset_original2 = copy.deepcopy(dataset_original1)
    privileged_groups2 = copy.deepcopy(privileged_groups1)
    unprivileged_groups2 = copy.deepcopy(unprivileged_groups1)
    optim_options2 = copy.deepcopy(optim_options1)
    print(l, m, n)

    # 70/15/15 train/validation/test split.
    dataset_orig_train, dataset_orig_vt = dataset_original2.split([0.7], shuffle=True)
    dataset_orig_valid, dataset_orig_test = dataset_orig_vt.split([0.5], shuffle=True)

    # Pre-processing stage: l == 0 means no pre-processing.
    if l == 0:
        dataset_transf_train, dataset_transf_valid, dataset_transf_test = \
            dataset_orig_train, dataset_orig_valid, dataset_orig_test
    else:
        pre_used = preAlgorithm[l - 1]
        dataset_transf_train, dataset_transf_valid, dataset_transf_test = Pre(
            pre_used, dataset_orig_train, dataset_orig_valid, dataset_orig_test,
            privileged_groups2, unprivileged_groups2, optim_options2)
    # assert (l, m, n) != (2, 0, 0)
    # assert not np.all(dataset_transf_train.labels.flatten() == 1.0)

    # In-processing stage: m == 0 means a plain (unmitigated) model.
    if m == 0:
        dataset_transf_valid_pred, dataset_transf_test_pred = train(
            dataset_transf_train, dataset_transf_valid, dataset_transf_test,
            privileged_groups2, unprivileged_groups2)
    else:
        in_used = inAlgorithm[m - 1]
        if in_used == "adversarial_debiasing":
            dataset_transf_valid_pred, dataset_transf_test_pred = adversarial_debiasing(
                dataset_transf_train, dataset_transf_valid, dataset_transf_test,
                privileged_groups2, unprivileged_groups2)
        elif in_used == "art_classifier":
            dataset_transf_valid_pred, dataset_transf_test_pred = art_classifier(
                dataset_transf_train, dataset_transf_valid, dataset_transf_test,
                privileged_groups2, unprivileged_groups2)
        elif in_used == "prejudice_remover":
            # Prejudice remover needs the name of the sensitive attribute.
            for key, value in privileged_groups2[0].items():
                sens_attr = key
            dataset_transf_valid_pred, dataset_transf_test_pred = prejudice_remover(
                dataset_transf_train, dataset_transf_valid, dataset_transf_test,
                privileged_groups2, unprivileged_groups2, sens_attr)

    # Post-processing stage: n == 0 means no post-processing.
    if n == 0:
        dataset_transf_test_pred_transf = dataset_transf_test_pred
    else:
        post_used = postAlgorithm[n - 1]
        if post_used == "calibrated_eqodds":
            cpp = CalibratedEqOddsPostprocessing(
                privileged_groups=privileged_groups2,
                unprivileged_groups=unprivileged_groups2,
                cost_constraint=cost_constraint,
                seed=1)
            cpp = cpp.fit(dataset_transf_valid, dataset_transf_valid_pred)
            dataset_transf_test_pred_transf = cpp.predict(dataset_transf_test_pred)
        elif post_used == "eqodds":
            EO = EqOddsPostprocessing(unprivileged_groups=unprivileged_groups2,
                                      privileged_groups=privileged_groups2,
                                      seed=1)
            EO = EO.fit(dataset_transf_valid, dataset_transf_valid_pred)
            dataset_transf_test_pred_transf = EO.predict(dataset_transf_test_pred)
        elif post_used == "reject_option":
            ROC = RejectOptionClassification(
                unprivileged_groups=unprivileged_groups2,
                privileged_groups=privileged_groups2,
                low_class_thresh=0.01, high_class_thresh=0.99,
                num_class_thresh=100, num_ROC_margin=50,
                metric_name=allowed_metrics[0],
                metric_ub=metric_ub, metric_lb=metric_lb)
            ROC = ROC.fit(dataset_transf_valid, dataset_transf_valid_pred)
            dataset_transf_test_pred_transf = ROC.predict(dataset_transf_test_pred)

    # Evaluate the final predictions against the (pre-processed) test labels.
    metric = ClassificationMetric(dataset_transf_test,
                                  dataset_transf_test_pred_transf,
                                  unprivileged_groups=unprivileged_groups2,
                                  privileged_groups=privileged_groups2)
    metrics = OrderedDict()
    metrics["Classification accuracy"] = metric.accuracy()
    TPR = metric.true_positive_rate()
    TNR = metric.true_negative_rate()
    bal_acc_nodebiasing_test = 0.5 * (TPR + TNR)
    metrics["Balanced classification accuracy"] = bal_acc_nodebiasing_test
    metrics["Statistical parity difference"] = metric.statistical_parity_difference()
    metrics["Disparate impact"] = metric.disparate_impact()
    metrics["Equal opportunity difference"] = metric.equal_opportunity_difference()
    metrics["Average odds difference"] = metric.average_odds_difference()
    metrics["Theil index"] = metric.theil_index()
    metrics["United Fairness"] = metric.generalized_entropy_index()
    # print(metrics)

    # Serialize the rounded metric values as a bracketed string.
    # (The loop variable is renamed from `m` to avoid shadowing the parameter.)
    feature = "["
    for name in metrics:
        feature = feature + " " + str(round(metrics[name], 4))
    feature = feature + "]"
    return feature
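# A sketch of how comb_algorithm is presumably driven: enumerate every
# (pre, in, post) combination, with index 0 meaning "skip this stage". The
# preAlgorithm/inAlgorithm/postAlgorithm lists are the module-level globals
# assumed by the function above; dataset_orig, privileged_groups,
# unprivileged_groups, and optim_options are illustrative assumptions.
results = {}
for l in range(len(preAlgorithm) + 1):
    for m in range(len(inAlgorithm) + 1):
        for n in range(len(postAlgorithm) + 1):
            results[(l, m, n)] = comb_algorithm(
                l, m, n, dataset_orig, privileged_groups,
                unprivileged_groups, optim_options)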
def compute_metrics(model, X_test, y_test, X_train, y_train, dataset_test,
                    unprivileged_groups, privileged_groups,
                    protect_attribute, print_result):
    """Calculate and return model accuracy and fairness metrics.

    Parameters
    ----------
    model : scikit-learn classifier
    X_test : numpy 2d array
    y_test : numpy 1d array
    X_train : numpy 2d array
    y_train : numpy 1d array
    dataset_test : aif360.datasets.BinaryLabelDataset
    unprivileged_groups : list<dict>
        Each dict maps the name of the sensitive column in the dataset to
        the value that identifies the unprivileged group.
    privileged_groups : list<dict>
        Each dict maps the name of the sensitive column in the dataset to
        the value that identifies the privileged group.
    protect_attribute : int
        Column position of the sensitive attribute, forwarded to the
        consitency/counterfactual helpers.
    print_result : bool
        If True, print every computed metric.
    """
    result = {}
    y_pred_test = model.predict(X_test)
    result['acc_test'] = accuracy_score(y_true=y_test, y_pred=y_pred_test)
    y_pred_train = model.predict(X_train)
    result['acc_train'] = accuracy_score(y_true=y_train, y_pred=y_pred_train)

    # AIF360 stores labels as an (n, 1) column vector, so reshape the 1d
    # prediction array before assigning it.
    dataset_pred = dataset_test.copy()
    dataset_pred.labels = y_pred_test.reshape(-1, 1)

    bin_metric = BinaryLabelDatasetMetric(dataset_pred,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=privileged_groups)
    result['disp_impact'] = bin_metric.disparate_impact()
    result['stat_parity'] = bin_metric.mean_difference()

    classif_metric = ClassificationMetric(dataset_test, dataset_pred,
                                          unprivileged_groups=unprivileged_groups,
                                          privileged_groups=privileged_groups)
    result['avg_odds'] = classif_metric.average_odds_difference()
    result['equal_opport'] = classif_metric.equal_opportunity_difference()
    result['false_discovery_rate'] = classif_metric.false_discovery_rate_difference()
    result['entropy_index'] = classif_metric.generalized_entropy_index()
    result['acc_test_clf'] = classif_metric.accuracy(privileged=None)
    result['acc_test_priv'] = classif_metric.accuracy(privileged=True)
    result['acc_test_unpriv'] = classif_metric.accuracy(privileged=False)
    result['consistency'] = consitency(X_test, y_pred_test, protect_attribute,
                                       n_neighbors=5)
    result['counterfactual'] = counterfactual(X_test, model, protect_attribute)

    if print_result:
        print("Train accuracy: ", result['acc_train'])
        print("Test accuracy: ", result['acc_test'])
        print("Test accuracy clf: ", result['acc_test_clf'])
        print("Test accuracy priv.: ", result['acc_test_priv'])
        print("Test accuracy unpriv.: ", result['acc_test_unpriv'])
        print('Disparate impact: ', result['disp_impact'])
        print('Mean difference: ', result['stat_parity'])
        print('Average odds difference:', result['avg_odds'])
        print('Equality of opportunity:', result['equal_opport'])
        print('False discovery rate difference:', result['false_discovery_rate'])
        print('Generalized entropy index:', result['entropy_index'])
        print('Consistency: ', result['consistency'])
        print('Counterfactual fairness: ', result['counterfactual'])
    return result
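# The consitency (sic) and counterfactual helpers above are defined elsewhere
# in this codebase. The sketches below are plausible implementations under
# the usual definitions (Zemel et al. consistency, as in AIF360's
# BinaryLabelDatasetMetric.consistency, and prediction agreement after
# flipping a binary protected column); they are assumptions, not the originals.
import numpy as np
from sklearn.neighbors import NearestNeighbors

def consitency(X, y_pred, protect_attribute, n_neighbors=5):
    # 1 - mean |yhat_i - mean(yhat over i's k nearest neighbors)|,
    # with neighbors found on the non-protected features only.
    X_np = np.delete(np.asarray(X), protect_attribute, axis=1)
    nbrs = NearestNeighbors(n_neighbors=n_neighbors).fit(X_np)
    _, idx = nbrs.kneighbors(X_np)
    return 1 - np.mean(np.abs(y_pred - y_pred[idx].mean(axis=1)))

def counterfactual(X, model, protect_attribute):
    # Fraction of individuals whose prediction is unchanged when the
    # binary protected attribute is flipped.
    X_flip = np.asarray(X).copy()
    X_flip[:, protect_attribute] = 1 - X_flip[:, protect_attribute]
    return np.mean(model.predict(X) == model.predict(X_flip))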
# Map the groups defined by the sensitive attribute.
unprivileged_map = [{'race': 1}]
privileged_map = [{'race': 0}]

#%%
# =============================================================================
# TESTING FAIRNESS METRIC FUNCTIONALITY OF AIF360
# =============================================================================
# Compute the classification metrics for the black vs. white comparison.
fairness_metrics = ClassificationMetric(dataset=bld_true,
                                        classified_dataset=bld_pred,
                                        unprivileged_groups=unprivileged_map,
                                        privileged_groups=privileged_map)
fairness_metrics.generalized_entropy_index()

# Confusion matrices for the total population, whites, and blacks.
conf_mat_total = fairness_metrics.binary_confusion_matrix(privileged=None)
conf_mat_white = fairness_metrics.binary_confusion_matrix(privileged=True)
conf_mat_black = fairness_metrics.binary_confusion_matrix(privileged=False)

for i, j in conf_mat_total.items():
    print('The number of ' + i + 's is ' + str(j))

# Interactive inspection (these expressions only display in a REPL).
type(conf_mat_total)
conf_mat_total.keys()
conf_mat_total.values()

# Reorder the counts into the conventional [TN, FP, FN, TP] layout.
conf_mat_keys_ordered = ['TN', 'FP', 'FN', 'TP']
conf_mat_values_ordered = [conf_mat_total[k] for k in conf_mat_keys_ordered]
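# The [TN, FP, FN, TP] ordering matches the ravel of sklearn's binary
# confusion_matrix, so the counts reshape into the familiar 2x2 layout.
# A small follow-up sketch (the reshape and derived rates are assumptions
# about where this script was heading):
import numpy as np
conf_mat_2x2 = np.array(conf_mat_values_ordered).reshape(2, 2)
# Row 0 holds the actual negatives [TN, FP]; row 1 the actual positives [FN, TP].
tpr = conf_mat_total['TP'] / (conf_mat_total['TP'] + conf_mat_total['FN'])
fpr = conf_mat_total['FP'] / (conf_mat_total['FP'] + conf_mat_total['TN'])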
def comb_algorithm(l, m, n, dataset_original1, privileged_groups1,
                   unprivileged_groups1, optim_options1):
    # Work on deep copies so repeated calls never mutate the caller's data.
    dataset_original2 = copy.deepcopy(dataset_original1)
    privileged_groups2 = copy.deepcopy(privileged_groups1)
    unprivileged_groups2 = copy.deepcopy(unprivileged_groups1)
    optim_options2 = copy.deepcopy(optim_options1)
    print(l, m, n)

    # 70/15/15 train/validation/test split.
    dataset_original_train, dataset_original_vt = dataset_original2.split(
        [0.7], shuffle=True)
    dataset_original_valid, dataset_original_test = dataset_original_vt.split(
        [0.5], shuffle=True)
    print('=======================')
    # print(dataset_original_test.labels)

    dataset_orig_train = copy.deepcopy(dataset_original_train)
    dataset_orig_valid = copy.deepcopy(dataset_original_valid)
    dataset_orig_test = copy.deepcopy(dataset_original_test)

    # Pre-processing stage: l == 0 means no pre-processing.
    if l == 0:
        dataset_transfer_train = copy.deepcopy(dataset_original_train)
        dataset_transfer_valid = copy.deepcopy(dataset_original_valid)
        dataset_transfer_test = copy.deepcopy(dataset_original_test)
    else:
        pre_used = preAlgorithm[l - 1]
        dataset_transfer_train, dataset_transfer_valid, dataset_transfer_test = Pre(
            pre_used, dataset_orig_train, dataset_orig_valid, dataset_orig_test,
            privileged_groups2, unprivileged_groups2, optim_options2)
    dataset_transf_train = copy.deepcopy(dataset_transfer_train)
    dataset_transf_valid = copy.deepcopy(dataset_transfer_valid)
    dataset_transf_test = copy.deepcopy(dataset_transfer_test)

    # In-processing stage: m == 0 means a plain (unmitigated) model.
    if m == 0:
        dataset_transfer_valid_pred, dataset_transfer_test_pred = plain_model(
            dataset_transf_train, dataset_transf_valid, dataset_transf_test,
            privileged_groups2, unprivileged_groups2)
    else:
        in_used = inAlgorithm[m - 1]
        if in_used == "adversarial_debiasing":
            dataset_transfer_valid_pred, dataset_transfer_test_pred = adversarial_debiasing(
                dataset_transf_train, dataset_transf_valid, dataset_transf_test,
                privileged_groups2, unprivileged_groups2)
        elif in_used == "art_classifier":
            dataset_transfer_valid_pred, dataset_transfer_test_pred = art_classifier(
                dataset_transf_train, dataset_transf_valid, dataset_transf_test,
                privileged_groups2, unprivileged_groups2)
        elif in_used == "prejudice_remover":
            # Prejudice remover needs the name of the sensitive attribute.
            for key, value in privileged_groups2[0].items():
                sens_attr = key
            dataset_transfer_valid_pred, dataset_transfer_test_pred = prejudice_remover(
                dataset_transf_train, dataset_transf_valid, dataset_transf_test,
                privileged_groups2, unprivileged_groups2, sens_attr)
    dataset_transf_valid_pred = copy.deepcopy(dataset_transfer_valid_pred)
    dataset_transf_test_pred = copy.deepcopy(dataset_transfer_test_pred)

    # Post-processing stage: n == 0 means no post-processing.
    if n == 0:
        dataset_transf_test_pred_transf = copy.deepcopy(dataset_transfer_test_pred)
    else:
        post_used = postAlgorithm[n - 1]
        if post_used == "calibrated_eqodds":
            cpp = CalibratedEqOddsPostprocessing(
                privileged_groups=privileged_groups2,
                unprivileged_groups=unprivileged_groups2,
                cost_constraint=cost_constraint)
            cpp = cpp.fit(dataset_transfer_valid, dataset_transf_valid_pred)
            dataset_transf_test_pred_transf = cpp.predict(dataset_transf_test_pred)
        elif post_used == "eqodds":
            EO = EqOddsPostprocessing(unprivileged_groups=unprivileged_groups2,
                                      privileged_groups=privileged_groups2)
            EO = EO.fit(dataset_transfer_valid, dataset_transf_valid_pred)
            dataset_transf_test_pred_transf = EO.predict(dataset_transf_test_pred)
        elif post_used == "reject_option":
            # dataset_transf_test_pred_transf = reject_option(
            #     dataset_transf_valid, dataset_transf_valid_pred,
            #     dataset_transf_test, dataset_transf_test_pred,
            #     privileged_groups2, unprivileged_groups2)
            ROC = RejectOptionClassification(
                unprivileged_groups=unprivileged_groups2,
                privileged_groups=privileged_groups2)
            ROC = ROC.fit(dataset_transfer_valid, dataset_transf_valid_pred)
            dataset_transf_test_pred_transf = ROC.predict(dataset_transf_test_pred)

    # Accuracy of the in-processing predictions before post-processing.
    org_labels = dataset_orig_test.labels
    print(dataset_orig_test.labels)
    pred_labels = dataset_transf_test_pred.labels
    print(dataset_transf_test_pred.labels)
    true_pred = org_labels == pred_labels
    # Divide by the number of samples: labels are an (n, 1) column vector, so
    # .size is the sample count (the original divided by shape[1], which is 1).
    print("acc after in: ", float(np.sum(true_pred)) / pred_labels.size)

    # Evaluate the final predictions against the (pre-processed) test labels.
    metric = ClassificationMetric(dataset_transfer_test,
                                  dataset_transf_test_pred_transf,
                                  unprivileged_groups=unprivileged_groups2,
                                  privileged_groups=privileged_groups2)
    metrics = OrderedDict()
    metrics["Classification accuracy"] = metric.accuracy()
    TPR = metric.true_positive_rate()
    TNR = metric.true_negative_rate()
    bal_acc_nodebiasing_test = 0.5 * (TPR + TNR)
    metrics["Balanced classification accuracy"] = bal_acc_nodebiasing_test
    metrics["Statistical parity difference"] = metric.statistical_parity_difference()
    metrics["Disparate impact"] = metric.disparate_impact()
    metrics["Equal opportunity difference"] = metric.equal_opportunity_difference()
    metrics["Average odds difference"] = metric.average_odds_difference()
    metrics["Theil index"] = metric.theil_index()
    metrics["United Fairness"] = metric.generalized_entropy_index()

    # Return both the rounded metric values and their string form.
    # (The loop variable is renamed from `m` to avoid shadowing the parameter.)
    feature = []
    feature_str = "["
    for name in metrics:
        value = round(metrics[name], 4)
        feature.append(value)
        feature_str = feature_str + str(value) + " "
    feature_str = feature_str + "]"
    return feature, feature_str
def compute_metrics(model, X_test, y_test, X_train, y_train, dataset_test,
                    dataset_name, model_name, unprivileged_groups,
                    privileged_groups, position):
    """Calculate and return model accuracy and fairness metrics.

    Parameters
    ----------
    model : scikit-learn classifier
    X_test : numpy 2d array
    y_test : numpy 1d array
    X_train : numpy 2d array
    y_train : numpy 1d array
    dataset_test : aif360.datasets.BinaryLabelDataset
    dataset_name : string
        Dataset name used in the analysis.
    model_name : string
        Model name used in the analysis.
    unprivileged_groups : list<dict>
        Each dict maps the name of the sensitive column in the dataset to
        the value that identifies the unprivileged group.
    privileged_groups : list<dict>
        Each dict maps the name of the sensitive column in the dataset to
        the value that identifies the privileged group.
    position : int
        Column position of the sensitive attribute in the dataset.
    """
    y_pred_test = model.predict(X_test)
    acc_test = accuracy_score(y_true=y_test, y_pred=y_pred_test)
    print("Test accuracy: ", acc_test)
    y_pred_train = model.predict(X_train)
    acc_train = accuracy_score(y_true=y_train, y_pred=y_pred_train)
    print("Train accuracy: ", acc_train)

    # AIF360 stores labels as an (n, 1) column vector, so reshape the 1d
    # prediction array before assigning it.
    dataset_pred = dataset_test.copy()
    dataset_pred.labels = y_pred_test.reshape(-1, 1)

    bin_metric = BinaryLabelDatasetMetric(
        dataset_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
    disparate_impact_bin = bin_metric.disparate_impact()
    print('Disparate impact: ', disparate_impact_bin)
    mean_difference = bin_metric.mean_difference()
    print('Mean difference: ', mean_difference)

    classif_metric = ClassificationMetric(
        dataset_test, dataset_pred,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)
    classif_disparate_impact = classif_metric.disparate_impact()
    avg_odds = classif_metric.average_odds_difference()
    print('Average odds difference:', avg_odds)
    equal_opport = classif_metric.equal_opportunity_difference()
    print('Equality of opportunity:', equal_opport)
    false_discovery_rate = classif_metric.false_discovery_rate_difference()
    print('False discovery rate difference:', false_discovery_rate)
    entropy_index = classif_metric.generalized_entropy_index()
    print('Generalized entropy index:', entropy_index)
    cons_comp = consitency_mod(bin_metric, position, n_neighbors=5)
    print('Consistency: ', cons_comp)

    result = (dataset_name, model_name, acc_test, disparate_impact_bin,
              mean_difference, classif_disparate_impact, avg_odds,
              equal_opport, false_discovery_rate, entropy_index, cons_comp)
    return result
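# A usage sketch: collect the result tuples from several models into a pandas
# DataFrame. The column names mirror the tuple order returned above; the
# `models` dict and the 'adult' dataset name are illustrative assumptions.
import pandas as pd

columns = ['dataset', 'model', 'acc_test', 'disp_impact_bin',
           'mean_difference', 'disp_impact_clf', 'avg_odds', 'equal_opport',
           'false_discovery_rate', 'entropy_index', 'consistency']

rows = [compute_metrics(clf, X_test, y_test, X_train, y_train, dataset_test,
                        'adult', name, unprivileged_groups,
                        privileged_groups, position)
        for name, clf in models.items()]
results_df = pd.DataFrame(rows, columns=columns)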