# Shared imports for the snippets below; test helpers such as ExamplePredictor,
# labels_ex, scores_ex, the EQUALIZED_ODDS/DEMOGRAPHIC_PARITY constants, and
# the pytest fixtures are assumed to come from the surrounding test modules.
import numpy as np
from sklearn.metrics import roc_auc_score

from fairlearn.postprocessing import ThresholdOptimizer


def test_threshold_optimization_equalized_odds_e2e(
        sensitive_features, sensitive_feature_names,
        expected_positive_p0, expected_positive_p1,
        expected_negative_p0, expected_negative_p1,
        X_transform, y_transform, sensitive_features_transform):
    X = X_transform(_format_as_list_of_lists(sensitive_features))
    y = y_transform(labels_ex)
    sensitive_features_ = sensitive_features_transform(sensitive_features)
    adjusted_predictor = ThresholdOptimizer(
        unconstrained_predictor=ExamplePredictor(),
        constraints=EQUALIZED_ODDS)
    adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_)

    predictions = adjusted_predictor._pmf_predict(
        X, sensitive_features=sensitive_features_)

    # assert equalized odds
    for a in sensitive_feature_names:
        positive_indices = (np.array(sensitive_features) == a) * \
            (np.array(labels_ex) == 1)
        negative_indices = (np.array(sensitive_features) == a) * \
            (np.array(labels_ex) == 0)
        average_probs_positive_indices = np.average(
            predictions[positive_indices], axis=0)
        average_probs_negative_indices = np.average(
            predictions[negative_indices], axis=0)
        assert np.isclose(average_probs_positive_indices[0],
                          expected_positive_p0)
        assert np.isclose(average_probs_positive_indices[1],
                          expected_positive_p1)
        assert np.isclose(average_probs_negative_indices[0],
                          expected_negative_p0)
        assert np.isclose(average_probs_negative_indices[1],
                          expected_negative_p1)
def test_threshold_optimization_equalized_odds_e2e(data_X_y_sf):
    adjusted_predictor = ThresholdOptimizer(
        estimator=ExamplePredictor(scores_ex),
        constraints=EQUALIZED_ODDS)
    adjusted_predictor.fit(data_X_y_sf.X, data_X_y_sf.y,
                           sensitive_features=data_X_y_sf.sensitive_features)

    predictions = adjusted_predictor._pmf_predict(
        data_X_y_sf.X, sensitive_features=data_X_y_sf.sensitive_features)

    expected_ps = _expected_ps_equalized_odds[data_X_y_sf.example_name]
    mapped_sensitive_features = _map_into_single_column(
        data_X_y_sf.sensitive_features)

    # assert equalized odds
    for a in data_X_y_sf.feature_names:
        pos_indices = (mapped_sensitive_features == a) * (labels_ex == 1)
        neg_indices = (mapped_sensitive_features == a) * (labels_ex == 0)
        average_probs_positive_indices = np.average(predictions[pos_indices],
                                                    axis=0)
        average_probs_negative_indices = np.average(predictions[neg_indices],
                                                    axis=0)
        assert np.isclose(average_probs_positive_indices[0],
                          expected_ps[_POS_P0])
        assert np.isclose(average_probs_positive_indices[1],
                          expected_ps[_POS_P1])
        assert np.isclose(average_probs_negative_indices[0],
                          expected_ps[_NEG_P0])
        assert np.isclose(average_probs_negative_indices[1],
                          expected_ps[_NEG_P1])
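# A hedged aside, not part of the tests above: the same equalized-odds
# property can also be spot-checked with fairlearn's built-in metric instead
# of hand-rolled per-group averages. The toy arrays below are illustrative.
import numpy as np
from fairlearn.metrics import equalized_odds_difference

y_true = np.array([1, 1, 0, 0, 1, 0])
y_pred = np.array([1, 0, 0, 1, 1, 0])
groups = np.array(["A", "A", "A", "B", "B", "B"])
# Largest gap in true-positive or false-positive rate across groups;
# 0.0 means the hard predictions satisfy equalized odds exactly.
print(equalized_odds_difference(y_true, y_pred, sensitive_features=groups))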
def test_threshold_optimization_demographic_parity_e2e(data_X_y_sf):
    adjusted_predictor = ThresholdOptimizer(
        estimator=ExamplePredictor(scores_ex),
        constraints=DEMOGRAPHIC_PARITY)
    adjusted_predictor.fit(data_X_y_sf.X, data_X_y_sf.y,
                           sensitive_features=data_X_y_sf.sensitive_features)

    predictions = adjusted_predictor._pmf_predict(
        data_X_y_sf.X, sensitive_features=data_X_y_sf.sensitive_features)
    expected_ps = _expected_ps_demographic_parity[data_X_y_sf.example_name]

    # assert demographic parity
    for sensitive_feature_name in data_X_y_sf.feature_names:
        average_probs = np.average(
            predictions[_map_into_single_column(
                data_X_y_sf.sensitive_features) == sensitive_feature_name],
            axis=0)
        assert np.isclose(average_probs[0], expected_ps[_P0])
        assert np.isclose(average_probs[1], expected_ps[_P1])
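# A minimal, self-contained sketch of the public ThresholdOptimizer workflow
# that the tests above exercise via fixtures. The synthetic data and the
# LogisticRegression estimator are assumptions for illustration only.
import numpy as np
from sklearn.linear_model import LogisticRegression
from fairlearn.postprocessing import ThresholdOptimizer

rng = np.random.default_rng(0)
X_demo = rng.normal(size=(200, 3))
y_demo = (X_demo[:, 0] + rng.normal(size=200) > 0).astype(int)
groups_demo = rng.choice(["A", "B"], size=200)

postprocessor = ThresholdOptimizer(estimator=LogisticRegression(),
                                   constraints="demographic_parity")
postprocessor.fit(X_demo, y_demo, sensitive_features=groups_demo)
# predict() applies the learned group-specific (randomized) thresholds
y_adjusted = postprocessor.predict(X_demo, sensitive_features=groups_demo)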
class demographic_parity_classifier(base_binary_classifier):
    # base_binary_classifier and erm_classifier are defined elsewhere in
    # this project.
    def fit(self, _X, _Y, _classifier_name="logistic", _predictor="hard"):
        my_erm_classifier = erm_classifier(self.train_X, self.train_Y)
        my_erm_classifier.fit(self.train_X, self.train_Y,
                              classifier_name=_classifier_name)
        self.model = ThresholdOptimizer(estimator=my_erm_classifier,
                                        constraints="demographic_parity",
                                        prefit=True)
        # NOTE: ThresholdOptimizer.fit has no _predictor parameter of its
        # own; the keyword appears to be forwarded via **kwargs and is
        # effectively unused when prefit=True.
        self.model.fit(self.train_X, self.train_Y,
                       sensitive_features=self.sensitive_train,
                       _predictor=_predictor)

    def predict(self, x_samples, sensitive_features):
        y_samples = self.model.predict(x_samples,
                                       sensitive_features=sensitive_features)
        return y_samples

    def get_accuracy(self, X, y_true, sensitive_features):
        # 0/1 labels, so one minus the mean squared error is the accuracy
        y_pred = self.predict(X, sensitive_features)
        return 1 - np.sum(np.power(y_pred - y_true, 2)) / len(y_true)

    def predict_proba(self, x_samples, sensitive_features):
        y_samples = self.model._pmf_predict(
            x_samples, sensitive_features=sensitive_features)
        return y_samples
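# The class above relies on prefit=True: the inner classifier is trained
# first, and ThresholdOptimizer only learns group-specific thresholds on top
# of it. A self-contained sketch of that pattern; the data and the
# LogisticRegression estimator are illustrative stand-ins for erm_classifier.
import numpy as np
from sklearn.linear_model import LogisticRegression
from fairlearn.postprocessing import ThresholdOptimizer

rng = np.random.default_rng(1)
X_pre = rng.normal(size=(300, 4))
y_pre = (X_pre[:, 0] > 0).astype(int)
groups_pre = rng.choice(["A", "B"], size=300)

base = LogisticRegression().fit(X_pre, y_pre)    # fitted before wrapping
postprocessor = ThresholdOptimizer(estimator=base,
                                   constraints="demographic_parity",
                                   prefit=True)  # do not refit the estimator
postprocessor.fit(X_pre, y_pre, sensitive_features=groups_pre)
y_fair = postprocessor.predict(X_pre, sensitive_features=groups_pre)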
def test_threshold_optimization_demographic_parity_e2e(
        sensitive_features, sensitive_feature_names, expected_p0, expected_p1,
        X_transform, y_transform, sensitive_features_transform):
    X = X_transform(_format_as_list_of_lists(sensitive_features))
    y = y_transform(labels_ex)
    sensitive_features_ = sensitive_features_transform(sensitive_features)
    adjusted_predictor = ThresholdOptimizer(
        unconstrained_predictor=ExamplePredictor(),
        constraints=DEMOGRAPHIC_PARITY)
    adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_)

    predictions = adjusted_predictor._pmf_predict(
        X, sensitive_features=sensitive_features_)

    # assert demographic parity
    for sensitive_feature_name in sensitive_feature_names:
        average_probs = np.average(
            predictions[np.array(sensitive_features) == sensitive_feature_name],
            axis=0)
        assert np.isclose(average_probs[0], expected_p0)
        assert np.isclose(average_probs[1], expected_p1)
class fair_classifier(pseudo_classifier):
    # pseudo_classifier is defined elsewhere in this project.
    def __init__(self, train_X, train_y, train_score_y, sensitive_train,
                 test_X, test_y, test_score_y, sensitive_test, metric,
                 sensitive_features_dict=None, HARD=False):
        self.train_X = train_X
        self.train_Y = train_y
        # HARD rounds the scores to hard 0/1 labels
        if HARD:
            self.train_score_Y = np.round(train_score_y)
        else:
            self.train_score_Y = train_score_y
        self.sensitive_train = sensitive_train

        self.test_X = test_X
        self.test_Y = test_y
        if HARD:
            self.test_score_Y = np.round(test_score_y)
        else:
            self.test_score_Y = test_score_y
        self.sensitive_test = sensitive_test
        self.sensitive_features_dict = sensitive_features_dict

        self.erm_classifier = pseudo_classifier(
            self.train_X, self.train_Y, self.train_score_Y,
            self.sensitive_train, self.test_X, self.test_Y,
            self.test_score_Y, self.sensitive_test)
        assert metric in ["equalized_odds", "demographic_parity"]
        self.metric = metric

    def fit(self):
        self.erm_classifier.fit(self.train_X, self.train_Y)
        self.model = ThresholdOptimizer(estimator=self.erm_classifier,
                                        constraints=self.metric,
                                        prefit=True)
        self.model.fit(self.train_X, self.train_Y,
                       sensitive_features=self.sensitive_train)

    def predict(self, x_samples, sensitive_features):
        y_samples = self.model.predict(x_samples,
                                       sensitive_features=sensitive_features)
        return y_samples

    def get_accuracy(self, X, y_true, sensitive_features):
        # 0/1 labels, so one minus the mean squared error is the accuracy
        y_pred = self.predict(X, sensitive_features)
        return 1 - np.sum(np.power(y_pred - y_true, 2)) / len(y_true)

    def predict_prob(self, x_samples, sensitive_features):
        y_samples = self.model._pmf_predict(
            x_samples, sensitive_features=sensitive_features)
        return y_samples

    def get_avg_group_confusion_matrix(self, sensitive_features, X, true_Y):
        # Produces average tp/fp/tn/fn/accuracy per group. Essentially
        # get_group_confusion_matrix, modified to return average values where
        # possible: for a trained classifier, get the true positive and true
        # negative rates per group (currently only works for binary labels).
        groups = np.unique(sensitive_features)
        tp_rate = {}
        fp_rate = {}
        tn_rate = {}
        fn_rate = {}
        true_pos_index = np.where(true_Y == 1)
        true_neg_index = np.where(true_Y == 0)

        # Probability of each classification for each input
        y_pred_prob = self.predict_prob(X, sensitive_features)
        # Average probability of correct classification, i.e. the expected
        # accuracy
        avg_micro_acc = (np.sum(y_pred_prob[true_pos_index][:, 1]) +
                         np.sum(y_pred_prob[true_neg_index][:, 0])) / len(true_Y)
        print("Average Overall Accuracy: ", avg_micro_acc)
        micro_auc = roc_auc_score(true_Y, y_pred_prob[:, 1])
        print("Overall AUC: ", micro_auc)

        out_dict = {}  # format: {group: [tp, tn, fp, fn, accuracy, auc]}
        avg_macro_acc = 0
        macro_auc = 0
        for index, group in enumerate(groups):
            indices = np.where(sensitive_features == group)[0]
            true_class = true_Y[indices]
            pred_prob = y_pred_prob[indices]
            true_pos_index = np.where(true_class == 1)[0]
            true_neg_index = np.where(true_class == 0)[0]
            if len(true_pos_index) == 0 or len(true_neg_index) == 0:
                print("No true positives or no true negatives in this group")
                continue

            # Find average rates (i.e. average probability of tp/tn/fp/fn)
            tp = np.sum(pred_prob[true_pos_index][:, 1]) / len(true_pos_index)
            tn = np.sum(pred_prob[true_neg_index][:, 0]) / len(true_neg_index)
            fp = np.sum(pred_prob[true_neg_index][:, 1]) / len(true_neg_index)
            fn = np.sum(pred_prob[true_pos_index][:, 0]) / len(true_pos_index)
            tp_rate[group] = tp
            tn_rate[group] = tn
            fp_rate[group] = fp
            fn_rate[group] = fn

            # Expected accuracy within the group
            accuracy = (np.sum(pred_prob[true_pos_index][:, 1]) +
                        np.sum(pred_prob[true_neg_index][:, 0])) / len(true_class)
            avg_macro_acc += accuracy
            auc = roc_auc_score(true_class, pred_prob[:, 1])
            macro_auc += auc
            out_dict[group] = [tp, tn, fp, fn, accuracy, auc]

            print(group, "average confusion matrix")
            if tp == 0 and fp == 0:
                print("None classified as Positive in group", group)
                print("\t Average Group Accuracy: ", accuracy)
            else:
                # Can't compute an F1 score from these since we are dealing
                # with average values rather than counts:
                # precision = tp / (tp + fp)
                # recall = tp / (tp + fn)
                # f1 = 2 * precision * recall / (precision + recall)
                print("\t Average Group Accuracy: ", accuracy)
                print("\t Group AUC: ", auc)
                print("\t Average True positive rate:", tp)
                print("\t Average True negative rate:", tn)
                print("\t Average False positive rate:", fp)
                print("\t Average False negative rate:", fn)

        # NOTE: groups skipped above still count in these denominators
        avg_macro_acc /= len(groups)
        macro_auc /= len(groups)
        return out_dict, {"Accuracy": (avg_micro_acc, avg_macro_acc),
                          "AUC": (micro_auc, macro_auc)}
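# A tiny worked example of the "average rate" computations in
# get_avg_group_confusion_matrix, using hypothetical probabilities for a
# single group of three examples with true labels [1, 1, 0]:
import numpy as np

pred_prob = np.array([[0.2, 0.8],   # columns: P(y_hat = 0), P(y_hat = 1)
                      [0.4, 0.6],
                      [0.9, 0.1]])
true_class = np.array([1, 1, 0])
pos = np.where(true_class == 1)[0]
neg = np.where(true_class == 0)[0]

tp = pred_prob[pos][:, 1].mean()    # (0.8 + 0.6) / 2 = 0.7
tn = pred_prob[neg][:, 0].mean()    # 0.9
fp = pred_prob[neg][:, 1].mean()    # 0.1
fn = pred_prob[pos][:, 0].mean()    # (0.2 + 0.4) / 2 = 0.3
# Expected accuracy: (0.8 + 0.6 + 0.9) / 3 ≈ 0.7667
accuracy = (pred_prob[pos][:, 1].sum() + pred_prob[neg][:, 0].sum()) / 3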