def write_adult_data_to_disk():
    """Dump a train/test split of the adult data to two JSON files.

    Loads the data with ``load_adult_data()`` (called without arguments),
    splits it with ``ut.split_into_train_test`` using a fraction argument of
    0.7, and writes ``adult_train.json`` and ``adult_test.json`` into the
    current working directory.  Each file holds a single JSON object with the
    keys ``"x"``, ``"class"`` and ``"sensitive"``; ``"sensitive"`` contains
    only the ``"sex"`` attribute.

    Returns:
        None.
    """
    X, y, x_control = load_adult_data()
    (x_train, y_train, x_control_train,
     x_test, y_test, x_control_test) = ut.split_into_train_test(
         X, y, x_control, 0.7)

    def _as_record(features, labels, controls):
        # .tolist() converts the inputs (presumably numpy arrays -- confirm
        # against load_adult_data) into plain lists json.dump can serialize.
        return {
            "x": features.tolist(),
            "class": labels.tolist(),
            "sensitive": {"sex": controls["sex"].tolist()},
        }

    # Build both payloads before touching the disk so that a conversion
    # failure does not leave a half-written pair of files behind.
    adult_train = _as_record(x_train, y_train, x_control_train)
    adult_test = _as_record(x_test, y_test, x_control_test)

    # Context managers guarantee the handles are closed even if json.dump
    # raises part-way through.
    with open("adult_train.json", "w") as train_out:
        json.dump(adult_train, train_out)
    with open("adult_test.json", "w") as test_out:
        json.dump(adult_test, test_out)
def setup_data(self, X, y, x_control, train_split=.7, val_split=0.):
    """Split the dataset and store the pieces as attributes on ``self``.

    Sets ``x_train``/``y_train``/``x_control_train`` and
    ``x_test``/``y_test``/``x_control_test`` from a first call to
    ``ut.split_into_train_test``.  When ``val_split`` is greater than zero
    the training portion is split a second time and ``x_val``/``y_val``/
    ``x_control_val`` are filled in; otherwise those three are set to
    ``None``.

    Args:
        X: feature matrix; only its ``.shape`` is read here directly.
        y: labels, passed through to the splitter.
        x_control: sensitive-attribute data, passed through to the splitter.
        train_split: fraction argument for the train/test split.
        val_split: fraction argument for the optional second split.

    Returns:
        None.
    """
    print('Loaded {} dataset with dimension {}'.format(self.ds_name, X.shape))

    # First cut: training portion versus held-out test portion.
    outer_parts = ut.split_into_train_test(X, y, x_control, train_split)
    (self.x_train, self.y_train, self.x_control_train,
     self.x_test, self.y_test, self.x_control_test) = outer_parts

    # No validation set requested: record that explicitly and stop.
    if not val_split > 0.:
        self.x_val = self.y_val = self.x_control_val = None
        return

    # NOTE(review): val_split is handed to the splitter in the same position
    # train_split occupied above, so the first returned part (kept as the new
    # training set) may be the val_split fraction rather than its
    # complement -- confirm against ut.split_into_train_test.
    inner_parts = ut.split_into_train_test(
        self.x_train, self.y_train, self.x_control_train, val_split)
    (self.x_train, self.y_train, self.x_control_train,
     self.x_val, self.y_val, self.x_control_val) = inner_parts
def test_compas_data():
    """Train an unconstrained and an FPR-constrained classifier on COMPAS.

    Loads the data with ``load_compas_data()``, splits it with a fraction
    argument of 0.5, then calls ``fdm.train_model_disp_mist`` twice: once
    with no constraint parameters and once with ``cons_type = 1`` and
    all-zero covariance thresholds for the ``"race"`` attribute.  Progress
    banners are printed; any further output comes from the ``fdm`` helpers.

    Returns:
        None.
    """
    # Load the COMPAS data
    X, y, x_control = load_compas_data()
    sensitive_attrs = list(x_control.keys())

    # Split the data into train and test
    train_fold_size = 0.5
    (x_train, y_train, x_control_train,
     x_test, y_test, x_control_test) = ut.split_into_train_test(
         X, y, x_control, train_fold_size)

    loss_function = "logreg"  # perform the experiments with logistic regression
    EPS = 1e-6

    def train_test_classifier(cons_params):
        # The constraint parameters are passed in explicitly rather than
        # being picked up from a rebinding variable in the enclosing scope.
        w = fdm.train_model_disp_mist(x_train, y_train, x_control_train,
                                      loss_function, EPS, cons_params)
        (_train_score, test_score, _cov_all_train, _cov_all_test,
         _s_attr_to_fp_fn_train, s_attr_to_fp_fn_test) = fdm.get_clf_stats(
             w, x_train, y_train, x_control_train,
             x_test, y_test, x_control_test, sensitive_attrs)
        # accuracy and FPR are reported for the test split
        return w, test_score, s_attr_to_fp_fn_test

    # Classify the data while optimizing for accuracy.
    # print("") emits the same blank line under Python 2 and Python 3.
    print("")
    print("== Unconstrained (original) classifier ==")
    train_test_classifier(None)
    print("\n-----------------------------------------------------------------------------------\n")

    # Now classify such that we optimize for accuracy while achieving
    # perfect fairness.
    print("")
    print("\n\n== Constraints on FPR ==")

    cons_type = 1  # FPR constraint -- just change the cons_type, the rest of parameters should stay the same
    tau = 5.0
    mu = 1.2
    # zero covariance threshold, means try to get the fairest solution
    sensitive_attrs_to_cov_thresh = {
        "race": {0: {0: 0, 1: 0}, 1: {0: 0, 1: 0}, 2: {0: 0, 1: 0}}
    }
    cons_params = {
        "cons_type": cons_type,
        "tau": tau,
        "mu": mu,
        "sensitive_attrs_to_cov_thresh": sensitive_attrs_to_cov_thresh,
    }

    train_test_classifier(cons_params)
    print("\n-----------------------------------------------------------------------------------\n")

    return
def test_synthetic_data():
    """Plot the fairness/accuracy trade-off on the synthetic data.

    Trains an unconstrained classifier with ``fdm.train_model_disp_mist``,
    then retrains with ``cons_type = 1`` while the training covariances of
    the unconstrained model, scaled by a factor ``m`` that runs from 1.0
    down to 0.0 in steps of 0.05, are used as covariance thresholds.  The
    per-group test false positive rates and the test accuracy are plotted
    against ``m``, saved to ``img/fairness_acc_tradeoff_cons_type_1.png``
    and shown.

    Returns:
        None.
    """
    # Generate the synthetic data (plot_data=False skips the data plot)
    data_type = 1
    X, y, x_control = generate_synthetic_data(data_type=data_type,
                                              plot_data=False)
    sensitive_attrs = list(x_control.keys())

    # Split the data into train and test
    train_fold_size = 0.5
    (x_train, y_train, x_control_train,
     x_test, y_test, x_control_test) = ut.split_into_train_test(
         X, y, x_control, train_fold_size)

    loss_function = "logreg"  # perform the experiments with logistic regression
    EPS = 1e-4

    def train_test_classifier(cons_params):
        # The constraint parameters are passed in explicitly rather than
        # being picked up from a rebinding variable in the enclosing scope.
        w = fdm.train_model_disp_mist(x_train, y_train, x_control_train,
                                      loss_function, EPS, cons_params)
        (_train_score, test_score, cov_all_train, _cov_all_test,
         _s_attr_to_fp_fn_train, s_attr_to_fp_fn_test) = fdm.get_clf_stats(
             w, x_train, y_train, x_control_train,
             x_test, y_test, x_control_test, sensitive_attrs)
        # accuracy and FPR are for the test split because they are plotted;
        # the covariance is for train because it is used to set the thresholds
        return w, test_score, s_attr_to_fp_fn_test, cov_all_train

    # Classify the data while optimizing for accuracy.
    # print("") emits the same blank line under Python 2 and Python 3.
    print("")
    print("== Unconstrained (original) classifier ==")
    _, _, _, cov_all_train_uncons = train_test_classifier(None)
    print("\n-----------------------------------------------------------------------------------\n")

    # Now classify such that we optimize for accuracy while achieving
    # perfect fairness.
    print("")
    print("== Classifier with fairness constraint ==")

    it = 0.05
    mult_range = np.arange(1.0, 0.0 - it, -it).tolist()

    acc_arr = []
    fpr_per_group = {0: [], 1: []}
    fnr_per_group = {0: [], 1: []}

    cons_type = 1  # FPR constraint -- just change the cons_type, the rest of parameters should stay the same
    tau = 5.0
    mu = 1.2

    for m in mult_range:
        # Scale every threshold in a private copy so the unconstrained
        # covariances stay intact for the next multiplier.
        sensitive_attrs_to_cov_thresh = deepcopy(cov_all_train_uncons)
        for per_attr in sensitive_attrs_to_cov_thresh.values():
            for per_cov_type in per_attr.values():
                for s_val in per_cov_type:
                    per_cov_type[s_val] *= m

        cons_params = {
            "cons_type": cons_type,
            "tau": tau,
            "mu": mu,
            "sensitive_attrs_to_cov_thresh": sensitive_attrs_to_cov_thresh,
        }

        _, acc_cons, s_attr_to_fp_fn_test_cons, _ = train_test_classifier(
            cons_params)

        fpr_per_group[0].append(s_attr_to_fp_fn_test_cons["s1"][0.0]["fpr"])
        fpr_per_group[1].append(s_attr_to_fp_fn_test_cons["s1"][1.0]["fpr"])
        fnr_per_group[0].append(s_attr_to_fp_fn_test_cons["s1"][0.0]["fnr"])
        fnr_per_group[1].append(s_attr_to_fp_fn_test_cons["s1"][1.0]["fnr"])

        acc_arr.append(acc_cons)

    fs = 15

    # Top panel: false positive rate per group. The x-axis is reversed so
    # the multiplier shrinks from left to right.
    ax = plt.subplot(2, 1, 1)
    plt.plot(mult_range, fpr_per_group[0], "-o", color="green", label="Group-0")
    plt.plot(mult_range, fpr_per_group[1], "-o", color="blue", label="Group-1")
    ax.set_xlim([max(mult_range), min(mult_range)])
    plt.ylabel('False positive rate', fontsize=fs)
    ax.legend(fontsize=fs)

    # Bottom panel: test accuracy.
    ax = plt.subplot(2, 1, 2)
    plt.plot(mult_range, acc_arr, "-o", color="green", label="")
    ax.set_xlim([max(mult_range), min(mult_range)])
    plt.xlabel('Covariance multiplicative factor (m)', fontsize=fs)
    plt.ylabel('Accuracy', fontsize=fs)

    plt.subplots_adjust(left=None, bottom=None, right=None, top=None,
                        wspace=None, hspace=0.5)
    plt.savefig("img/fairness_acc_tradeoff_cons_type_%d.png" % cons_type)
    plt.show()

    return
def test_adult_data():
    """Train four classifier variants on a 10000-row sample of the adult data.

    The variants, all trained through ``ut.train_model`` with the logistic
    loss and ``"sex"`` as the sensitive attribute, are:

    1. unconstrained (all constraint flags 0),
    2. fairness constraint with a covariance threshold of 0,
    3. accuracy constraint with ``gamma = 0.5``,
    4. accuracy constraint with ``sep_constraint = 1`` and ``gamma = 1000.0``.

    Progress banners are printed here; statistics are printed by the ``ut``
    helpers.

    Returns:
        None.
    """
    # Load the adult data; load_data_size=10000 subsamples for speed
    # (pass None to use the whole data set).
    X, y, x_control = load_adult_data(load_data_size=10000)
    ut.compute_p_rule(x_control["sex"], y)  # compute the p-rule in the original data

    # Split the data into train and test
    X = ut.add_intercept(X)  # add intercept to X before applying the linear classifier
    train_fold_size = 0.7
    (x_train, y_train, x_control_train,
     x_test, y_test, x_control_test) = ut.split_into_train_test(
         X, y, x_control, train_fold_size)

    loss_function = lf._logistic_loss
    sensitive_attrs = ["sex"]

    def train_test_classifier(apply_fairness_constraints,
                              apply_accuracy_constraint, sep_constraint,
                              sensitive_attrs_to_cov_thresh, gamma):
        # The per-stage settings are explicit arguments so that each call
        # site shows exactly which configuration it trains.
        w = ut.train_model(x_train, y_train, x_control_train, loss_function,
                           apply_fairness_constraints,
                           apply_accuracy_constraint, sep_constraint,
                           sensitive_attrs, sensitive_attrs_to_cov_thresh,
                           gamma)
        _train_score, test_score, _correct_train, _correct_test = \
            ut.check_accuracy(w, x_train, y_train, x_test, y_test, None, None)
        distances_boundary_test = (np.dot(x_test, w)).tolist()
        all_class_labels_assigned_test = np.sign(distances_boundary_test)
        correlation_dict_test = ut.get_correlations(
            None, None, all_class_labels_assigned_test, x_control_test,
            sensitive_attrs)
        cov_dict_test = ut.print_covariance_sensitive_attrs(
            None, x_test, distances_boundary_test, x_control_test,
            sensitive_attrs)
        p_rule = ut.print_classifier_fairness_stats(
            [test_score], [correlation_dict_test], [cov_dict_test],
            sensitive_attrs[0])
        return w, p_rule, test_score

    # Classify the data while optimizing for accuracy: all constraint flags
    # are 0 since we want an unconstrained (original) classifier.
    # print("") emits the same blank line under Python 2 and Python 3.
    print("")
    print("== Unconstrained (original) classifier ==")
    train_test_classifier(0, 0, 0, {}, None)

    # Now classify such that we optimize for accuracy while achieving
    # perfect fairness (fairness flag set to 1, zero covariance threshold).
    # The same threshold dict is reused by the later stages, as before.
    sensitive_attrs_to_cov_thresh = {"sex": 0}
    print("")
    print("== Classifier with fairness constraint ==")
    train_test_classifier(1, 0, 0, sensitive_attrs_to_cov_thresh, None)

    # Classify such that we optimize for fairness subject to a certain loss
    # in accuracy. gamma controls how much loss in accuracy we are willing
    # to incur to achieve fairness -- increase gamma to allow more loss.
    gamma = 0.5
    print("== Classifier with accuracy constraint ==")
    train_test_classifier(0, 1, 0, sensitive_attrs_to_cov_thresh, gamma)

    # Same as above, but with the separate-constraint flag set to 1 so that
    # no points classified as positive by the unconstrained (original)
    # classifier are misclassified (details in demo README.md).
    gamma = 1000.0
    print("== Classifier with accuracy constraint (no +ve misclassification) ==")
    train_test_classifier(0, 1, 1, sensitive_attrs_to_cov_thresh, gamma)

    return
def test_synthetic_data():
    """Train four classifier variants on synthetic data and plot boundaries.

    The variants, all trained through ``ut.train_model`` with the logistic
    loss and ``"s1"`` as the sensitive attribute, are:

    1. unconstrained (all constraint flags 0),
    2. fairness constraint with a covariance threshold of 0,
    3. accuracy constraint with ``gamma = 0.5``,
    4. accuracy constraint with ``sep_constraint = 1`` and ``gamma = 2000.0``.

    After each constrained run the decision boundary is drawn next to the
    unconstrained one and saved to ``img/f_cons.png``, ``img/a_cons.png``
    and ``img/a_cons_fine.png`` respectively.

    Returns:
        None.
    """
    # Generate the synthetic data (set plot_data to False to skip the data plot)
    X, y, x_control = generate_synthetic_data(plot_data=True)
    ut.compute_p_rule(x_control["s1"], y)  # compute the p-rule in the original data

    # Split the data into train and test
    X = ut.add_intercept(X)  # add intercept to X before applying the linear classifier
    train_fold_size = 0.7
    (x_train, y_train, x_control_train,
     x_test, y_test, x_control_test) = ut.split_into_train_test(
         X, y, x_control, train_fold_size)

    loss_function = lf._logistic_loss
    sensitive_attrs = ["s1"]

    def train_test_classifier(apply_fairness_constraints,
                              apply_accuracy_constraint, sep_constraint,
                              sensitive_attrs_to_cov_thresh, gamma):
        # The per-stage settings are explicit arguments so that each call
        # site shows exactly which configuration it trains.
        w = ut.train_model(x_train, y_train, x_control_train, loss_function,
                           apply_fairness_constraints,
                           apply_accuracy_constraint, sep_constraint,
                           sensitive_attrs, sensitive_attrs_to_cov_thresh,
                           gamma)
        _train_score, test_score, _correct_train, _correct_test = \
            ut.check_accuracy(w, x_train, y_train, x_test, y_test, None, None)
        distances_boundary_test = (np.dot(x_test, w)).tolist()
        all_class_labels_assigned_test = np.sign(distances_boundary_test)
        correlation_dict_test = ut.get_correlations(
            None, None, all_class_labels_assigned_test, x_control_test,
            sensitive_attrs)
        cov_dict_test = ut.print_covariance_sensitive_attrs(
            None, x_test, distances_boundary_test, x_control_test,
            sensitive_attrs)
        p_rule = ut.print_classifier_fairness_stats(
            [test_score], [correlation_dict_test], [cov_dict_test],
            sensitive_attrs[0])
        return w, p_rule, test_score

    def plot_boundaries(w1, w2, p1, p2, acc1, acc2, fname):
        # Draw the first points of the data set plus the two decision
        # boundaries (w1 = original, w2 = constrained) and save to fname.
        num_to_draw = 200  # we will only draw a small number of points to avoid clutter
        x_draw = X[:num_to_draw]
        y_draw = y[:num_to_draw]
        x_control_draw = x_control["s1"][:num_to_draw]

        X_s_0 = x_draw[x_control_draw == 0.0]
        X_s_1 = x_draw[x_control_draw == 1.0]
        y_s_0 = y_draw[x_control_draw == 0.0]
        y_s_1 = y_draw[x_control_draw == 1.0]

        # Column 0 is the intercept added above, so columns 1 and 2 are
        # plotted. Group 0 is drawn as crosses, group 1 as hollow circles;
        # green marks label 1.0 and red marks label -1.0.
        plt.scatter(X_s_0[y_s_0 == 1.0][:, 1], X_s_0[y_s_0 == 1.0][:, 2],
                    color='green', marker='x', s=30, linewidth=1.5)
        plt.scatter(X_s_0[y_s_0 == -1.0][:, 1], X_s_0[y_s_0 == -1.0][:, 2],
                    color='red', marker='x', s=30, linewidth=1.5)
        plt.scatter(X_s_1[y_s_1 == 1.0][:, 1], X_s_1[y_s_1 == 1.0][:, 2],
                    color='green', marker='o', facecolors='none', s=30)
        plt.scatter(X_s_1[y_s_1 == -1.0][:, 1], X_s_1[y_s_1 == -1.0][:, 2],
                    color='red', marker='o', facecolors='none', s=30)

        x1, x2 = max(x_draw[:, 1]), min(x_draw[:, 1])
        y1, y2 = ut.get_line_coordinates(w1, x1, x2)
        plt.plot([x1, x2], [y1, y2], 'c-', linewidth=3,
                 label="Acc=%0.2f; p%% rule=%0.0f%% - Original" % (acc1, p1))
        y1, y2 = ut.get_line_coordinates(w2, x1, x2)
        plt.plot([x1, x2], [y1, y2], 'b--', linewidth=3,
                 label="Acc=%0.2f; p%% rule=%0.0f%% - Constrained" % (acc2, p2))

        # Ticks are not needed to see the data distribution. Booleans are
        # used because the legacy 'off' strings are truthy and would leave
        # the ticks visible on newer matplotlib releases.
        plt.tick_params(axis='x', which='both', bottom=False, top=False,
                        labelbottom=False)
        plt.tick_params(axis='y', which='both', left=False, right=False,
                        labelleft=False)
        plt.legend(loc=2, fontsize=15)
        plt.xlim((-15, 10))
        plt.ylim((-10, 15))
        plt.savefig(fname)
        plt.show()

    # Classify the data while optimizing for accuracy: all constraint flags
    # are 0 since we want an unconstrained (original) classifier.
    # print("") emits the same blank line under Python 2 and Python 3.
    print("")
    print("== Unconstrained (original) classifier ==")
    w_uncons, p_uncons, acc_uncons = train_test_classifier(0, 0, 0, {}, None)

    # Now classify such that we optimize for accuracy while achieving
    # perfect fairness (fairness flag set to 1, zero covariance threshold).
    # The same threshold dict is reused by the later stages, as before.
    sensitive_attrs_to_cov_thresh = {"s1": 0}
    print("")
    print("== Classifier with fairness constraint ==")
    w_f_cons, p_f_cons, acc_f_cons = train_test_classifier(
        1, 0, 0, sensitive_attrs_to_cov_thresh, None)
    plot_boundaries(w_uncons, w_f_cons, p_uncons, p_f_cons,
                    acc_uncons, acc_f_cons, "img/f_cons.png")

    # Classify such that we optimize for fairness subject to a certain loss
    # in accuracy. gamma controls how much loss in accuracy we are willing
    # to incur to achieve fairness -- increase gamma to allow more loss.
    gamma = 0.5
    print("== Classifier with accuracy constraint ==")
    w_a_cons, p_a_cons, acc_a_cons = train_test_classifier(
        0, 1, 0, sensitive_attrs_to_cov_thresh, gamma)
    plot_boundaries(w_uncons, w_a_cons, p_uncons, p_a_cons,
                    acc_uncons, acc_a_cons, "img/a_cons.png")

    # Same as above, but with the separate-constraint flag set to 1 so that
    # no points classified as positive by the unconstrained (original)
    # classifier are misclassified (details in demo README.md).
    gamma = 2000.0
    print("== Classifier with accuracy constraint (no +ve misclassification) ==")
    w_a_cons_fine, p_a_cons_fine, acc_a_cons_fine = train_test_classifier(
        0, 1, 1, sensitive_attrs_to_cov_thresh, gamma)
    plot_boundaries(w_uncons, w_a_cons_fine, p_uncons, p_a_cons_fine,
                    acc_uncons, acc_a_cons_fine, "img/a_cons_fine.png")

    return
def test_synthetic_data():
    """Plot the fairness/accuracy trade-off on the synthetic data.

    Trains an unconstrained classifier with ``fdm.train_model_disp_mist``,
    then retrains with ``cons_type = 1`` while the training covariances of
    the unconstrained model, scaled by a factor ``m`` that runs from 1.0
    down to 0.0 in steps of 0.05, are used as covariance thresholds.  The
    per-group test false positive rates and the test accuracy are plotted
    against ``m``, saved to ``img/fairness_acc_tradeoff_cons_type_1.png``
    and shown.

    Returns:
        None.
    """
    # Generate the synthetic data (plot_data=False skips the data plot)
    data_type = 1
    X, y, x_control = generate_synthetic_data(data_type=data_type,
                                              plot_data=False)
    sensitive_attrs = list(x_control.keys())

    # Split the data into train and test
    train_fold_size = 0.5
    (x_train, y_train, x_control_train,
     x_test, y_test, x_control_test) = ut.split_into_train_test(
         X, y, x_control, train_fold_size)

    loss_function = "logreg"  # perform the experiments with logistic regression
    EPS = 1e-4

    def train_test_classifier(cons_params):
        # The constraint parameters are passed in explicitly rather than
        # being picked up from a rebinding variable in the enclosing scope.
        w = fdm.train_model_disp_mist(x_train, y_train, x_control_train,
                                      loss_function, EPS, cons_params)
        (_train_score, test_score, cov_all_train, _cov_all_test,
         _s_attr_to_fp_fn_train, s_attr_to_fp_fn_test) = fdm.get_clf_stats(
             w, x_train, y_train, x_control_train,
             x_test, y_test, x_control_test, sensitive_attrs)
        # accuracy and FPR are for the test split because they are plotted;
        # the covariance is for train because it is used to set the thresholds
        return w, test_score, s_attr_to_fp_fn_test, cov_all_train

    # Classify the data while optimizing for accuracy.
    # print("") emits the same blank line under Python 2 and Python 3.
    print("")
    print("== Unconstrained (original) classifier ==")
    _, _, _, cov_all_train_uncons = train_test_classifier(None)
    print("\n-----------------------------------------------------------------------------------\n")

    # Now classify such that we optimize for accuracy while achieving
    # perfect fairness.
    print("")
    print("== Classifier with fairness constraint ==")

    it = 0.05
    mult_range = np.arange(1.0, 0.0 - it, -it).tolist()

    acc_arr = []
    fpr_per_group = {0: [], 1: []}
    fnr_per_group = {0: [], 1: []}

    cons_type = 1  # FPR constraint -- just change the cons_type, the rest of parameters should stay the same
    tau = 5.0
    mu = 1.2

    for m in mult_range:
        # Scale every threshold in a private copy so the unconstrained
        # covariances stay intact for the next multiplier.
        sensitive_attrs_to_cov_thresh = deepcopy(cov_all_train_uncons)
        for per_attr in sensitive_attrs_to_cov_thresh.values():
            for per_cov_type in per_attr.values():
                for s_val in per_cov_type:
                    per_cov_type[s_val] *= m

        cons_params = {
            "cons_type": cons_type,
            "tau": tau,
            "mu": mu,
            "sensitive_attrs_to_cov_thresh": sensitive_attrs_to_cov_thresh,
        }

        _, acc_cons, s_attr_to_fp_fn_test_cons, _ = train_test_classifier(
            cons_params)

        fpr_per_group[0].append(s_attr_to_fp_fn_test_cons["s1"][0.0]["fpr"])
        fpr_per_group[1].append(s_attr_to_fp_fn_test_cons["s1"][1.0]["fpr"])
        fnr_per_group[0].append(s_attr_to_fp_fn_test_cons["s1"][0.0]["fnr"])
        fnr_per_group[1].append(s_attr_to_fp_fn_test_cons["s1"][1.0]["fnr"])

        acc_arr.append(acc_cons)

    fs = 15

    # Top panel: false positive rate per group. The x-axis is reversed so
    # the multiplier shrinks from left to right.
    ax = plt.subplot(2, 1, 1)
    plt.plot(mult_range, fpr_per_group[0], "-o", color="green", label="Group-0")
    plt.plot(mult_range, fpr_per_group[1], "-o", color="blue", label="Group-1")
    ax.set_xlim([max(mult_range), min(mult_range)])
    plt.ylabel('False positive rate', fontsize=fs)
    ax.legend(fontsize=fs)

    # Bottom panel: test accuracy.
    ax = plt.subplot(2, 1, 2)
    plt.plot(mult_range, acc_arr, "-o", color="green", label="")
    ax.set_xlim([max(mult_range), min(mult_range)])
    plt.xlabel('Covariance multiplicative factor (m)', fontsize=fs)
    plt.ylabel('Accuracy', fontsize=fs)

    plt.subplots_adjust(left=None, bottom=None, right=None, top=None,
                        wspace=None, hspace=0.5)
    plt.savefig("img/fairness_acc_tradeoff_cons_type_%d.png" % cons_type)
    plt.show()

    return
def test_compas_data():
    """Train an unconstrained and an FPR-constrained classifier on COMPAS.

    Loads the data with ``load_compas_data()``, splits it with a fraction
    argument of 0.5, then calls ``fdm.train_model_disp_mist`` twice: once
    with no constraint parameters and once with ``cons_type = 1`` and
    all-zero covariance thresholds for the ``"race"`` attribute.  Progress
    banners are printed; any further output comes from the ``fdm`` helpers.

    Returns:
        None.
    """
    # Load the COMPAS data
    X, y, x_control = load_compas_data()
    sensitive_attrs = list(x_control.keys())

    # Split the data into train and test
    train_fold_size = 0.5
    (x_train, y_train, x_control_train,
     x_test, y_test, x_control_test) = ut.split_into_train_test(
         X, y, x_control, train_fold_size)

    loss_function = "logreg"  # perform the experiments with logistic regression
    EPS = 1e-6

    def train_test_classifier(cons_params):
        # The constraint parameters are passed in explicitly rather than
        # being picked up from a rebinding variable in the enclosing scope.
        w = fdm.train_model_disp_mist(x_train, y_train, x_control_train,
                                      loss_function, EPS, cons_params)
        (_train_score, test_score, _cov_all_train, _cov_all_test,
         _s_attr_to_fp_fn_train, s_attr_to_fp_fn_test) = fdm.get_clf_stats(
             w, x_train, y_train, x_control_train,
             x_test, y_test, x_control_test, sensitive_attrs)
        # accuracy and FPR are reported for the test split
        return w, test_score, s_attr_to_fp_fn_test

    # Classify the data while optimizing for accuracy.
    # print("") emits the same blank line under Python 2 and Python 3.
    print("")
    print("== Unconstrained (original) classifier ==")
    train_test_classifier(None)
    print("\n-----------------------------------------------------------------------------------\n")

    # Now classify such that we optimize for accuracy while achieving
    # perfect fairness.
    print("")
    print("\n\n== Constraints on FPR ==")

    cons_type = 1  # FPR constraint -- just change the cons_type, the rest of parameters should stay the same
    tau = 5.0
    mu = 1.2
    # zero covariance threshold, means try to get the fairest solution
    sensitive_attrs_to_cov_thresh = {
        "race": {0: {0: 0, 1: 0}, 1: {0: 0, 1: 0}, 2: {0: 0, 1: 0}}
    }
    cons_params = {
        "cons_type": cons_type,
        "tau": tau,
        "mu": mu,
        "sensitive_attrs_to_cov_thresh": sensitive_attrs_to_cov_thresh,
    }

    train_test_classifier(cons_params)
    print("\n-----------------------------------------------------------------------------------\n")

    return
def test_adult_data():
    """Sweep the fairness covariance threshold on the full adult data.

    Trains an unconstrained classifier, then fairness-constrained ones for
    every threshold in ``np.arange(0, 0.51, 0.1)`` and finally for a
    threshold of 1.  For every model four scores are obtained from
    ``ut.get_eq_op_acc``, ``ut.get_eq_odds_acc``, ``ut.get_pred_rate_par_acc``
    and ``ut.get_dem_par_acc``; these helpers are called with the training
    split.

    Returns:
        An 8-tuple: the four scores of the unconstrained model (equal
        opportunity, equalized odds, predictive rate parity, demographic
        parity), followed by four lists holding the same scores for each
        constrained model in training order.
    """
    # Load the adult data; load_data_size=None asks for the whole data set
    # (pass a number to subsample for speed).
    X, y, x_control = load_adult_data(load_data_size=None)
    ut.compute_p_rule(x_control["sex"], y)  # p-rule of the original data

    # The linear classifier needs an intercept column before splitting.
    X = ut.add_intercept(X)
    train_fold_size = 0.7
    split_parts = ut.split_into_train_test(X, y, x_control, train_fold_size)
    (x_train, y_train, x_control_train,
     x_test, y_test, x_control_test) = split_parts

    loss_function = lf._logistic_loss
    sensitive_attrs = ["sex"]
    gamma = None  # the accuracy constraint is never enabled in this sweep

    def train_test_classifier(fairness_flag, accuracy_flag, sep_flag,
                              cov_thresh):
        # Train one model and gather its fairness statistics.
        w = ut.train_model(x_train, y_train, x_control_train, loss_function,
                           fairness_flag, accuracy_flag, sep_flag,
                           sensitive_attrs, cov_thresh, gamma)
        accuracy_info = ut.check_accuracy(w, x_train, y_train,
                                          x_test, y_test, None, None)
        _train_score, test_score, _right_train, _right_test = accuracy_info

        test_distances = (np.dot(x_test, w)).tolist()
        test_labels = np.sign(test_distances)
        test_correlations = ut.get_correlations(
            None, None, test_labels, x_control_test, sensitive_attrs)
        test_covariances = ut.print_covariance_sensitive_attrs(
            None, x_test, test_distances, x_control_test, sensitive_attrs)
        p_rule = ut.print_classifier_fairness_stats(
            [test_score], [test_correlations], [test_covariances],
            sensitive_attrs[0])

        eq_op_info = ut.get_eq_op_acc(w, x_train, y_train,
                                      x_control_train, None)
        eq_op, _chance_bin_zero, _chance_bin_one = eq_op_info
        eq_odds = ut.get_eq_odds_acc(w, x_train, y_train,
                                     x_control_train, None)
        pred_rate_par = ut.get_pred_rate_par_acc(w, x_train, y_train,
                                                 x_control_train, None)
        dem_par = ut.get_dem_par_acc(w, x_train, y_train,
                                     x_control_train, None)
        return w, p_rule, test_score, eq_op, eq_odds, pred_rate_par, dem_par

    # Stage 1: plain classifier, every constraint flag switched off.
    print()
    print("== Unconstrained (original) classifier ==")
    baseline = train_test_classifier(0, 0, 0, {})
    (eq_op_acc_uncons, eq_odds_acc_uncons,
     pred_rate_par_acc_uncons, demo_par_acc_uncons) = baseline[3:]

    temp_eq_op_acc_f = []
    temp_eq_odds_acc_f = []
    temp_pred_rate_par_acc_f = []
    temp_demo_par_acc_f = []

    def _run_constrained(cov_thresh):
        # Train with only the fairness constraint on and log the four scores.
        outcome = train_test_classifier(1, 0, 0, cov_thresh)
        temp_eq_op_acc_f.append(outcome[3])
        temp_eq_odds_acc_f.append(outcome[4])
        temp_pred_rate_par_acc_f.append(outcome[5])
        temp_demo_par_acc_f.append(outcome[6])

    # Stage 2: optimize accuracy subject to the fairness constraint while
    # the allowed covariance grows from 0 to 0.5.
    for num in np.arange(0, 0.51, 0.1):
        print()
        print("== Classifier with fairness constraint, cov: ", num, " ==")
        _run_constrained({"sex": num})

    # Stage 3: one more run with a covariance threshold of 1.
    print()
    print("== Classifier with fairness constraint, cov: 1 ==")
    _run_constrained({"sex": 1})

    return (eq_op_acc_uncons, eq_odds_acc_uncons, pred_rate_par_acc_uncons,
            demo_par_acc_uncons, temp_eq_op_acc_f, temp_eq_odds_acc_f,
            temp_pred_rate_par_acc_f, temp_demo_par_acc_f)