Code example #1
# Assumed imports (module names follow the fair-classification demo layout):
import json

import utils as ut
from load_adult_data import load_adult_data


def write_adult_data_to_disk():
    # Call load_adult_data() with no argument (or load_data_size=None) to use the whole
    # dataset; pass a size to subsample for a performance speedup.
    X, y, x_control = load_adult_data()
    x_train, y_train, x_control_train, x_test, y_test, x_control_test = ut.split_into_train_test(
        X, y, x_control, 0.7)

    adult_train = {
        "x": x_train.tolist(),
        "class": y_train.tolist(),
        "sensitive": {
            "sex": x_control_train["sex"].tolist()
        }
    }
    adult_test = {
        "x": x_test.tolist(),
        "class": y_test.tolist(),
        "sensitive": {
            "sex": x_control_test["sex"].tolist()
        }
    }
    with open("adult_train.json", "w") as train_out:
        json.dump(adult_train, train_out)
    with open("adult_test.json", "w") as test_out:
        json.dump(adult_test, test_out)
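For a quick check of the files written above, the splits can be reloaded from JSON into NumPy arrays; a minimal sketch, assuming the layout produced by write_adult_data_to_disk:

import json

import numpy as np

# Reload the training split written by write_adult_data_to_disk().
with open("adult_train.json") as f:
    adult_train = json.load(f)

x_train = np.array(adult_train["x"])                   # feature matrix
y_train = np.array(adult_train["class"])               # class labels
sex_train = np.array(adult_train["sensitive"]["sex"])  # sensitive attribute
print(x_train.shape, y_train.shape, sex_train.shape)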
Code example #2
    def setup_data(self, X, y, x_control, train_split=.7, val_split=0.):
        print('Loaded {} dataset with dimension {}'.format(
            self.ds_name, X.shape))

        # Split data into training and testing
        self.x_train, self.y_train, self.x_control_train, \
                self.x_test, self.y_test, self.x_control_test = \
                ut.split_into_train_test(X, y, x_control, train_split)

        # create a validation set if specified
        if val_split > 0.:
            self.x_train, self.y_train, self.x_control_train, \
                    self.x_val, self.y_val, self.x_control_val = \
                    ut.split_into_train_test(self.x_train,
                            self.y_train, self.x_control_train, val_split)
        else:
            self.x_val, self.y_val, self.x_control_val = None, None, None
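For reference, here is a self-contained sketch of the kind of shuffled split that ut.split_into_train_test is used for above. It is an illustrative stand-in, not the library's implementation; it assumes X and y are NumPy arrays and x_control maps each sensitive-attribute name to a per-example array:

import numpy as np

def split_arrays(X, y, x_control, train_fraction, seed=0):
    """Shuffle, then split X, y, and each sensitive-attribute array by train_fraction."""
    rng = np.random.default_rng(seed)
    idx = rng.permutation(len(y))
    cut = int(train_fraction * len(y))
    tr, te = idx[:cut], idx[cut:]
    x_control_tr = {k: np.asarray(v)[tr] for k, v in x_control.items()}
    x_control_te = {k: np.asarray(v)[te] for k, v in x_control.items()}
    return X[tr], y[tr], x_control_tr, X[te], y[te], x_control_te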
Code example #3
def test_compas_data():
	
	""" Generate the synthetic data """
	data_type = 1
	X, y, x_control = load_compas_data()
	sensitive_attrs = x_control.keys()

	
	""" Split the data into train and test """
	train_fold_size = 0.5
	x_train, y_train, x_control_train, x_test, y_test, x_control_test = ut.split_into_train_test(X, y, x_control, train_fold_size)

	cons_params = None # constraint parameters, will use them later
	loss_function = "logreg" # perform the experiments with logistic regression
	EPS = 1e-6

	def train_test_classifier():
		w = fdm.train_model_disp_mist(x_train, y_train, x_control_train, loss_function, EPS, cons_params)

		train_score, test_score, cov_all_train, cov_all_test, s_attr_to_fp_fn_train, s_attr_to_fp_fn_test = fdm.get_clf_stats(w, x_train, y_train, x_control_train, x_test, y_test, x_control_test, sensitive_attrs)

		
		# accuracy and FPR are for the test set because we need them for plotting
		return w, test_score, s_attr_to_fp_fn_test
		

	""" Classify the data while optimizing for accuracy """
	print
	print "== Unconstrained (original) classifier =="
	w_uncons, acc_uncons, s_attr_to_fp_fn_test_uncons = train_test_classifier()
	print "\n-----------------------------------------------------------------------------------\n"

	""" Now classify such that we optimize for accuracy while achieving perfect fairness """
	
	print
	
	print "\n\n== Constraints on FPR =="	# setting parameter for constraints
	cons_type = 1 # FPR constraint -- just change the cons_type, the rest of parameters should stay the same
	tau = 5.0
	mu = 1.2
	sensitive_attrs_to_cov_thresh = {"race": {0:{0:0, 1:0}, 1:{0:0, 1:0}, 2:{0:0, 1:0}}} # zero covariance threshold, means try to get the fairest solution
	cons_params = {"cons_type": cons_type, 
					"tau": tau, 
					"mu": mu, 
					"sensitive_attrs_to_cov_thresh": sensitive_attrs_to_cov_thresh}

	w_cons, acc_cons, s_attr_to_fp_fn_test_cons  = train_test_classifier()
	print "\n-----------------------------------------------------------------------------------\n"

	return
Code example #4
def test_synthetic_data():
    """ Generate the synthetic data """
    data_type = 1
    X, y, x_control = generate_synthetic_data(
        data_type=data_type,
        plot_data=False)  # set plot_data to False to skip the data plot
    sensitive_attrs = x_control.keys()
    """ Split the data into train and test """
    train_fold_size = 0.5
    x_train, y_train, x_control_train, x_test, y_test, x_control_test = ut.split_into_train_test(
        X, y, x_control, train_fold_size)

    cons_params = None  # constraint parameters, will use them later
    loss_function = "logreg"  # perform the experiments with logistic regression
    EPS = 1e-4

    def train_test_classifier():
        w = fdm.train_model_disp_mist(x_train, y_train, x_control_train,
                                      loss_function, EPS, cons_params)

        train_score, test_score, cov_all_train, cov_all_test, s_attr_to_fp_fn_train, s_attr_to_fp_fn_test = fdm.get_clf_stats(
            w, x_train, y_train, x_control_train, x_test, y_test,
            x_control_test, sensitive_attrs)

        # accuracy and FPR are for the test set because we need them for plotting
        # the covariance is for the training set, because we need it for setting the thresholds
        return w, test_score, s_attr_to_fp_fn_test, cov_all_train

    """ Classify the data while optimizing for accuracy """
    print
    print "== Unconstrained (original) classifier =="
    w_uncons, acc_uncons, s_attr_to_fp_fn_test_uncons, cov_all_train_uncons = train_test_classifier(
    )
    print "\n-----------------------------------------------------------------------------------\n"
    """ Now classify such that we optimize for accuracy while achieving perfect fairness """

    print
    print "== Classifier with fairness constraint =="

    it = 0.05
    mult_range = np.arange(1.0, 0.0 - it, -it).tolist()

    acc_arr = []
    fpr_per_group = {0: [], 1: []}
    fnr_per_group = {0: [], 1: []}

    cons_type = 1  # FPR constraint -- just change the cons_type, the rest of parameters should stay the same
    tau = 5.0
    mu = 1.2

    for m in mult_range:
        sensitive_attrs_to_cov_thresh = deepcopy(cov_all_train_uncons)
        for s_attr in sensitive_attrs_to_cov_thresh.keys():
            for cov_type in sensitive_attrs_to_cov_thresh[s_attr].keys():
                for s_val in sensitive_attrs_to_cov_thresh[s_attr][cov_type]:
                    sensitive_attrs_to_cov_thresh[s_attr][cov_type][s_val] *= m

        cons_params = {
            "cons_type": cons_type,
            "tau": tau,
            "mu": mu,
            "sensitive_attrs_to_cov_thresh": sensitive_attrs_to_cov_thresh
        }

        w_cons, acc_cons, s_attr_to_fp_fn_test_cons, cov_all_train_cons = train_test_classifier(
        )

        fpr_per_group[0].append(s_attr_to_fp_fn_test_cons["s1"][0.0]["fpr"])
        fpr_per_group[1].append(s_attr_to_fp_fn_test_cons["s1"][1.0]["fpr"])
        fnr_per_group[0].append(s_attr_to_fp_fn_test_cons["s1"][0.0]["fnr"])
        fnr_per_group[1].append(s_attr_to_fp_fn_test_cons["s1"][1.0]["fnr"])

        acc_arr.append(acc_cons)

    fs = 15

    ax = plt.subplot(2, 1, 1)
    plt.plot(mult_range,
             fpr_per_group[0],
             "-o",
             color="green",
             label="Group-0")
    plt.plot(mult_range, fpr_per_group[1], "-o", color="blue", label="Group-1")
    ax.set_xlim([max(mult_range), min(mult_range)])
    plt.ylabel('False positive rate', fontsize=fs)
    ax.legend(fontsize=fs)

    ax = plt.subplot(2, 1, 2)
    plt.plot(mult_range, acc_arr, "-o", color="green", label="")
    ax.set_xlim([max(mult_range), min(mult_range)])
    plt.xlabel('Covariance multiplicative factor (m)', fontsize=fs)
    plt.ylabel('Accuracy', fontsize=fs)

    plt.subplots_adjust(left=None,
                        bottom=None,
                        right=None,
                        top=None,
                        wspace=None,
                        hspace=0.5)
    plt.savefig("img/fairness_acc_tradeoff_cons_type_%d.png" % cons_type)
    plt.show()

    return
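For intuition about the quantities plotted above, here is a small standalone sketch (not the repository's fdm.get_clf_stats) of per-group false positive and false negative rates, for labels in {-1, +1} and a binary sensitive attribute:

import numpy as np

def group_fpr_fnr(y_true, y_pred, s):
    """Per-group FPR/FNR; y_true and y_pred take values in {-1, +1}, s is the sensitive attribute."""
    rates = {}
    for s_val in np.unique(s):
        group = (s == s_val)
        neg = group & (y_true == -1)   # ground-truth negatives in this group
        pos = group & (y_true == 1)    # ground-truth positives in this group
        fpr = float(np.mean(y_pred[neg] == 1)) if neg.any() else 0.0
        fnr = float(np.mean(y_pred[pos] == -1)) if pos.any() else 0.0
        rates[s_val] = {"fpr": fpr, "fnr": fnr}
    return rates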
Code example #5
def test_adult_data():
	

	""" Load the adult data """
	X, y, x_control = load_adult_data(load_data_size=10000) # subsample 10,000 examples for a performance speedup; pass load_data_size=None (or no argument) to use the whole dataset
	ut.compute_p_rule(x_control["sex"], y) # compute the p-rule in the original data



	""" Split the data into train and test """
	X = ut.add_intercept(X) # add intercept to X before applying the linear classifier
	train_fold_size = 0.7
	x_train, y_train, x_control_train, x_test, y_test, x_control_test = ut.split_into_train_test(X, y, x_control, train_fold_size)




	apply_fairness_constraints = None
	apply_accuracy_constraint = None
	sep_constraint = None

	loss_function = lf._logistic_loss
	sensitive_attrs = ["sex"]
	sensitive_attrs_to_cov_thresh = {}
	gamma = None

	def train_test_classifier():
		w = ut.train_model(x_train, y_train, x_control_train, loss_function, apply_fairness_constraints, apply_accuracy_constraint, sep_constraint, sensitive_attrs, sensitive_attrs_to_cov_thresh, gamma)
		train_score, test_score, correct_answers_train, correct_answers_test = ut.check_accuracy(w, x_train, y_train, x_test, y_test, None, None)
		distances_boundary_test = (np.dot(x_test, w)).tolist()
		all_class_labels_assigned_test = np.sign(distances_boundary_test)
		correlation_dict_test = ut.get_correlations(None, None, all_class_labels_assigned_test, x_control_test, sensitive_attrs)
		cov_dict_test = ut.print_covariance_sensitive_attrs(None, x_test, distances_boundary_test, x_control_test, sensitive_attrs)
		p_rule = ut.print_classifier_fairness_stats([test_score], [correlation_dict_test], [cov_dict_test], sensitive_attrs[0])	
		return w, p_rule, test_score


	""" Classify the data while optimizing for accuracy """
	print
	print "== Unconstrained (original) classifier =="
	# all constraint flags are set to 0 since we want to train an unconstrained (original) classifier
	apply_fairness_constraints = 0
	apply_accuracy_constraint = 0
	sep_constraint = 0
	w_uncons, p_uncons, acc_uncons = train_test_classifier()
	
	""" Now classify such that we optimize for accuracy while achieving perfect fairness """
	apply_fairness_constraints = 1 # set this flag to one since we want to optimize accuracy subject to fairness constraints
	apply_accuracy_constraint = 0
	sep_constraint = 0
	sensitive_attrs_to_cov_thresh = {"sex":0}
	print
	print "== Classifier with fairness constraint =="
	w_f_cons, p_f_cons, acc_f_cons  = train_test_classifier()

	

	""" Classify such that we optimize for fairness subject to a certain loss in accuracy """
	apply_fairness_constraints = 0 # flag for fairness constraint is set back to 0 since we want to apply the accuracy constraint now
	apply_accuracy_constraint = 1 # now, we want to optimize fairness subject to accuracy constraints
	sep_constraint = 0
	gamma = 0.5 # gamma controls how much loss in accuracy we are willing to incur to achieve fairness -- increase gamma to allow more loss in accuracy
	print "== Classifier with accuracy constraint =="
	w_a_cons, p_a_cons, acc_a_cons = train_test_classifier()	

	""" 
	Classify such that we optimize for fairness subject to a certain loss in accuracy 
	In addition, make sure that no points classified as positive by the unconstrained (original) classifier are misclassified.

	"""
	apply_fairness_constraints = 0 # flag for fairness constraint is set back to 0 since we want to apply the accuracy constraint now
	apply_accuracy_constraint = 1 # now, we want to optimize fairness subject to accuracy constraints
	sep_constraint = 1 # set the separate constraint flag to one, since in addition to the accuracy constraint, we also want no misclassifications for certain points (details in demo README.md)
	gamma = 1000.0
	print "== Classifier with accuracy constraint (no +ve misclassification) =="
	w_a_cons_fine, p_a_cons_fine, acc_a_cons_fine  = train_test_classifier()

	return
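ut.compute_p_rule and ut.print_classifier_fairness_stats report the p%-rule used throughout this demo; as a rough standalone sketch (not the repository's implementation), the rule compares positive-prediction rates between the two groups:

import numpy as np

def p_rule(y_pred, s):
    """p%-rule for predictions in {-1, +1} and a binary sensitive attribute s in {0.0, 1.0}."""
    rate_1 = np.mean(y_pred[s == 1.0] == 1)
    rate_0 = np.mean(y_pred[s == 0.0] == 1)
    if rate_0 == 0 or rate_1 == 0:
        return 0.0
    return 100.0 * min(rate_1 / rate_0, rate_0 / rate_1)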
Code example #6
def test_synthetic_data():
	
	""" Generate the synthetic data """
	X, y, x_control = generate_synthetic_data(plot_data=True) # set plot_data to False to skip the data plot
	ut.compute_p_rule(x_control["s1"], y) # compute the p-rule in the original data


	""" Split the data into train and test """
	X = ut.add_intercept(X) # add intercept to X before applying the linear classifier
	train_fold_size = 0.7
	x_train, y_train, x_control_train, x_test, y_test, x_control_test = ut.split_into_train_test(X, y, x_control, train_fold_size)



	apply_fairness_constraints = None
	apply_accuracy_constraint = None
	sep_constraint = None

	loss_function = lf._logistic_loss
	sensitive_attrs = ["s1"]
	sensitive_attrs_to_cov_thresh = {}
	gamma = None

	def train_test_classifier():
		w = ut.train_model(x_train, y_train, x_control_train, loss_function, apply_fairness_constraints, apply_accuracy_constraint, sep_constraint, sensitive_attrs, sensitive_attrs_to_cov_thresh, gamma)
		train_score, test_score, correct_answers_train, correct_answers_test = ut.check_accuracy(w, x_train, y_train, x_test, y_test, None, None)
		distances_boundary_test = (np.dot(x_test, w)).tolist()
		all_class_labels_assigned_test = np.sign(distances_boundary_test)
		correlation_dict_test = ut.get_correlations(None, None, all_class_labels_assigned_test, x_control_test, sensitive_attrs)
		cov_dict_test = ut.print_covariance_sensitive_attrs(None, x_test, distances_boundary_test, x_control_test, sensitive_attrs)
		p_rule = ut.print_classifier_fairness_stats([test_score], [correlation_dict_test], [cov_dict_test], sensitive_attrs[0])	
		return w, p_rule, test_score


	def plot_boundaries(w1, w2, p1, p2, acc1, acc2, fname):

		num_to_draw = 200 # we will only draw a small number of points to avoid clutter
		x_draw = X[:num_to_draw]
		y_draw = y[:num_to_draw]
		x_control_draw = x_control["s1"][:num_to_draw]

		X_s_0 = x_draw[x_control_draw == 0.0]
		X_s_1 = x_draw[x_control_draw == 1.0]
		y_s_0 = y_draw[x_control_draw == 0.0]
		y_s_1 = y_draw[x_control_draw == 1.0]
		plt.scatter(X_s_0[y_s_0==1.0][:, 1], X_s_0[y_s_0==1.0][:, 2], color='green', marker='x', s=30, linewidth=1.5)
		plt.scatter(X_s_0[y_s_0==-1.0][:, 1], X_s_0[y_s_0==-1.0][:, 2], color='red', marker='x', s=30, linewidth=1.5)
		plt.scatter(X_s_1[y_s_1==1.0][:, 1], X_s_1[y_s_1==1.0][:, 2], color='green', marker='o', facecolors='none', s=30)
		plt.scatter(X_s_1[y_s_1==-1.0][:, 1], X_s_1[y_s_1==-1.0][:, 2], color='red', marker='o', facecolors='none', s=30)


		x1,x2 = max(x_draw[:,1]), min(x_draw[:,1])
		y1,y2 = ut.get_line_coordinates(w1, x1, x2)
		plt.plot([x1,x2], [y1,y2], 'c-', linewidth=3, label = "Acc=%0.2f; p%% rule=%0.0f%% - Original"%(acc1, p1))
		y1,y2 = ut.get_line_coordinates(w2, x1, x2)
		plt.plot([x1,x2], [y1,y2], 'b--', linewidth=3, label = "Acc=%0.2f; p%% rule=%0.0f%% - Constrained"%(acc2, p2))



		plt.tick_params(axis='x', which='both', bottom='off', top='off', labelbottom='off') # don't need the ticks to see the data distribution
		plt.tick_params(axis='y', which='both', left='off', right='off', labelleft='off')
		plt.legend(loc=2, fontsize=15)
		plt.xlim((-15,10))
		plt.ylim((-10,15))
		plt.savefig(fname)
		plt.show()


	""" Classify the data while optimizing for accuracy """
	print
	print "== Unconstrained (original) classifier =="
	# all constraint flags are set to 0 since we want to train an unconstrained (original) classifier
	apply_fairness_constraints = 0
	apply_accuracy_constraint = 0
	sep_constraint = 0
	w_uncons, p_uncons, acc_uncons = train_test_classifier()
	
	""" Now classify such that we optimize for accuracy while achieving perfect fairness """
	apply_fairness_constraints = 1 # set this flag to one since we want to optimize accuracy subject to fairness constraints
	apply_accuracy_constraint = 0
	sep_constraint = 0
	sensitive_attrs_to_cov_thresh = {"s1":0}
	print
	print "== Classifier with fairness constraint =="
	w_f_cons, p_f_cons, acc_f_cons  = train_test_classifier()
	plot_boundaries(w_uncons, w_f_cons, p_uncons, p_f_cons, acc_uncons, acc_f_cons, "img/f_cons.png")


	""" Classify such that we optimize for fairness subject to a certain loss in accuracy """
	apply_fairness_constraints = 0 # flag for fairness constraint is set back to 0 since we want to apply the accuracy constraint now
	apply_accuracy_constraint = 1 # now, we want to optimize fairness subject to accuracy constraints
	sep_constraint = 0
	gamma = 0.5 # gamma controls how much loss in accuracy we are willing to incur to achieve fairness -- increase gamma to allow more loss in accuracy
	print "== Classifier with accuracy constraint =="
	w_a_cons, p_a_cons, acc_a_cons = train_test_classifier()	
	plot_boundaries(w_uncons, w_a_cons, p_uncons, p_a_cons, acc_uncons, acc_a_cons, "img/a_cons.png")

	""" 
	Classify such that we optimize for fairness subject to a certain loss in accuracy 
	In addition, make sure that no points classified as positive by the unconstrained (original) classifier are misclassified.

	"""
	apply_fairness_constraints = 0 # flag for fairness constraint is set back to 0 since we want to apply the accuracy constraint now
	apply_accuracy_constraint = 1 # now, we want to optimize fairness subject to accuracy constraints
	sep_constraint = 1 # set the separate constraint flag to one, since in addition to the accuracy constraint, we also want no misclassifications for certain points (details in demo README.md)
	gamma = 2000.0
	print "== Classifier with accuracy constraint (no +ve misclassification) =="
	w_a_cons_fine, p_a_cons_fine, acc_a_cons_fine  = train_test_classifier()
	plot_boundaries(w_uncons, w_a_cons_fine, p_uncons, p_a_cons_fine, acc_uncons, acc_a_cons_fine, "img/a_cons_fine.png")

	return
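plot_boundaries above draws the decision boundaries using ut.get_line_coordinates; for a linear model with the boundary w[0] + w[1]*x + w[2]*y = 0, the y-coordinates at two x positions can be derived as in this sketch (an illustration of the geometry, not necessarily the library's exact code):

def line_y_coords(w, x1, x2):
    """y-values of the boundary w[0] + w[1]*x + w[2]*y = 0 at x = x1 and x = x2."""
    y1 = -(w[0] + w[1] * x1) / w[2]
    y2 = -(w[0] + w[1] * x2) / w[2]
    return y1, y2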
Code example #7
def test_synthetic_data():
	
	""" Generate the synthetic data """
	data_type = 1
	X, y, x_control = generate_synthetic_data(data_type=data_type, plot_data=False) # set plot_data to False to skip the data plot
	sensitive_attrs = x_control.keys()

	""" Split the data into train and test """
	train_fold_size = 0.5
	x_train, y_train, x_control_train, x_test, y_test, x_control_test = ut.split_into_train_test(X, y, x_control, train_fold_size)

	cons_params = None # constraint parameters, will use them later
	loss_function = "logreg" # perform the experiments with logistic regression
	EPS = 1e-4

	def train_test_classifier():
		w = fdm.train_model_disp_mist(x_train, y_train, x_control_train, loss_function, EPS, cons_params)

		train_score, test_score, cov_all_train, cov_all_test, s_attr_to_fp_fn_train, s_attr_to_fp_fn_test = fdm.get_clf_stats(w, x_train, y_train, x_control_train, x_test, y_test, x_control_test, sensitive_attrs)

		
		# accuracy and FPR are for the test set because we need them for plotting
		# the covariance is for the training set, because we need it for setting the thresholds
		return w, test_score, s_attr_to_fp_fn_test, cov_all_train
		

	""" Classify the data while optimizing for accuracy """
	print
	print "== Unconstrained (original) classifier =="
	w_uncons, acc_uncons, s_attr_to_fp_fn_test_uncons, cov_all_train_uncons = train_test_classifier()
	print "\n-----------------------------------------------------------------------------------\n"

	""" Now classify such that we optimize for accuracy while achieving perfect fairness """
	
	print
	print "== Classifier with fairness constraint =="

	it = 0.05
	mult_range = np.arange(1.0, 0.0-it, -it).tolist()


	acc_arr = []
	fpr_per_group = {0:[], 1:[]}
	fnr_per_group = {0:[], 1:[]}

	cons_type = 1 # FPR constraint -- just change the cons_type, the rest of parameters should stay the same
	tau = 5.0
	mu = 1.2

	for m in mult_range:
		sensitive_attrs_to_cov_thresh = deepcopy(cov_all_train_uncons)
		for s_attr in sensitive_attrs_to_cov_thresh.keys():
			for cov_type in sensitive_attrs_to_cov_thresh[s_attr].keys():
				for s_val in sensitive_attrs_to_cov_thresh[s_attr][cov_type]:
					sensitive_attrs_to_cov_thresh[s_attr][cov_type][s_val] *= m


		cons_params = {"cons_type": cons_type, 
						"tau": tau, 
						"mu": mu, 
						"sensitive_attrs_to_cov_thresh": sensitive_attrs_to_cov_thresh}

		w_cons, acc_cons, s_attr_to_fp_fn_test_cons, cov_all_train_cons  = train_test_classifier()
		
		fpr_per_group[0].append(s_attr_to_fp_fn_test_cons["s1"][0.0]["fpr"])
		fpr_per_group[1].append(s_attr_to_fp_fn_test_cons["s1"][1.0]["fpr"])
		fnr_per_group[0].append(s_attr_to_fp_fn_test_cons["s1"][0.0]["fnr"])
		fnr_per_group[1].append(s_attr_to_fp_fn_test_cons["s1"][1.0]["fnr"])


		acc_arr.append(acc_cons)


	fs = 15

	ax = plt.subplot(2,1,1)
	plt.plot(mult_range, fpr_per_group[0], "-o" , color="green", label = "Group-0")
	plt.plot(mult_range, fpr_per_group[1], "-o", color="blue", label = "Group-1")
	ax.set_xlim([max(mult_range), min(mult_range) ])
	plt.ylabel('False positive rate', fontsize=fs)
	ax.legend(fontsize=fs)
	

	ax = plt.subplot(2,1,2)
	plt.plot(mult_range, acc_arr, "-o" , color="green", label = "")
	ax.set_xlim([max(mult_range), min(mult_range) ])
	plt.xlabel('Covariance multiplicative factor (m)', fontsize=fs)
	plt.ylabel('Accuracy', fontsize=fs)

	plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.5)
	plt.savefig("img/fairness_acc_tradeoff_cons_type_%d.png" % cons_type)
	plt.show()


	return
Code example #8
def test_compas_data():
    """ Generate the synthetic data """
    data_type = 1
    X, y, x_control = load_compas_data()
    sensitive_attrs = x_control.keys()
    """ Split the data into train and test """
    train_fold_size = 0.5
    x_train, y_train, x_control_train, x_test, y_test, x_control_test = ut.split_into_train_test(
        X, y, x_control, train_fold_size)

    cons_params = None  # constraint parameters, will use them later
    loss_function = "logreg"  # perform the experiments with logistic regression
    EPS = 1e-6

    def train_test_classifier():
        w = fdm.train_model_disp_mist(x_train, y_train, x_control_train,
                                      loss_function, EPS, cons_params)

        train_score, test_score, cov_all_train, cov_all_test, s_attr_to_fp_fn_train, s_attr_to_fp_fn_test = fdm.get_clf_stats(
            w, x_train, y_train, x_control_train, x_test, y_test,
            x_control_test, sensitive_attrs)

        # accuracy and FPR are for the test set because we need them for plotting
        return w, test_score, s_attr_to_fp_fn_test

    """ Classify the data while optimizing for accuracy """
    print
    print "== Unconstrained (original) classifier =="
    w_uncons, acc_uncons, s_attr_to_fp_fn_test_uncons = train_test_classifier()
    print "\n-----------------------------------------------------------------------------------\n"
    """ Now classify such that we optimize for accuracy while achieving perfect fairness """

    print

    print "\n\n== Constraints on FPR =="  # setting parameter for constraints
    cons_type = 1  # FPR constraint -- just change the cons_type, the rest of parameters should stay the same
    tau = 5.0
    mu = 1.2
    sensitive_attrs_to_cov_thresh = {
        "race": {
            0: {
                0: 0,
                1: 0
            },
            1: {
                0: 0,
                1: 0
            },
            2: {
                0: 0,
                1: 0
            }
        }
    }  # a zero covariance threshold means we aim for the fairest solution
    cons_params = {
        "cons_type": cons_type,
        "tau": tau,
        "mu": mu,
        "sensitive_attrs_to_cov_thresh": sensitive_attrs_to_cov_thresh
    }

    w_cons, acc_cons, s_attr_to_fp_fn_test_cons = train_test_classifier()
    print "\n-----------------------------------------------------------------------------------\n"

    return
Code example #9
def test_adult_data():
    """ Load the adult data """
    X, y, x_control = load_adult_data(
        load_data_size=None
    )  # load_data_size=None (or no argument) uses the whole dataset; pass a size to subsample for a performance speedup
    ut.compute_p_rule(x_control["sex"],
                      y)  # compute the p-rule in the original data
    """ Split the data into train and test """
    X = ut.add_intercept(
        X)  # add intercept to X before applying the linear classifier
    train_fold_size = 0.7
    x_train, y_train, x_control_train, x_test, y_test, x_control_test = ut.split_into_train_test(
        X, y, x_control, train_fold_size)

    apply_fairness_constraints = None
    apply_accuracy_constraint = None
    sep_constraint = None

    loss_function = lf._logistic_loss
    sensitive_attrs = ["sex"]
    sensitive_attrs_to_cov_thresh = {}
    gamma = None

    def train_test_classifier():
        w = ut.train_model(x_train, y_train, x_control_train, loss_function,
                           apply_fairness_constraints,
                           apply_accuracy_constraint, sep_constraint,
                           sensitive_attrs, sensitive_attrs_to_cov_thresh,
                           gamma)
        train_score, test_score, correct_answers_train, correct_answers_test = ut.check_accuracy(
            w, x_train, y_train, x_test, y_test, None, None)
        distances_boundary_test = (np.dot(x_test, w)).tolist()
        all_class_labels_assigned_test = np.sign(distances_boundary_test)
        correlation_dict_test = ut.get_correlations(
            None, None, all_class_labels_assigned_test, x_control_test,
            sensitive_attrs)
        cov_dict_test = ut.print_covariance_sensitive_attrs(
            None, x_test, distances_boundary_test, x_control_test,
            sensitive_attrs)
        p_rule = ut.print_classifier_fairness_stats([test_score],
                                                    [correlation_dict_test],
                                                    [cov_dict_test],
                                                    sensitive_attrs[0])
        eq_op_acc, chance_bin_zero, chance_bin_one = ut.get_eq_op_acc(
            w, x_train, y_train, x_control_train, None)
        eq_odds_acc = ut.get_eq_odds_acc(w, x_train, y_train, x_control_train,
                                         None)
        pred_rate_par_acc = ut.get_pred_rate_par_acc(w, x_train, y_train,
                                                     x_control_train, None)
        demo_par_acc_f_cons = ut.get_dem_par_acc(w, x_train, y_train,
                                                 x_control_train, None)
        return w, p_rule, test_score, eq_op_acc, eq_odds_acc, pred_rate_par_acc, demo_par_acc_f_cons

    """ Classify the data while optimizing for accuracy """
    print()
    print("== Unconstrained (original) classifier ==")
    # all constraint flags are set to 0 since we want to train an unconstrained (original) classifier
    apply_fairness_constraints = 0
    apply_accuracy_constraint = 0
    sep_constraint = 0
    w_uncons, p_uncons, acc_uncons, eq_op_acc_uncons, eq_odds_acc_uncons, pred_rate_par_acc_uncons, demo_par_acc_uncons = train_test_classifier(
    )

    temp_eq_op_acc_f = []
    temp_eq_odds_acc_f = []
    temp_pred_rate_par_acc_f = []
    temp_demo_par_acc_f = []
    """ Now classify such that we optimize for accuracy while achieving perfect fairness """
    apply_fairness_constraints = 1  # set this flag to one since we want to optimize accuracy subject to fairness constraints
    apply_accuracy_constraint = 0
    sep_constraint = 0
    for num in np.arange(0, 0.51, 0.1):
        sensitive_attrs_to_cov_thresh = {"sex": num}
        print()
        print("== Classifier with fairness constraint, cov: ", num, " ==")
        w_f_cons, p_f_cons, acc_f_cons, eq_op_acc_f_cons, eq_odds_acc_f_cons, pred_rate_par_acc_f_cons, demo_par_acc_f_cons = train_test_classifier(
        )
        temp_eq_op_acc_f.append(eq_op_acc_f_cons)
        temp_eq_odds_acc_f.append(eq_odds_acc_f_cons)
        temp_pred_rate_par_acc_f.append(pred_rate_par_acc_f_cons)
        temp_demo_par_acc_f.append(demo_par_acc_f_cons)

    sensitive_attrs_to_cov_thresh = {"sex": 1}
    print()
    print("== Classifier with fairness constraint, cov: 1 ==")
    w_f_cons, p_f_cons, acc_f_cons, eq_op_acc_f_cons, eq_odds_acc_f_cons, pred_rate_par_acc_f_cons, demo_par_acc_f_cons = train_test_classifier(
    )
    temp_eq_op_acc_f.append(eq_op_acc_f_cons)
    temp_eq_odds_acc_f.append(eq_odds_acc_f_cons)
    temp_pred_rate_par_acc_f.append(pred_rate_par_acc_f_cons)
    temp_demo_par_acc_f.append(demo_par_acc_f_cons)

    return eq_op_acc_uncons, eq_odds_acc_uncons, pred_rate_par_acc_uncons, demo_par_acc_uncons, temp_eq_op_acc_f, temp_eq_odds_acc_f, temp_pred_rate_par_acc_f, temp_demo_par_acc_f
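The four fairness metrics collected above come from project-specific helpers (ut.get_eq_op_acc, ut.get_eq_odds_acc, ut.get_pred_rate_par_acc, ut.get_dem_par_acc). As a rough illustration of the underlying notions rather than those helpers' exact definitions, demographic-parity and equalized-odds gaps can be computed from predictions like this:

import numpy as np

def demographic_parity_gap(y_pred, s):
    """Absolute difference in positive-prediction rates between the two groups."""
    return abs(np.mean(y_pred[s == 1.0] == 1) - np.mean(y_pred[s == 0.0] == 1))

def equalized_odds_gap(y_true, y_pred, s):
    """Largest between-group gap in P(y_pred = 1 | y_true = y) over y in {-1, +1}."""
    gaps = []
    for y_val in (-1.0, 1.0):
        r1 = np.mean(y_pred[(s == 1.0) & (y_true == y_val)] == 1)
        r0 = np.mean(y_pred[(s == 0.0) & (y_true == y_val)] == 1)
        gaps.append(abs(r1 - r0))
    return max(gaps)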