Ejemplo n.º 1
0
def reject_option(dataset_orig_valid, dataset_orig_valid_pred,
                  dataset_orig_test, dataset_orig_test_pred, privileged_groups,
                  unprivileged_groups, metric_ub=0.05, metric_lb=-0.05):
    """Post-process test predictions with Reject Option Classification.

    Sweeps 100 classification thresholds over the validation predictions to
    find the one maximizing balanced accuracy, fits an AIF360
    ``RejectOptionClassification`` model on the validation split, relabels
    the test predictions at the best threshold, and returns the
    ROC-transformed test predictions.

    Args:
        dataset_orig_valid: ground-truth validation dataset.
        dataset_orig_valid_pred: validation dataset carrying predicted
            scores/labels (mutated in place during the threshold sweep).
        dataset_orig_test: ground-truth test dataset (currently unused;
            kept for interface compatibility with existing callers).
        dataset_orig_test_pred: test dataset carrying predicted scores
            (labels are overwritten at the best threshold).
        privileged_groups: AIF360 privileged group definitions.
        unprivileged_groups: AIF360 unprivileged group definitions.
        metric_ub: upper bound on the fairness metric for ROC. Previously
            read from an undefined global; now a parameter.
        metric_lb: lower bound on the fairness metric for ROC. Previously
            read from an undefined global; now a parameter.

    Returns:
        The ROC-predicted test dataset.
    """
    num_thresh = 100
    ba_arr = np.zeros(num_thresh)
    class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)
    for idx, class_thresh in enumerate(class_thresh_arr):
        # Relabel the validation predictions at this candidate threshold.
        fav_inds = dataset_orig_valid_pred.scores > class_thresh
        dataset_orig_valid_pred.labels[
            fav_inds] = dataset_orig_valid_pred.favorable_label
        dataset_orig_valid_pred.labels[
            ~fav_inds] = dataset_orig_valid_pred.unfavorable_label

        classified_metric_orig_valid = ClassificationMetric(
            dataset_orig_valid,
            dataset_orig_valid_pred,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)

        # Balanced accuracy = mean of TPR and TNR.
        ba_arr[idx] = 0.5 * (
            classified_metric_orig_valid.true_positive_rate()
            + classified_metric_orig_valid.true_negative_rate())

    # First threshold achieving the maximum balanced accuracy.
    best_ind = np.where(ba_arr == np.max(ba_arr))[0][0]
    best_class_thresh = class_thresh_arr[best_ind]

    ROC = RejectOptionClassification(
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
        low_class_thresh=0.01,
        high_class_thresh=0.99,
        num_class_thresh=100,
        num_ROC_margin=50,
        metric_name="Statistical parity difference",
        metric_ub=metric_ub,
        metric_lb=metric_lb)

    ROC = ROC.fit(dataset_orig_valid, dataset_orig_valid_pred)

    # Relabel the test predictions at the best validation threshold before
    # handing them to ROC (ROC.predict applies its own fitted threshold).
    fav_inds = dataset_orig_test_pred.scores > best_class_thresh
    dataset_orig_test_pred.labels[
        fav_inds] = dataset_orig_test_pred.favorable_label
    dataset_orig_test_pred.labels[
        ~fav_inds] = dataset_orig_test_pred.unfavorable_label

    dataset_transf_test_pred = ROC.predict(dataset_orig_test_pred)

    return dataset_transf_test_pred
    '''
def Postprocessing(reweighted_data,
                   pred,
                   label,
                   unprivileged_groups,
                   privileged_groups,
                   protected_attribute,
                   favorable_label,
                   unfavorable_label,
                   threshold=0.01):
    """Fit Reject Option Classification and return the corrected predictions.

    Builds binary-label datasets from the reweighted ground truth and from
    the model predictions, fits an AIF360 ``RejectOptionClassification``
    post-processor on them, and returns the post-processed predictions as a
    pandas DataFrame.
    """
    truth_bld = generate_binary_label_dataset(
        reweighted_data, label, protected_attribute, favorable_label,
        unfavorable_label)
    pred_bld = generate_binary_label_dataset(
        pred, label, protected_attribute, favorable_label, unfavorable_label)

    roc = RejectOptionClassification(unprivileged_groups=unprivileged_groups,
                                     privileged_groups=privileged_groups,
                                     low_class_thresh=threshold)
    roc.fit(truth_bld, pred_bld)

    transformed = roc.predict(pred_bld)
    # convert_to_dataframe() returns (df, attrs); only the frame is needed.
    return transformed.convert_to_dataframe()[0]
Ejemplo n.º 3
0
def calculate(pre_process,in_process,post_process,dataset_original,privileged_groups,unprivileged_groups,optim_options,in_process_epochs):
	"""Run one (pre, in, post)-processing fairness pipeline and return its metric report.

	pre_process selects the pre-processor (0=none, 1=DisparateImpactRemover,
	2=LFR, 3=OptimPreproc, 4=Reweighing); in_process selects the classifier
	(0=PlainModel, 1=AdversarialDebiasing, 2=ARTClassifier over logistic
	regression, 3=PrejudiceRemover); post_process selects the post-processor
	(0=none, 1=CalibratedEqOddsPostprocessing, 2=EqOddsPostprocessing,
	3=RejectOptionClassification).  Returns the final get_metric_reports()
	result computed on the post-processed test predictions; the intermediate
	reports after pre- and in-processing are computed but overwritten.

	NOTE(review): split([0.3]) puts 30% of the data in the *train* split --
	confirm this proportion is intentional.
	"""

	dataset_original_train, dataset_original_test = dataset_original.split([0.3], shuffle=True)

	# Scale features on the train split only, then apply to test.
	min_max_scaler=MinMaxScaler()
	dataset_original_train.features=min_max_scaler.fit_transform(dataset_original_train.features)
	dataset_original_test.features=min_max_scaler.transform(dataset_original_test.features)

	#Pre-processing begin
	dataset_after_pre_train=copy.deepcopy(dataset_original_train)
	dataset_after_pre_test=copy.deepcopy(dataset_original_test)
	if pre_process==0:
		pass
	if pre_process==1:
		# NOTE(review): fit_transform is called on the test split too, so the
		# repair is re-fit on test data rather than reusing the train fit.
		pre_DIR=DisparateImpactRemover(repair_level=1.0)
		dataset_after_pre_train=pre_DIR.fit_transform(dataset_after_pre_train)
		dataset_after_pre_test=pre_DIR.fit_transform(dataset_after_pre_test)
	if pre_process==2:
		pre_LFR=LFR(unprivileged_groups=unprivileged_groups,privileged_groups=privileged_groups)
		pre_LFR.fit(dataset_after_pre_train)
		dataset_after_pre_train=pre_LFR.transform(dataset_after_pre_train)
		dataset_after_pre_test=pre_LFR.transform(dataset_after_pre_test)
	if pre_process==3:
		pre_OP=OptimPreproc(OptTools,optim_options,unprivileged_groups=unprivileged_groups,privileged_groups=privileged_groups)
		pre_OP.fit(dataset_original_train)
		dataset_after_pre_train=pre_OP.transform(dataset_original_train,transform_Y=True)
		dataset_after_pre_test=pre_OP.transform(dataset_original_test,transform_Y=True)
	if pre_process==4:
		pre_RW=Reweighing(unprivileged_groups=unprivileged_groups,privileged_groups=privileged_groups)
		pre_RW.fit(dataset_original_train)
		dataset_after_pre_train=pre_RW.transform(dataset_original_train)
		dataset_after_pre_test=pre_RW.transform(dataset_original_test)
	#Pre-processing end

	# Report after pre-processing (overwritten below; kept for debugging).
	report=get_metric_reports(
		true_dataset=dataset_original_test,
		classfied_dataset=dataset_after_pre_test,
		privileged_groups=privileged_groups,
		unprivileged_groups=unprivileged_groups
	)
	# print('After Pre-process:')
	# print(report)

	#In-processing begin
	dataset_after_in_train=copy.deepcopy(dataset_after_pre_train)
	dataset_after_in_test=copy.deepcopy(dataset_after_pre_test)
	if in_process==0:
		# TF1-style session lifecycle: each branch opens a session, trains,
		# predicts, then closes and resets the default graph.
		sess = tf.Session()
		in_PM=PlainModel(
			privileged_groups=privileged_groups,
			unprivileged_groups=unprivileged_groups,
			scope_name='plain_classifier',
			num_epochs=in_process_epochs,
			sess=sess)
		in_PM.fit(dataset_after_in_train)
		dataset_after_in_train=in_PM.predict(dataset_after_in_train)
		dataset_after_in_test=in_PM.predict(dataset_after_in_test)
		sess.close()
		tf.reset_default_graph()
	if in_process==1:
		sess = tf.Session()
		in_AD=AdversarialDebiasing(
			privileged_groups=privileged_groups,
			unprivileged_groups=unprivileged_groups,
			scope_name='debiased_classifier',
			num_epochs=in_process_epochs,
			debias=True,
			sess=sess)
		in_AD.fit(dataset_after_in_train)
		dataset_after_in_train=in_AD.predict(dataset_after_in_train)
		dataset_after_in_test=in_AD.predict(dataset_after_in_test)
		sess.close()
		tf.reset_default_graph()
	if in_process==2:
		# in_process_epochs doubles as max_iter for logistic regression here.
		in_ART=ARTClassifier(SklearnClassifier(model=LogisticRegression(max_iter=in_process_epochs)))
		in_ART.fit(dataset_after_in_train)
		dataset_after_in_train=in_ART.predict(dataset_after_in_train)
		dataset_after_in_test=in_ART.predict(dataset_after_in_test)
	if in_process==3:
		# PrejudiceRemover needs the sensitive attribute name, taken from the
		# first key of the first privileged-group dict.
		sens_attr=list(privileged_groups[0].keys())[0]
		in_PM=PrejudiceRemover(sensitive_attr=sens_attr,eta=25.0)
		in_PM.fit(dataset_after_in_train)
		dataset_after_in_train=in_PM.predict(dataset_after_in_train)
		dataset_after_in_test=in_PM.predict(dataset_after_in_test)
	#In-process end

	# Report after in-processing (overwritten below; kept for debugging).
	report=get_metric_reports(
		true_dataset=dataset_original_test,
		classfied_dataset=dataset_after_in_test,
		privileged_groups=privileged_groups,
		unprivileged_groups=unprivileged_groups
	)
	# print('After In-process:')
	# print(report)

	#Post-process begin
	dataset_after_post_train=copy.deepcopy(dataset_after_in_train)
	dataset_after_post_test=copy.deepcopy(dataset_after_in_test)
	if post_process==0:
		pass
	if post_process==1:
		post_CEO=CalibratedEqOddsPostprocessing(
			privileged_groups=privileged_groups,
			unprivileged_groups=unprivileged_groups)
		# Fit on (pre-processed truth, in-processed predictions) of the train split.
		post_CEO.fit(dataset_true=dataset_after_pre_train,dataset_pred=dataset_after_in_train)
		dataset_after_post_train=post_CEO.predict(dataset_after_post_train)
		dataset_after_post_test=post_CEO.predict(dataset_after_post_test)
	if post_process==2:
		post_EO=EqOddsPostprocessing(unprivileged_groups=unprivileged_groups,privileged_groups=privileged_groups)
		post_EO.fit(dataset_true=dataset_after_pre_train,dataset_pred=dataset_after_in_train)
		dataset_after_post_train=post_EO.predict(dataset_after_post_train)
		dataset_after_post_test=post_EO.predict(dataset_after_post_test)
	if post_process==3:
		# Allow the statistical parity difference to vary within +/-0.05.
		metric_ub=0.05
		metric_lb=-0.05
		post_ROC=RejectOptionClassification(
			unprivileged_groups=unprivileged_groups, 
			privileged_groups=privileged_groups,
			low_class_thresh=0.01, high_class_thresh=0.99,
			num_class_thresh=100, num_ROC_margin=50,
			metric_name="Statistical parity difference",
			metric_ub=metric_ub, metric_lb=metric_lb)
		post_ROC.fit(dataset_true=dataset_after_pre_train,dataset_pred=dataset_after_in_train)
		dataset_after_post_train=post_ROC.predict(dataset_after_post_train)
		dataset_after_post_test=post_ROC.predict(dataset_after_post_test)
	#Post-processing end

	#Measuring unfairness begin
	report=get_metric_reports(
		true_dataset=dataset_original_test,
		classfied_dataset=dataset_after_post_test,
		privileged_groups=privileged_groups,
		unprivileged_groups=unprivileged_groups
	)

	# print('After Post-process:')
	# print(report)

	return report
Ejemplo n.º 4
0
def comb_algorithm(l, m, n, dataset_original1, privileged_groups1,
                   unprivileged_groups1, optim_options1):
    """Evaluate one (pre, in, post)-processing combination indexed by (l, m, n).

    l selects a pre-processing algorithm (0 = none, else preAlgorithm[l-1]);
    m selects an in-processing algorithm (0 = plain training, else
    inAlgorithm[m-1]); n selects a post-processing algorithm (0 = none,
    else postAlgorithm[n-1]).  All inputs are deep-copied so repeated calls
    do not mutate the caller's objects.

    Returns:
        A string of the rounded metric values, formatted "[ v1 v2 ... vk]".

    NOTE(review): preAlgorithm, inAlgorithm, postAlgorithm, cost_constraint,
    allowed_metrics, metric_ub and metric_lb are read from module globals
    not visible in this block -- confirm they are defined at import time.
    """

    dataset_original2 = copy.deepcopy(dataset_original1)
    privileged_groups2 = copy.deepcopy(privileged_groups1)
    unprivileged_groups2 = copy.deepcopy(unprivileged_groups1)
    optim_options2 = copy.deepcopy(optim_options1)

    print(l, m, n)
    # 70% train; the remainder is split evenly into validation and test.
    dataset_orig_train, dataset_orig_vt = dataset_original2.split([0.7],
                                                                  shuffle=True)
    dataset_orig_valid, dataset_orig_test = dataset_orig_vt.split([0.5],
                                                                  shuffle=True)

    if l == 0:
        dataset_transf_train, dataset_transf_valid, dataset_transf_test = dataset_orig_train, dataset_orig_valid, dataset_orig_test
    else:
        pre_used = preAlgorithm[l - 1]
        dataset_transf_train, dataset_transf_valid, dataset_transf_test = Pre(
            pre_used, dataset_orig_train, dataset_orig_valid,
            dataset_orig_test, privileged_groups2, unprivileged_groups2,
            optim_options2)

    #assert (l,m,n)!=(2,0,0)
    #assert not np.all(dataset_transf_train.labels.flatten()==1.0)

    if m == 0:
        dataset_transf_valid_pred, dataset_transf_test_pred = train(
            dataset_transf_train, dataset_transf_valid, dataset_transf_test,
            privileged_groups2, unprivileged_groups2)
    else:
        in_used = inAlgorithm[m - 1]
        if in_used == "adversarial_debiasing":
            dataset_transf_valid_pred, dataset_transf_test_pred = adversarial_debiasing(
                dataset_transf_train, dataset_transf_valid,
                dataset_transf_test, privileged_groups2, unprivileged_groups2)
        elif in_used == "art_classifier":
            dataset_transf_valid_pred, dataset_transf_test_pred = art_classifier(
                dataset_transf_train, dataset_transf_valid,
                dataset_transf_test, privileged_groups2, unprivileged_groups2)
        elif in_used == "prejudice_remover":
            # Sensitive attribute name = last key of the first group dict.
            for key, value in privileged_groups2[0].items():
                sens_attr = key
            dataset_transf_valid_pred, dataset_transf_test_pred = prejudice_remover(
                dataset_transf_train, dataset_transf_valid,
                dataset_transf_test, privileged_groups2, unprivileged_groups2,
                sens_attr)

    if n == 0:
        dataset_transf_test_pred_transf = dataset_transf_test_pred

    else:
        post_used = postAlgorithm[n - 1]
        if post_used == "calibrated_eqodds":
            cpp = CalibratedEqOddsPostprocessing(
                privileged_groups=privileged_groups2,
                unprivileged_groups=unprivileged_groups2,
                cost_constraint=cost_constraint,
                seed=1)
            cpp = cpp.fit(dataset_transf_valid, dataset_transf_valid_pred)
            dataset_transf_test_pred_transf = cpp.predict(
                dataset_transf_test_pred)

        elif post_used == "eqodds":
            EO = EqOddsPostprocessing(unprivileged_groups=unprivileged_groups2,
                                      privileged_groups=privileged_groups2,
                                      seed=1)
            EO = EO.fit(dataset_transf_valid, dataset_transf_valid_pred)
            dataset_transf_test_pred_transf = EO.predict(
                dataset_transf_test_pred)

        elif post_used == "reject_option":
            ROC = RejectOptionClassification(
                unprivileged_groups=unprivileged_groups2,
                privileged_groups=privileged_groups2,
                low_class_thresh=0.01,
                high_class_thresh=0.99,
                num_class_thresh=100,
                num_ROC_margin=50,
                metric_name=allowed_metrics[0],
                metric_ub=metric_ub,
                metric_lb=metric_lb)
            ROC = ROC.fit(dataset_transf_valid, dataset_transf_valid_pred)
            dataset_transf_test_pred_transf = ROC.predict(
                dataset_transf_test_pred)

    metric = ClassificationMetric(dataset_transf_test,
                                  dataset_transf_test_pred_transf,
                                  unprivileged_groups=unprivileged_groups2,
                                  privileged_groups=privileged_groups2)

    metrics = OrderedDict()
    metrics["Classification accuracy"] = metric.accuracy()
    TPR = metric.true_positive_rate()
    TNR = metric.true_negative_rate()
    bal_acc_nodebiasing_test = 0.5 * (TPR + TNR)
    metrics["Balanced classification accuracy"] = bal_acc_nodebiasing_test
    metrics[
        "Statistical parity difference"] = metric.statistical_parity_difference(
        )
    metrics["Disparate impact"] = metric.disparate_impact()
    metrics[
        "Equal opportunity difference"] = metric.equal_opportunity_difference(
        )
    metrics["Average odds difference"] = metric.average_odds_difference()
    metrics["Theil index"] = metric.theil_index()
    metrics["United Fairness"] = metric.generalized_entropy_index()
    # print(metrics)

    # Build "[ v1 v2 ... ]" without reusing the in-processing index `m` as
    # the loop variable (the original loop shadowed the parameter).
    feature = "[" + "".join(
        " " + str(round(value, 4)) for value in metrics.values()) + "]"

    return feature
Ejemplo n.º 5
0
def comb_algorithm(l, m, n, dataset_original1, privileged_groups1,
                   unprivileged_groups1, optim_options1):
    """Evaluate one (pre, in, post)-processing combination indexed by (l, m, n).

    Variant that deep-copies every intermediate dataset, prints the
    in-processing accuracy on the test split, and returns both the metric
    values and their string rendering.

    Returns:
        (feature, feature_str): a list of rounded metric values and the
        same values formatted as "[v1 v2 ... ]".

    NOTE(review): preAlgorithm, inAlgorithm, postAlgorithm and
    cost_constraint are read from module globals not visible in this
    block -- confirm they are defined at import time.
    """

    dataset_original2 = copy.deepcopy(dataset_original1)
    privileged_groups2 = copy.deepcopy(privileged_groups1)
    unprivileged_groups2 = copy.deepcopy(unprivileged_groups1)
    optim_options2 = copy.deepcopy(optim_options1)

    print(l, m, n)
    # 70% train; the remainder is split evenly into validation and test.
    dataset_original_train, dataset_original_vt = dataset_original2.split(
        [0.7], shuffle=True)
    dataset_original_valid, dataset_original_test = dataset_original_vt.split(
        [0.5], shuffle=True)
    # (removed a self-assignment no-op on dataset_original_test.labels)
    print('=======================')
    #print(dataset_original_test.labels)
    dataset_orig_train = copy.deepcopy(dataset_original_train)
    dataset_orig_valid = copy.deepcopy(dataset_original_valid)
    dataset_orig_test = copy.deepcopy(dataset_original_test)

    if l == 0:
        dataset_transfer_train = copy.deepcopy(dataset_original_train)
        dataset_transfer_valid = copy.deepcopy(dataset_original_valid)
        dataset_transfer_test = copy.deepcopy(dataset_original_test)
    else:
        pre_used = preAlgorithm[l - 1]
        dataset_transfer_train, dataset_transfer_valid, dataset_transfer_test = Pre(
            pre_used, dataset_orig_train, dataset_orig_valid,
            dataset_orig_test, privileged_groups2, unprivileged_groups2,
            optim_options2)

    dataset_transf_train = copy.deepcopy(dataset_transfer_train)
    dataset_transf_valid = copy.deepcopy(dataset_transfer_valid)
    dataset_transf_test = copy.deepcopy(dataset_transfer_test)
    if m == 0:
        dataset_transfer_valid_pred, dataset_transfer_test_pred = plain_model(
            dataset_transf_train, dataset_transf_valid, dataset_transf_test,
            privileged_groups2, unprivileged_groups2)
    else:
        in_used = inAlgorithm[m - 1]
        if in_used == "adversarial_debiasing":
            dataset_transfer_valid_pred, dataset_transfer_test_pred = adversarial_debiasing(
                dataset_transf_train, dataset_transf_valid,
                dataset_transf_test, privileged_groups2, unprivileged_groups2)
        elif in_used == "art_classifier":
            dataset_transfer_valid_pred, dataset_transfer_test_pred = art_classifier(
                dataset_transf_train, dataset_transf_valid,
                dataset_transf_test, privileged_groups2, unprivileged_groups2)
        elif in_used == "prejudice_remover":
            # Sensitive attribute name = last key of the first group dict.
            for key, value in privileged_groups2[0].items():
                sens_attr = key
            dataset_transfer_valid_pred, dataset_transfer_test_pred = prejudice_remover(
                dataset_transf_train, dataset_transf_valid,
                dataset_transf_test, privileged_groups2, unprivileged_groups2,
                sens_attr)

    dataset_transf_valid_pred = copy.deepcopy(dataset_transfer_valid_pred)
    dataset_transf_test_pred = copy.deepcopy(dataset_transfer_test_pred)
    if n == 0:
        dataset_transf_test_pred_transf = copy.deepcopy(
            dataset_transfer_test_pred)

    else:
        post_used = postAlgorithm[n - 1]
        if post_used == "calibrated_eqodds":
            cpp = CalibratedEqOddsPostprocessing(
                privileged_groups=privileged_groups2,
                unprivileged_groups=unprivileged_groups2,
                cost_constraint=cost_constraint)
            cpp = cpp.fit(dataset_transfer_valid, dataset_transf_valid_pred)
            dataset_transf_test_pred_transf = cpp.predict(
                dataset_transf_test_pred)

        elif post_used == "eqodds":
            EO = EqOddsPostprocessing(unprivileged_groups=unprivileged_groups2,
                                      privileged_groups=privileged_groups2)
            EO = EO.fit(dataset_transfer_valid, dataset_transf_valid_pred)
            dataset_transf_test_pred_transf = EO.predict(
                dataset_transf_test_pred)

        elif post_used == "reject_option":
            ROC = RejectOptionClassification(
                unprivileged_groups=unprivileged_groups2,
                privileged_groups=privileged_groups2)
            ROC = ROC.fit(dataset_transfer_valid, dataset_transf_valid_pred)
            dataset_transf_test_pred_transf = ROC.predict(
                dataset_transf_test_pred)

    org_labels = dataset_orig_test.labels
    print(dataset_orig_test.labels)
    pred_labels = dataset_transf_test_pred.labels
    print(dataset_transf_test_pred.labels)

    # In-processing accuracy on the test split.  The original divided by
    # pred_labels.shape[1], which is 1 for AIF360's (n, 1) label arrays and
    # therefore printed a raw count; .size is the number of samples for
    # either orientation.
    true_pred = org_labels == pred_labels
    print("acc after in: ", float(np.sum(true_pred)) / pred_labels.size)

    metric = ClassificationMetric(dataset_transfer_test,
                                  dataset_transf_test_pred_transf,
                                  unprivileged_groups=unprivileged_groups2,
                                  privileged_groups=privileged_groups2)

    metrics = OrderedDict()
    metrics["Classification accuracy"] = metric.accuracy()
    TPR = metric.true_positive_rate()
    TNR = metric.true_negative_rate()
    bal_acc_nodebiasing_test = 0.5 * (TPR + TNR)
    metrics["Balanced classification accuracy"] = bal_acc_nodebiasing_test
    metrics[
        "Statistical parity difference"] = metric.statistical_parity_difference(
        )
    metrics["Disparate impact"] = metric.disparate_impact()
    metrics[
        "Equal opportunity difference"] = metric.equal_opportunity_difference(
        )
    metrics["Average odds difference"] = metric.average_odds_difference()
    metrics["Theil index"] = metric.theil_index()
    metrics["United Fairness"] = metric.generalized_entropy_index()

    # Build the value list and its "[v1 v2 ... ]" rendering without reusing
    # the in-processing index `m` as the loop variable (the original loop
    # shadowed the parameter).
    feature = [round(value, 4) for value in metrics.values()]
    feature_str = "[" + "".join(str(data) + " " for data in feature) + "]"

    return feature, feature_str
Ejemplo n.º 6
0
    def run(self):
        """Train and evaluate the model under the configured fairness strategy.

        self.fair_balance selects the treatment: "FairBalance" /
        "FairBalanceClass" (sample reweighting), "Reweighing" (AIF360),
        "AdversialDebiasing" (TF1 in-processing; note the spelling matches
        the configured value), or "RejectOptionClassification"
        (post-processing on a standard sklearn-style model).  Any other
        value trains self.model on the unmodified data.

        Returns:
            The result of self.evaluate on the test predictions, or None
            when RejectOptionClassification fails to fit.
        """
        data_train, data_test = self.data_prepare()

        # Groups are defined on the binary target attribute: 1 = privileged.
        privileged_groups = [{self.target_attribute: 1}]
        unprivileged_groups = [{self.target_attribute: 0}]
        if self.fair_balance == "FairBalance":
            dataset_transf_train = FairBalance(data_train, class_balance=False)
        elif self.fair_balance == "FairBalanceClass":
            dataset_transf_train = FairBalance(data_train, class_balance=True)
        elif self.fair_balance == "Reweighing":
            RW = Reweighing(unprivileged_groups=unprivileged_groups,
                            privileged_groups=privileged_groups)
            RW.fit(data_train)
            dataset_transf_train = RW.transform(data_train)
        else:
            dataset_transf_train = data_train

        if self.fair_balance == "AdversialDebiasing":
            tf.reset_default_graph()
            sess = tf.Session()
            self.model = AdversarialDebiasing(
                privileged_groups=privileged_groups,
                unprivileged_groups=unprivileged_groups,
                scope_name='debiased_classifier',
                debias=True,
                sess=sess)
            self.model.fit(dataset_transf_train)
            preds = self.model.predict(data_test).labels.ravel()
            sess.close()
        else:
            # Standard sklearn-style path: scale, fit with instance weights,
            # predict on the scaled test features.
            scale_orig = StandardScaler()
            X_train = scale_orig.fit_transform(dataset_transf_train.features)
            y_train = dataset_transf_train.labels.ravel()

            self.model.fit(X_train,
                           y_train,
                           sample_weight=dataset_transf_train.instance_weights)

            X_test = scale_orig.transform(data_test.features)
            preds = self.model.predict(X_test)

        if self.fair_balance == "RejectOptionClassification":
            # Column of predict_proba corresponding to the favorable label.
            pos_ind = numpy.where(self.model.classes_ ==
                                  dataset_transf_train.favorable_label)[0][0]
            data_train_pred = dataset_transf_train.copy(deepcopy=True)
            data_train_pred.scores = self.model.predict_proba(
                X_train)[:, pos_ind].reshape(-1, 1)
            data_test_pred = data_test.copy(deepcopy=True)
            data_test_pred.scores = self.model.predict_proba(
                X_test)[:, pos_ind].reshape(-1, 1)
            metric_name = "Statistical parity difference"
            metric_ub = 0.05
            metric_lb = -0.05
            ROC = RejectOptionClassification(
                unprivileged_groups=unprivileged_groups,
                privileged_groups=privileged_groups,
                low_class_thresh=0.01,
                high_class_thresh=0.99,
                num_class_thresh=100,
                num_ROC_margin=50,
                metric_name=metric_name,
                metric_ub=metric_ub,
                metric_lb=metric_lb)
            try:
                ROC.fit(dataset_transf_train, data_train_pred)
            except Exception:
                # ROC.fit can fail to find a feasible margin; treat that as
                # "no result".  Previously a bare `except:`, which also
                # swallowed SystemExit/KeyboardInterrupt.
                return None
            preds = ROC.predict(data_test_pred).labels.ravel()

        y_test = data_test.labels.ravel()
        result = self.evaluate(numpy.array(preds), y_test, data_test)
        return result
Ejemplo n.º 7
0
    def fit(self, dataset):
        """Fit a logistic-regression scorer plus a Reject Option post-processor.

        Trains a LogisticRegression on the dataset with the protected
        attribute dropped, fits an AIF360 RejectOptionClassification on the
        resulting scores, and installs two closures on self:
        self.h(x) -> ROC-post-processed hard labels for feature matrix x,
        and self.h_pr(x, boost=True) -> scores adjusted so thresholding
        them approximates the ROC decisions.
        """
        # Very large max_iter: effectively run liblinear to convergence.
        reg = LogisticRegression(solver='liblinear', max_iter=1000000000).fit(
            self.drop_prot(dataset, dataset.features), dataset.labels.ravel())

        # Copy of the dataset carrying the model's scores/labels, as ROC
        # expects a "predicted" dataset; scores are P(class 1) per row.
        dataset_p = dataset.copy(deepcopy=True)
        dataset_p.scores = np.array(
            list(
                map(
                    lambda x: [x[1]],
                    reg.predict_proba(self.drop_prot(dataset,
                                                     dataset.features)))))
        #print(reg.predict_proba(dataset.features))
        #print(dataset_p.scores)
        dataset_p.labels = np.array(
            list(
                map(lambda x: [x],
                    reg.predict(self.drop_prot(dataset, dataset.features)))))

        ro = RejectOptionClassification(
            unprivileged_groups=self.unprivileged_group,
            privileged_groups=self.privileged_group,
            metric_name=self.metric_name,
            metric_ub=self.abs_bound,
            metric_lb=-self.abs_bound,
            low_class_thresh=0.45,
            high_class_thresh=.55)
        ro.fit(dataset, dataset_p)

        self.threshold = ro.classification_threshold

        def h(x):
            # Predict ROC-post-processed labels for raw feature matrix x.
            # add dummy labels as we're going to predict them anyway...
            x_with_labels = np.hstack(
                (x,
                 list(
                     map(lambda x: [x], reg.predict(self.drop_prot(dataset,
                                                                   x))))))
            scores = list(
                map(lambda x: [x[1]],
                    reg.predict_proba(self.drop_prot(dataset, x))))
            dataset_ = dataset_from_matrix(x_with_labels, dataset)
            dataset_.scores = np.array(scores)
            labels_pre = dataset_.labels

            dataset_ = ro.predict(dataset_)
            return dataset_.labels.ravel()

        def h_pr(x, boost=True):
            # Return scores for x; with boost=True, shift scores inside the
            # ROC critical band so that thresholding approximates h(x).
            thresh = ro.classification_threshold
            scores = np.array(
                list(
                    map(lambda x: x[1],
                        reg.predict_proba(self.drop_prot(dataset, x)))))
            # Untouched copy of the scores, kept for the sanity asserts below.
            scores_ = np.array(
                list(
                    map(lambda x: x[1],
                        reg.predict_proba(self.drop_prot(dataset, x)))))
            orig_pred = list(
                map(lambda x: x[1] > thresh,
                    reg.predict_proba(self.drop_prot(dataset, x))))
            boosted_pred = h(x)

            # +1 where ROC flipped a prediction up, -1 where it flipped down.
            changed = (boosted_pred - orig_pred)

            group_ft, unpriv_val = list(self.unprivileged_group[0].items())[0]
            _, priv_val = list(self.privileged_group[0].items())[0]
            grp_ind = dataset.feature_names.index(group_ft)

            # Row masks by protected-group membership in column grp_ind.
            priv_ind = x[:, grp_ind] != unpriv_val
            unpriv_ind = x[:, grp_ind] == unpriv_val

            # The ROC critical band, widened by 0.1 on each side.
            lower_bound = ro.classification_threshold - ro.ROC_margin - 0.1
            upper_bound = ro.classification_threshold + ro.ROC_margin + 0.1

            #print(self.metric_name, lower_bound, upper_bound)

            def booster_fn(scores):
                # Smooth bump (difference of sigmoids) that is ~ROC_margin
                # inside [lower_bound, upper_bound] and ~0 outside.
                return (expit(75 * (scores - lower_bound)) -
                        expit(75 * (scores - upper_bound))) * ro.ROC_margin

            # Push privileged scores down and unprivileged scores up within
            # the critical band, mirroring ROC's reject-option rule.
            scores[priv_ind] -= booster_fn(scores[priv_ind])

            scores[unpriv_ind] += booster_fn(scores[unpriv_ind])

            assert ((np.clip(scores, None, 1.)[priv_ind] <=
                     scores_[priv_ind]).all())
            assert ((scores != scores_).any())

            # Diagnostic: rows where thresholding the boosted scores
            # disagrees with h(x); currently only computed, not asserted.
            boosted_pred = np.array(np.where(boosted_pred)[0])
            score_pred = np.array(np.where(scores >= thresh)[0])
            diff = np.setdiff1d(boosted_pred, score_pred)

            #print(ro.classification_threshold, ro.ROC_margin)
            #print(diff, scores[diff], scores_[diff])
            #print(booster_fn(scores_[diff]))
            #print(list(map(lambda x: x-thresh, scores[diff])))
            #assert(len(diff)==0)

            return np.clip(scores, None, 1.) if boost else np.array(
                list(map(lambda x: x[1], reg.predict_proba(x))))

        self.h_pr = h_pr
        self.h = h

        def bak():
            # NOTE(review): apparent dead leftover of an earlier h_pr
            # implementation -- it references names (changed, thresh, scores,
            # x, grp_ind, priv_val) that are not in its scope and would raise
            # NameError if called. Never invoked here; kept as-is.
            bool_increased = np.where(changed == 1)
            bool_decreased = np.where(changed == -1)
            if self.threshold == 0:
                max_increase = (thresh - scores[bool_increased]).max()
                max_decrease = (scores[bool_decreased] - thresh).max()

                self.threshold = thresh
                self.max_increase = max_increase
                self.max_decrease = max_decrease

            scores[x[:, grp_ind] == unpriv_val] += self.max_increase + 0.00001
            scores[x[:, grp_ind] == priv_val] -= self.max_decrease + 0.000001

            #boosted_pred = np.array(np.where(boosted_pred)[0])
            #score_pred = np.array(np.where(scores>=thresh)[0])
            #diff = np.setdiff1d(boosted_pred, score_pred)
            #assert(len(diff)==0)

            return np.clip(scores, None, 1.)
Ejemplo n.º 8
0
                           1.0,
                           2.0).reshape(len(dataset_trainResults_test.index),
                                        1)

    dataset_orig_valid_pred.scores = scores_valid
    dataset_orig_valid_pred.labels = labels_valid

    dataset_orig_test_pred.scores = scores_test
    dataset_orig_test_pred.labels = labels_test

    # Reject Option Classification
    ROC = RejectOptionClassification(unprivileged_groups=unprivileged_groups,
                                     privileged_groups=privileged_groups,
                                     low_class_thresh=0.01,
                                     high_class_thresh=0.99,
                                     num_class_thresh=100,
                                     num_ROC_margin=50,
                                     metric_name=metric_name,
                                     metric_ub=metric_ub,
                                     metric_lb=metric_lb)
    ROC = ROC.fit(dataset_orig_valid, dataset_orig_valid_pred)

    # ROC_test results
    dataset_transf_test_pred = ROC.predict(dataset_orig_test_pred)

    ROC_test[m + "_fairScores"] = dataset_transf_test_pred.scores.flatten()
    label_names = np.where(dataset_transf_test_pred.labels == 1, 'Good', 'Bad')
    ROC_test[m + "_fairLabels"] = label_names

ROC_test.to_csv(output_path + 'taiwan_post_roc_results_test.csv',
                index=None,