def fit(self, _X, _Y, _classifier_name="logistic", _predictor="hard"):
    # Assumes: from fairlearn.postprocessing import ThresholdOptimizer
    # Train the base ERM classifier, then post-process it with fairlearn's
    # ThresholdOptimizer to enforce demographic parity. _X and _Y are
    # accepted for interface consistency, but the stored training data is
    # what gets used; _predictor has no effect on ThresholdOptimizer.
    my_erm_classifier = erm_classifier(self.train_X, self.train_Y)
    my_erm_classifier.fit(self.train_X, self.train_Y, classifier_name=_classifier_name)
    self.model = ThresholdOptimizer(estimator=my_erm_classifier, \
            constraints="demographic_parity", prefit=True)
    self.model.fit(self.train_X, self.train_Y, \
            sensitive_features=self.sensitive_train)
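For context, a minimal usage sketch of the wrapper above (hedged: clf, X_test, and sensitive_test_binary are placeholder names; predict with sensitive_features is fairlearn's actual ThresholdOptimizer interface):

# Hypothetical usage: clf is an instance of the class owning fit() above.
clf.fit(X_train, y_train)
y_pred = clf.model.predict(X_test, sensitive_features=sensitive_test_binary)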
def train_dropped(X_train, y_train, sensitive_train, X_test, y_test, \
        sensitive_test, sensitive_feature_names):
   
    sensitive_features_dict = {0:sensitive_feature_names[0], 1:sensitive_feature_names[1]}
    sensitive_train_binary = convert_to_binary(sensitive_train, \
            sensitive_feature_names[1], sensitive_feature_names[0])
    sensitive_test_binary = convert_to_binary(sensitive_test, \
            sensitive_feature_names[1], sensitive_feature_names[0])
    
    # First train an ERM classifier and compute the unfairness (TP delta and FP delta)
    erm = erm_classifier(X_train, y_train, sensitive_train_binary, \
            sensitive_features_dict)
    y_train = y_train.astype('int')
    erm.fit(X_train, y_train)
    try:
        out_dict = erm.get_group_confusion_matrix(sensitive_train_binary, X_train, y_train)
    except Exception:
        # The group confusion matrix can be undefined (e.g. a group with no
        # positive examples); signal failure to the caller.
        return (None, None), None, None

    tp_delta = abs(out_dict[max(out_dict.keys())][0] - out_dict[min(out_dict.keys())][0])
    fp_delta = abs(out_dict[max(out_dict.keys())][1] - out_dict[min(out_dict.keys())][1])

    # Now create a fair classifier and compute the likelihood of misreporting
    eo_classifier = soft_equalized_odds_classifier(X_train, y_train, sensitive_train_binary, \
            sensitive_features_dict)
    success = eo_classifier.fit(X_train, y_train, _lambda=1)
    if success is False:
        return (None, None), None, None
        
    gain_dict, loss_dict = eo_classifier.get_test_flips_expectation(X_test, sensitive_test_binary, percent=True)
    return (tp_delta, fp_delta), gain_dict, loss_dict
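The convert_to_binary helper used throughout these examples is not shown in this section. A minimal sketch consistent with its call sites (the second argument becomes group 1, the third group 0, matching sensitive_features_dict = {0: names[0], 1: names[1]}):

import numpy as np

def convert_to_binary(sensitive_values, one_name, zero_name):
    # Hypothetical reconstruction: encode the group named `one_name` as 1
    # and everything else (expected: `zero_name`) as 0.
    return np.array([1 if v == one_name else 0 for v in sensitive_values])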
Example #3
def main_compas(sensitive, _classifier, _predictor="hard"):
    X_train, X_test, sensitive_features_train, sensitive_features_test, \
            y_train, y_test, sensitive_feature_names = get_data_compas(sensitive)
    if sensitive == "race":
        sensitive_train_binary = convert_to_binary(sensitive_features_train, \
                sensitive_feature_names[1], sensitive_feature_names[0])
        sensitive_test_binary = convert_to_binary(sensitive_features_test, \
                sensitive_feature_names[1], sensitive_feature_names[0])
    else:
        sensitive_train_binary, sensitive_test_binary = sensitive_features_train, \
                sensitive_features_test
    sensitive_features_dict = {
        0: sensitive_feature_names[0],
        1: sensitive_feature_names[1]
    }

    #=============================== ERM ================================================
    classifier = erm_classifier(X_train, y_train, sensitive_train_binary,
                                sensitive_features_dict)
    classifier.fit(X_train, y_train, classifier_name=_classifier)
    classifier.get_proportions(X_train, y_train, sensitive_train_binary)
    erm_gain_dict, erm_loss_dict = classifier.get_test_flips(
        X_test, sensitive_test_binary, True)
    erm_confusion_mat = classifier.get_group_confusion_matrix(
        sensitive_test_binary, X_test, y_test)

    #================================ Equalized Odds ====================================
    eo_classifier = hard_equalized_odds_classifier(X_train, y_train, sensitive_train_binary, \
            sensitive_features_dict)
    eo_classifier.fit(X_train,
                      y_train,
                      _classifier_name=_classifier,
                      _predictor=_predictor)
    eo_gain_dict, eo_loss_dict = eo_classifier.get_test_flips(
        X_test, sensitive_test_binary, True)
    eo_confusion_mat = eo_classifier.get_group_confusion_matrix(
        sensitive_test_binary, X_test, y_test)

    #================================ Demographic Parity ====================================
    dp_classifier = demographic_parity_classifier(X_train, y_train, sensitive_train_binary, \
            sensitive_features_dict)
    dp_classifier.fit(X_train,
                      y_train,
                      _classifier_name=_classifier,
                      _predictor=_predictor)
    dp_gain_dict, dp_loss_dict = dp_classifier.get_test_flips(
        X_test, sensitive_test_binary, True)
    dp_confusion_mat = dp_classifier.get_group_confusion_matrix(sensitive_test_binary, \
            X_test, y_test)

    plot_vertical(erm_confusion_mat, erm_gain_dict, erm_loss_dict, \
                            eo_confusion_mat, eo_gain_dict, eo_loss_dict, \
                            dp_confusion_mat, dp_gain_dict, dp_loss_dict, \
                            "compas_"+sensitive+"_"+_classifier+"_"+_predictor+".png", \
                            "COMPAS (" + sensitive + " sensitive)")
Example #4
def main_lawschool(sensitive, _classifier, _predictor="hard"):
    X_train, X_test, sensitive_features_train, sensitive_features_test, \
            y_train, y_test, sensitive_feature_names = get_data_lawschool(sensitive)
    sensitive_train_binary = convert_to_binary(sensitive_features_train, \
            sensitive_feature_names[1], sensitive_feature_names[0])
    sensitive_test_binary = convert_to_binary(sensitive_features_test, \
            sensitive_feature_names[1], sensitive_feature_names[0])

    # 0 should be black, 1 white
    sensitive_features_dict = {
        0: sensitive_feature_names[0],
        1: sensitive_feature_names[1]
    }

    #=============================== ERM ================================================
    classifier = erm_classifier(X_train, y_train, sensitive_train_binary,
                                sensitive_features_dict)
    classifier.fit(X_train, y_train, classifier_name=_classifier)
    classifier.get_proportions(X_train, y_train, sensitive_train_binary)
    erm_gain_dict, erm_loss_dict = classifier.get_test_flips(
        X_test, sensitive_test_binary, True)
    erm_confusion_mat = classifier.get_group_confusion_matrix(
        sensitive_test_binary, X_test, y_test)

    #================================ Equalized Odds ====================================
    eo_classifier = equalized_odds_classifier(X_train, y_train, sensitive_train_binary, \
            sensitive_features_dict)
    eo_classifier.fit(X_train,
                      y_train,
                      _classifier_name=_classifier,
                      _predictor=_predictor)
    eo_gain_dict, eo_loss_dict = eo_classifier.get_test_flips(
        X_test, sensitive_test_binary, True)
    eo_confusion_mat = eo_classifier.get_group_confusion_matrix(
        sensitive_test_binary, X_test, y_test)

    #================================ Demographic Parity ====================================
    dp_classifier = demographic_parity_classifier(X_train, y_train, sensitive_train_binary, \
            sensitive_features_dict)
    dp_classifier.fit(X_train,
                      y_train,
                      _classifier_name=_classifier,
                      _predictor=_predictor)
    dp_gain_dict, dp_loss_dict = dp_classifier.get_test_flips(
        X_test, sensitive_test_binary, True)
    dp_confusion_mat = dp_classifier.get_group_confusion_matrix(sensitive_test_binary, \
            X_test, y_test)

    plot_vertical(erm_confusion_mat, erm_gain_dict, erm_loss_dict, \
                            eo_confusion_mat, eo_gain_dict, eo_loss_dict, \
                            dp_confusion_mat, dp_gain_dict, dp_loss_dict, \
                            "lawschool_"+sensitive+"_"+_classifier+"_"+_predictor+".png", \
                            "Lawschool (" + sensitive + " sensitive)")
Example #5
    # Assumes: import numpy as np; import cvxpy as cvx
    def fit(self, _classifier_name="logistic",
            _fairness="hard", _lambda=0.5,
            verbose=False, use_group_in_base_classifier=True):

        # First, create the base classifier, and get its predictions
        self.use_group_in_base_classifier = use_group_in_base_classifier
        if self.use_group_in_base_classifier:
            self.base_erm_classifier = erm_classifier(self.train_X, self.train_Y, self.sensitive_train)
            self.base_erm_classifier.fit(classifier_name=_classifier_name)
            y_pred_train = self.base_erm_classifier.predict(self.train_X, self.sensitive_train)
        else:
            # Assumed fallback so y_pred_train is always defined: train the
            # base classifier without the group feature.
            self.base_erm_classifier = erm_classifier(self.train_X, self.train_Y)
            self.base_erm_classifier.fit(classifier_name=_classifier_name)
            y_pred_train = self.base_erm_classifier.predict(self.train_X)
        y_true_train = self.train_Y
        group_train = self.sensitive_train
   
        num_group_0 = len(np.where(group_train == 0)[0]) 
        num_group_1 = len(np.where(group_train == 1)[0])
 
        assert np.array_equal(np.unique(y_true_train), np.array([0, 1])), 'y_true_train has to contain 0 and 1 and only these'
        assert np.array_equal(np.unique(y_pred_train), np.array([0, 1])), 'y_pred_train has to contain 0 and 1 and only these'
        
        # If _fairness == "hard", _lambda is ignored and the fairness
        # constraint is enforced exactly; otherwise it weights a penalty
        # term in the objective.
        self._fairness = _fairness

        assert np.array_equal(np.unique(group_train),np.array([0,1])), 'group_train has to contain 0 and 1 and only these'

        # Per-group rates of the base classifier: tpX/fpX are the TPR/FPR
        # restricted to group X.
        tp0 = np.sum(np.logical_and(y_pred_train == 1, np.logical_and(y_true_train == 1, group_train == 0))) / float(
            np.sum(np.logical_and(y_true_train == 1, group_train == 0)))
        tp1 = np.sum(np.logical_and(y_pred_train == 1, np.logical_and(y_true_train == 1, group_train == 1))) / float(
            np.sum(np.logical_and(y_true_train == 1, group_train == 1)))
        fp0 = np.sum(np.logical_and(y_pred_train == 1, np.logical_and(y_true_train == 0, group_train == 0))) / float(
            np.sum(np.logical_and(y_true_train == 0, group_train == 0)))
        fp1 = np.sum(np.logical_and(y_pred_train == 1, np.logical_and(y_true_train == 0, group_train == 1))) / float(
            np.sum(np.logical_and(y_true_train == 0, group_train == 1)))
        fn0 = 1 - tp0
        fn1 = 1 - tp1
        tn0 = 1 - fp0
        tn1 = 1 - fp1
    
        # Flip probabilities to optimize: aXbY is the probability that a base
        # prediction of a (p=positive, n=negative) is output as b, for group Y.
        p2p0 = cvx.Variable(1)
        p2n0 = cvx.Variable(1)
        n2p0 = cvx.Variable(1)
        n2n0 = cvx.Variable(1)
        p2p1 = cvx.Variable(1)
        p2n1 = cvx.Variable(1)
        n2p1 = cvx.Variable(1)
        n2n1 = cvx.Variable(1)
        
        # Post-processed group rates: e.g. the new FPR for group 0 combines
        # base false positives kept positive (fp0 * p2p0) and base true
        # negatives flipped to positive (tn0 * n2p0).
        fpr0 = fp0 * p2p0 + tn0 * n2p0
        fnr0 = fn0 * n2n0 + tp0 * p2n0
        fpr1 = fp1 * p2p1 + tn1 * n2p1
        fnr1 = fn1 * n2n1 + tp1 * p2n1
        tpr0 = 1 - fnr0
        tpr1 = 1 - fnr1
        tnr0 = 1 - fpr0
        tnr1 = 1 - fpr1
        
        error = fpr0 + fnr0 + fpr1 + fnr1
        # Each flip-probability pair sums to one, and each lies in [0, 1].
        constraints = [
            p2p0 == 1 - p2n0,
            n2p0 == 1 - n2n0,
            p2p1 == 1 - p2n1,
            n2p1 == 1 - n2n1,
            p2p0 <= 1,
            p2p0 >= 0,
            n2p0 <= 1,
            n2p0 >= 0,
            p2p1 <= 1,
            p2p1 >= 0,
            n2p1 <= 1,
            n2p1 >= 0
        ]
        
        gt_group0_pos = np.sum(np.logical_and(y_true_train == 1, group_train == 0))
        gt_group1_pos = np.sum(np.logical_and(y_true_train == 1, group_train == 1))
        gt_group0_neg = np.sum(np.logical_and(y_true_train == 0, group_train == 0))
        gt_group1_neg = np.sum(np.logical_and(y_true_train == 0, group_train == 1))
        
        if self._fairness == "hard":
            # Equalize the groups' overall positive-classification rates as
            # a hard constraint.
            constraints.append((tpr0*gt_group0_pos+fpr0*gt_group0_neg)/num_group_0 == \
                    (tpr1*gt_group1_pos+fpr1*gt_group1_neg)/num_group_1)
            prob = cvx.Problem(cvx.Minimize(error), constraints)
            prob = cvx.Problem(cvx.Minimize(error), constraints)
        else:
            group_0_rate = (tpr0*gt_group0_pos+fpr0*gt_group0_neg)/num_group_0
            group_1_rate = (tpr1*gt_group1_pos+fpr1*gt_group1_neg)/num_group_1
            penalty = cvx.abs(group_0_rate - group_1_rate)
            prob = cvx.Problem(cvx.Minimize(error + _lambda*penalty), constraints)
        
        try:
            prob.solve()
        except Exception as e:
            print("CVXPY failed to solve the problem:", e)
            return False

        def clamp(v):
            # Clamp to [0, 1] to absorb small solver violations.
            return float(min(max(0.0, v), 1.0))
        self.p2p0, self.n2p0, self.p2p1, self.n2p1 = \
                clamp(p2p0.value[0]), clamp(n2p0.value[0]), clamp(p2p1.value[0]), clamp(n2p1.value[0])
        self.n2n0, self.p2n0, self.n2n1, self.p2n1 = \
                clamp(n2n0.value[0]), clamp(p2n0.value[0]), clamp(n2n1.value[0]), clamp(p2n1.value[0])
        if verbose:
            print(self.p2p0, self.n2p0, self.p2p1, self.n2p1)
        self.trained = True

        fpr0 = fp0 * self.p2p0 + tn0 * self.n2p0
        fnr0 = fn0 * self.n2n0 + tp0 * self.p2n0
        fpr1 = fp1 * self.p2p1 + tn1 * self.n2p1
        fnr1 = fn1 * self.n2n1 + tp1 * self.p2n1
        tpr0 = 1 - fnr0
        tpr1 = 1 - fnr1
        tnr0 = 1 - fpr0
        tnr1 = 1 - fpr1
        
        if verbose:
            print("Group 0") 
            print("\t True positive rate:", tpr0)
            print("\t True negative rate:", tnr0)
            print("\t False positive rate:", fpr0)
            print("\t False negative rate:", fnr0)
            
            print("Group 1") 
            print("\t True positive rate:", tpr1)
            print("\t True negative rate:", tnr1)
            print("\t False positive rate:", fpr1)
            print("\t False negative rate:", fnr1)

            print("Expected positive rate, group 0:", (tpr0*gt_group0_pos+fpr0*gt_group0_neg)/num_group_0)
            print("Expected positive rate, group 1:", (tpr1*gt_group1_pos+fpr1*gt_group1_neg)/num_group_1)

        return True
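The class's prediction step is not shown in this section; the following is a minimal sketch (the name predict_with_flips and the base predict(X, sensitive) call are assumptions) of how learned flip probabilities are typically applied, in the randomized post-processing style of Hardt et al. (2016):

import numpy as np

def predict_with_flips(clf, X, sensitive):
    # Hypothetical companion to fit() above: apply the learned per-group
    # flip probabilities to the base classifier's predictions.
    base = clf.base_erm_classifier.predict(X, sensitive)
    rng = np.random.default_rng()
    out = np.zeros(len(base), dtype=int)
    for i, (yhat, g) in enumerate(zip(base, sensitive)):
        # aXp* is the probability that a base prediction a comes out positive.
        if yhat == 1:
            p_pos = clf.p2p0 if g == 0 else clf.p2p1
        else:
            p_pos = clf.n2p0 if g == 0 else clf.n2p1
        out[i] = int(rng.random() < p_pos)
    return out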
Example #6
    # Assumes: import numpy as np; import cvxpy as cvx
    def fit(self,
            train_X,
            train_Y,
            _classifier_name="logistic",
            _predictor="hard"):
        # First, create the base classifier, and get its predictions.
        # train_X/train_Y are expected to coincide with the stored
        # self.train_X/self.train_Y, which are used interchangeably below.
        self.base_erm_classifier = erm_classifier(self.train_X, self.train_Y,
                                                  self.sensitive_train)
        self.base_erm_classifier.fit(self.train_X,
                                     self.train_Y,
                                     classifier_name=_classifier_name)
        y_pred_train = self.base_erm_classifier.predict(
            train_X, self.sensitive_train)
        y_true_train = train_Y
        group_train = self.sensitive_train

        assert np.array_equal(np.unique(y_true_train), np.array(
            [0, 1])), 'y_true_train has to contain 0 and 1 and only these'
        assert np.array_equal(np.unique(y_pred_train), np.array(
            [0, 1])), 'y_pred_train has to contain 0 and 1 and only these'
        assert np.array_equal(np.unique(group_train), np.array(
            [0, 1])), 'group_train has to contain 0 and 1 and only these'

        tp0 = np.sum(
            np.logical_and(y_pred_train == 1,
                           np.logical_and(y_true_train == 1, group_train == 0))
        ) / float(np.sum(np.logical_and(y_true_train == 1, group_train == 0)))
        tp1 = np.sum(
            np.logical_and(y_pred_train == 1,
                           np.logical_and(y_true_train == 1, group_train == 1))
        ) / float(np.sum(np.logical_and(y_true_train == 1, group_train == 1)))
        fp0 = np.sum(
            np.logical_and(y_pred_train == 1,
                           np.logical_and(y_true_train == 0, group_train == 0))
        ) / float(np.sum(np.logical_and(y_true_train == 0, group_train == 0)))
        fp1 = np.sum(
            np.logical_and(y_pred_train == 1,
                           np.logical_and(y_true_train == 0, group_train == 1))
        ) / float(np.sum(np.logical_and(y_true_train == 0, group_train == 1)))
        fn0 = 1 - tp0
        fn1 = 1 - tp1
        tn0 = 1 - fp0
        tn1 = 1 - fp1

        p2p0 = cvx.Variable(1)
        p2n0 = cvx.Variable(1)
        n2p0 = cvx.Variable(1)
        n2n0 = cvx.Variable(1)
        p2p1 = cvx.Variable(1)
        p2n1 = cvx.Variable(1)
        n2p1 = cvx.Variable(1)
        n2n1 = cvx.Variable(1)

        fpr0 = fp0 * p2p0 + tn0 * n2p0
        fnr0 = fn0 * n2n0 + tp0 * p2n0
        fpr1 = fp1 * p2p1 + tn1 * n2p1
        fnr1 = fn1 * n2n1 + tp1 * p2n1
        tpr0 = 1 - fnr0
        tpr1 = 1 - fnr1
        tnr0 = 1 - fpr0
        tnr1 = 1 - fpr1

        error = fpr0 + fnr0 + fpr1 + fnr1
        # Sum-to-one and box constraints keep the flip probabilities valid;
        # the final pair (tpr0 == tpr1, fpr0 == fpr1) enforces equalized
        # odds exactly.
        constraints = [
            p2p0 == 1 - p2n0, n2p0 == 1 - n2n0, p2p1 == 1 - p2n1,
            n2p1 == 1 - n2n1, p2p0 <= 1, p2p0 >= 0, n2p0 <= 1, n2p0 >= 0,
            p2p1 <= 1, p2p1 >= 0, n2p1 <= 1, n2p1 >= 0, tpr0 == tpr1,
            fpr0 == fpr1
        ]

        prob = cvx.Problem(cvx.Minimize(error), constraints)
        try:
            prob.solve()
        except Exception as e:
            # Bail out before dereferencing unsolved variables below.
            print("CVXPY failed to solve the problem:", e)
            return False

        def clamp(v):
            # Clamp to [0, 1] to absorb small solver violations.
            return float(min(max(0.0, v), 1.0))
        self.p2p0, self.n2p0, self.p2p1, self.n2p1 = \
                clamp(p2p0.value[0]), clamp(n2p0.value[0]), clamp(p2p1.value[0]), clamp(n2p1.value[0])
        self.n2n0, self.p2n0, self.n2n1, self.p2n1 = \
                clamp(n2n0.value[0]), clamp(p2n0.value[0]), clamp(n2n1.value[0]), clamp(p2n1.value[0])
        self.trained = True

        fpr0 = fp0 * self.p2p0 + tn0 * self.n2p0
        fnr0 = fn0 * self.n2n0 + tp0 * self.p2n0
        fpr1 = fp1 * self.p2p1 + tn1 * self.n2p1
        fnr1 = fn1 * self.n2n1 + tp1 * self.p2n1
        tpr0 = 1 - fnr0
        tpr1 = 1 - fnr1
        tnr0 = 1 - fpr0
        tnr1 = 1 - fpr1

        print(
            "The expected group-specific rates are (TPR0, TPR1, FPR0, FPR1):",
            tpr0, tpr1, fpr0, fpr1)
        return True
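A short usage sketch tying this fit() to the confusion-matrix check used in main_compas above (hedged: the success-flag check assumes the True/False return convention shown here):

# Hypothetical check that post-processing equalized the group rates.
eo = hard_equalized_odds_classifier(X_train, y_train, sensitive_train_binary,
                                    sensitive_features_dict)
if eo.fit(X_train, y_train, _classifier_name="logistic", _predictor="hard"):
    conf_mat = eo.get_group_confusion_matrix(sensitive_test_binary, X_test, y_test)
    # With the constraints above, the per-group TPR/FPR entries should be
    # close after post-processing.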