Example #1
def evaluate_eqop(fair_feature, X_train, y_train, X_test, y_test):
    # Train
    eqop_model = EqualOpportunityClassifier(sensitive_cols=fair_feature,
                                            positive_target=True,
                                            covariance_threshold=0)
    eqop_model.fit(X_train, y_train)

    # Evaluate
    eqop_metrics = evaluate_model_test(eqop_model, fair_feature, X_test,
                                       y_test)
    eqop_metrics['Approach'] = 'EqOp'

    return eqop_metrics
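A minimal, self-contained sketch of the same pattern, fitting EqualOpportunityClassifier directly and scoring it with sklego's equal_opportunity_score; the toy DataFrame and the "gender" column below are assumptions, and the original evaluate_model_test helper is replaced with direct metric calls:

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklego.linear_model import EqualOpportunityClassifier
from sklego.metrics import equal_opportunity_score

# Toy data (assumption): two ordinary features plus a binary sensitive column "gender".
rng = np.random.default_rng(0)
X = pd.DataFrame({
    "x1": rng.normal(size=500),
    "x2": rng.normal(size=500),
    "gender": rng.integers(0, 2, size=500),
})
y = (X["x1"] + 0.5 * X["gender"] > 0).astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Same configuration as evaluate_eqop above: cap the covariance between the
# decision boundary and the sensitive column at zero for the positive class.
model = EqualOpportunityClassifier(sensitive_cols=["gender"],
                                   positive_target=1,
                                   covariance_threshold=0)
model.fit(X_train, y_train)

print("accuracy:", accuracy_score(y_test, model.predict(X_test)))
print("equal opportunity:", equal_opportunity_score("gender")(model, X_test, y_test))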
Example #2
def test_standard_checks(test_fn):
    trf = EqualOpportunityClassifier(
        covariance_threshold=None,
        positive_target=True,
        C=1,
        penalty="none",
        sensitive_cols=[0],
        train_sensitive_cols=True,
    )
    test_fn(EqualOpportunityClassifier.__name__, trf)
Example #3
def test_regularization(sensitive_classification_dataset):
    """Tests whether increasing regularization decreases the norm of the coefficient vector"""
    X, y = sensitive_classification_dataset

    prev_theta_norm = np.inf
    for C in [1, 0.5, 0.2, 0.1]:
        fair = EqualOpportunityClassifier(
            covariance_threshold=None, sensitive_cols=["x1"], C=C, positive_target=True
        ).fit(X, y)
        theta_norm = np.abs(np.sum(fair.coef_))
        assert theta_norm < prev_theta_norm
        prev_theta_norm = theta_norm
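This test and the ones that follow rely on a sensitive_classification_dataset pytest fixture that is not shown here. A minimal sketch of what such a fixture could look like (the column names and sizes are assumptions):

import numpy as np
import pandas as pd
import pytest


@pytest.fixture
def sensitive_classification_dataset():
    # Tiny synthetic dataset: "x1" is the binary sensitive column, "x2" an ordinary feature.
    rng = np.random.default_rng(42)
    X = pd.DataFrame({
        "x1": rng.integers(0, 2, size=100),
        "x2": rng.normal(size=100),
    })
    y = (X["x2"] + 0.5 * X["x1"] > 0).astype(int)
    return X, y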
Example #4
def test_fairness(sensitive_classification_dataset):
    """tests whether fairness (measured by p percent score) increases as we decrease the covariance threshold"""
    X, y = sensitive_classification_dataset
    scorer = equal_opportunity_score("x1")

    prev_fairness = -np.inf
    for cov_threshold in [None, 10, 0.5, 0.1]:
        fair = EqualOpportunityClassifier(
            covariance_threshold=cov_threshold,
            positive_target=True,
            sensitive_cols=["x1"],
            penalty="none",
            train_sensitive_cols=False,
        ).fit(X, y)
        fairness = scorer(fair, X, y)
        assert fairness >= prev_fairness
        prev_fairness = fairness
Example #5
def _test_same(dataset):
    X, y = dataset
    if X.shape[1] == 1:
        # If we only have one column (which is also the sensitive one) we can't fit
        return True

    sensitive_cols = [0]
    X_without_sens = np.delete(X, sensitive_cols, axis=1)
    lr = LogisticRegression(
        penalty="none",
        solver="lbfgs",
        multi_class="ovr",
        dual=False,
        tol=1e-4,
        C=1.0,
        fit_intercept=True,
        intercept_scaling=1,
        class_weight=None,
        random_state=None,
        max_iter=100,
        verbose=0,
        warm_start=False,
        n_jobs=None,
        l1_ratio=None,
    )
    fair = EqualOpportunityClassifier(
        covariance_threshold=None,
        sensitive_cols=sensitive_cols,
        penalty="none",
        positive_target=True,
    )

    fair.fit(X, y)
    lr.fit(X_without_sens, y)

    normal_pred = lr.predict_proba(X_without_sens)
    fair_pred = fair.predict_proba(X)
    np.testing.assert_almost_equal(normal_pred, fair_pred, decimal=2)
    assert np.sum(
        lr.predict(X_without_sens) != fair.predict(X)) / len(X) < 0.01
Example #6
def _test_same(dataset):
    X, y = dataset
    if X.shape[1] == 1:
        # If we only have one column (which is also the sensitive one) we can't fit
        return True

    sensitive_cols = [0]
    X_without_sens = np.delete(X, sensitive_cols, axis=1)
    lr = LogisticRegression(penalty="none", solver="lbfgs")
    fair = EqualOpportunityClassifier(
        covariance_threshold=None,
        sensitive_cols=sensitive_cols,
        penalty="none",
        positive_target=True,
    )

    fair.fit(X, y)
    lr.fit(X_without_sens, y)

    normal_pred = lr.predict_proba(X_without_sens)
    fair_pred = fair.predict_proba(X)
    np.testing.assert_almost_equal(normal_pred, fair_pred, decimal=2)
    assert np.sum(
        lr.predict(X_without_sens) != fair.predict(X)) / len(X) < 0.01
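A hedged sketch of how _test_same might be driven from an actual test; using make_classification here is an assumption, since the original suite presumably parametrizes its own datasets:

from sklearn.datasets import make_classification


def test_same_as_plain_logistic_regression():
    # With covariance_threshold=None and no penalty, the fair classifier should
    # agree with ordinary logistic regression fit without the sensitive column.
    X, y = make_classification(n_samples=200, n_features=4, random_state=0)
    _test_same((X, y))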
Example #7
    def objective(self, trial):
        C = trial.suggest_loguniform('C', 1e-5, 1e5)
        c = trial.suggest_loguniform('c', 1e-5, 1e5)
        try:
            if self.base_model=='equal':
                model = EqualOpportunityClassifier(sensitive_cols=self.fair_feat, positive_target=True, covariance_threshold=c, C=C, max_iter=10**3)
                model.fit(self.X_train, self.y_train)
            elif self.base_model=='demographic':
                model = DemographicParityClassifier(sensitive_cols=self.fair_feat, covariance_threshold=c, C=C, max_iter=10**3)
                model.fit(self.X_train, self.y_train)
            elif self.base_model=='minimax':
                a_train = self.X_train[self.fair_feat].copy().astype('int')
                a_val = self.X_val[self.fair_feat].copy().astype('int')

                a_train[a_train==0] = -1
                a_val[a_val==0] = -1

                model = SKLearn_Weighted_LLR(self.X_train.values, self.y_train.values,
                             a_train.values, self.X_val.values,
                             self.y_val.values, a_val.values,
                             C_reg=C)

                mua_ini = np.ones(a_val.max() + 1)
                mua_ini /= mua_ini.sum()
                results = APSTAR(model, mua_ini, niter=200, max_patience=200, Kini=1,
                                      Kmin=20, alpha=0.5, verbose=False)
                mu_best_list = results['mu_best_list']

                mu_best = mu_best_list[-1]
                model.weighted_fit(self.X_train.values, self.y_train.values, a_train.values, mu_best)
            else:
                raise ValueError('Incorrect base_model.')

            y_pred = model.predict(self.X_val)
        except Exception:
            return float('inf')

        # Skip trials where any validation metric collapses to zero.
        if (sklearn.metrics.accuracy_score(self.y_val, y_pred)==0 or
            equal_opportunity_score(sensitive_column=self.fair_feat)(model, self.X_val, self.y_val)==0 or
            p_percent_score(sensitive_column=self.fair_feat)(model, self.X_val)==0):
            return float('inf')

        if self.metric=='accuracy':
            perf = sklearn.metrics.accuracy_score(self.y_val, y_pred)
        elif self.metric=='equal_opportunity':
            perf = equal_opportunity_score(sensitive_column=self.fair_feat)(model, self.X_val, self.y_val)
        elif self.metric=='p_percent':
            perf = p_percent_score(sensitive_column=self.fair_feat)(model, self.X_val)
        elif self.metric=='c_variation':
            perf = 1/coefficient_of_variation(model, self.X_val, self.y_val)

        if perf>self.best_perf:
            self.best_perf = perf
            self.best_model = model

        return 1/perf if perf!=0 else float('inf')
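A hedged usage sketch for driving this objective with Optuna; `searcher` stands in for an instance of the (unshown) class that owns objective() and holds the train/validation splits, so that name and its setup are assumptions:

import optuna

# `searcher` is a hypothetical instance of the class defining objective() above,
# already holding X_train/y_train, X_val/y_val, fair_feat, base_model and metric.
study = optuna.create_study(direction="minimize")   # objective returns 1/perf, so lower is better
study.optimize(searcher.objective, n_trials=100)

print("best hyperparameters:", study.best_params)   # tuned C and covariance threshold c
best_model = searcher.best_model                     # tracked inside objective()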