Example #1
def test_p_percent_pandas(sensitive_classification_dataset):
    X, y = sensitive_classification_dataset
    mod_unfair = LogisticRegression().fit(X, y)
    assert p_percent_score("x2")(mod_unfair, X) == 0

    mod_fair = make_pipeline(ColumnSelector("x1"), LogisticRegression()).fit(X, y)
    assert p_percent_score("x2")(mod_fair, X) == 0.9
Example #2
    def objective(self, trial):
        C = trial.suggest_loguniform('C', 1e-10, 1e10)
        model = LogisticRegression(C=C, max_iter=10**3, tol=10**-6)

        model.fit(self.X_train, self.y_train, sample_weight=self.sample_weight)
        y_pred = model.predict(self.X_val)

        # Discard trials where any of the metrics collapses to zero.
        if (sklearn.metrics.accuracy_score(self.y_val, y_pred)==0 or
            equal_opportunity_score(sensitive_column=self.fair_feat)(model, self.X_val, self.y_val)==0 or
            p_percent_score(sensitive_column=self.fair_feat)(model, self.X_val)==0):
            return float('inf')

        if self.metric=='accuracy':
            perf = sklearn.metrics.accuracy_score(self.y_val, y_pred)
        elif self.metric=='equal_opportunity':
            perf = equal_opportunity_score(sensitive_column=self.fair_feat)(model, self.X_val, self.y_val)
        elif self.metric=='p_percent':
            perf = p_percent_score(sensitive_column=self.fair_feat)(model, self.X_val)
        elif self.metric=='c_variation':
            perf = 1/coefficient_of_variation(model, self.X_val, self.y_val)

        if perf>self.best_perf:
            self.best_perf = perf
            self.best_model = model

        return 1/perf if perf!=0 else float('inf')
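The objective above returns 1/perf, so the Optuna study that drives it should minimize. A hedged, self-contained illustration of the same pattern written as a free function; the toy data stands in for the self.X_train/self.X_val attributes, which are not shown in this snippet.

import numpy as np
import pandas as pd
import optuna
import sklearn.metrics
from sklearn.linear_model import LogisticRegression
from sklego.metrics import p_percent_score

rng = np.random.default_rng(1)
X = pd.DataFrame({"x1": rng.normal(size=400), "x2": rng.integers(0, 2, size=400)})
y = (X["x1"] + 0.3 * X["x2"] > 0).astype(int)
X_train, X_val = X.iloc[:300], X.iloc[300:]
y_train, y_val = y.iloc[:300], y.iloc[300:]

def objective(trial):
    # suggest_float(..., log=True) is the current spelling of suggest_loguniform.
    C = trial.suggest_float("C", 1e-10, 1e10, log=True)
    model = LogisticRegression(C=C, max_iter=1000).fit(X_train, y_train)
    acc = sklearn.metrics.accuracy_score(y_val, model.predict(X_val))
    fairness = p_percent_score("x2")(model, X_val)
    if acc == 0 or fairness == 0:
        return float('inf')
    return 1 / acc  # minimize the inverse of the chosen performance metric

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)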
Example #3
    def tune(self, metric=None):
        self.best_perf = 0
        self.best_model = None

        if metric is not None:
            self.metric = metric
        if self.moo_ is None:
            self.moo_ = monise(weightedScalar=self.scalarization, singleScalar=self.scalarization,
                               nodeTimeLimit=2, targetSize=150,
                               targetGap=0, nodeGap=0.01, norm=False)
            self.moo_.optimize()
        for solution in self.moo_.solutionsList:
            y_pred = solution.x.predict(self.X_val)
            
            # Skip solutions where any of the metrics collapses to zero.
            if (sklearn.metrics.accuracy_score(self.y_val, y_pred)==0 or
                equal_opportunity_score(sensitive_column=self.fair_feat)(solution.x, self.X_val, self.y_val)==0 or
                p_percent_score(sensitive_column=self.fair_feat)(solution.x, self.X_val)==0):
                continue
            
            if self.metric=='accuracy':
                perf = sklearn.metrics.accuracy_score(self.y_val, y_pred)
            elif self.metric=='equal_opportunity':
                perf = equal_opportunity_score(sensitive_column=self.fair_feat)(solution.x, self.X_val, self.y_val)
            elif self.metric=='p_percent':
                perf = p_percent_score(sensitive_column=self.fair_feat)(solution.x, self.X_val)
            elif self.metric=='c_variation':
                perf = 1/coefficient_of_variation(solution.x, self.X_val, self.y_val)
            
            if perf>self.best_perf:
                self.best_perf = perf
                self.best_model = solution.x

        return self.best_model
Example #4
def test_warning_is_logged(sensitive_classification_dataset):
    X, y = sensitive_classification_dataset
    mod_fair = make_pipeline(ColumnSelector("x1"), LogisticRegression()).fit(X, y)
    with warnings.catch_warnings(record=True) as w:
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")
        # Trigger a warning.
        p_percent_score("x2", positive_target=2)(mod_fair, X)
        assert issubclass(w[-1].category, RuntimeWarning)
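An equivalent way to express the same check with pytest.warns, under the same fixture and import assumptions as the test above (a sketch, not a drop-in replacement):

import pytest

def test_warning_is_logged_with_pytest_warns(sensitive_classification_dataset):
    X, y = sensitive_classification_dataset
    mod_fair = make_pipeline(ColumnSelector("x1"), LogisticRegression()).fit(X, y)
    # positive_target=2 never occurs in the predictions of this binary model,
    # which is what triggers the RuntimeWarning.
    with pytest.warns(RuntimeWarning):
        p_percent_score("x2", positive_target=2)(mod_fair, X)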
Example #5
def test_p_percent_pandas_multiclass(sensitive_multiclass_classification_dataset):
    X, y = sensitive_multiclass_classification_dataset
    mod_unfair = LogisticRegression(multi_class="ovr").fit(X, y)
    assert p_percent_score("x2")(mod_unfair, X) == 0
    assert p_percent_score("x2", positive_target=2)(mod_unfair, X) == 0

    mod_fair = make_pipeline(ColumnSelector("x1"), LogisticRegression()).fit(X, y)
    assert p_percent_score("x2")(mod_fair, X) == pytest.approx(0.9333333)
    assert p_percent_score("x2", positive_target=2)(mod_fair, X) == 0
Example #6
def test_fairness(sensitive_classification_dataset):
    """tests whether fairness (measured by p percent score) increases as we decrease the covariance threshold"""
    X, y = sensitive_classification_dataset
    scorer = p_percent_score("x1")

    prev_fairness = -np.inf
    for cov_threshold in [None, 10, 0.5, 0.1]:
        fair = FairClassifier(
            covariance_threshold=cov_threshold,
            sensitive_cols=["x1"],
            penalty="none",
            train_sensitive_cols=False,
        ).fit(X, y)
        fairness = scorer(fair, X, y)
        assert fairness >= prev_fairness
        prev_fairness = fairness
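FairClassifier is the older scikit-lego name for this model; recent releases expose it as DemographicParityClassifier (the name used in Example #9 below). A hedged sketch of the same covariance-threshold sweep outside the test harness, with illustrative data:

import numpy as np
import pandas as pd
from sklego.linear_model import DemographicParityClassifier
from sklego.metrics import p_percent_score

rng = np.random.default_rng(0)
X = pd.DataFrame({"x1": rng.integers(0, 2, size=300), "x2": rng.normal(size=300)})
y = (0.8 * X["x1"] + X["x2"] > 0).astype(int)

# A lower covariance threshold tightens the fairness constraint, so the
# p_percent_score should not decrease along this sequence.
for cov_threshold in [None, 10, 0.5, 0.1]:
    clf = DemographicParityClassifier(
        covariance_threshold=cov_threshold,
        sensitive_cols=["x1"],
        penalty="none",
        train_sensitive_cols=False,
    ).fit(X, y)
    print(cov_threshold, p_percent_score("x1")(clf, X))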
Example #7
def evaluate_model_test(model__, fair_feature, X_test, y_test):
    y_pred = model__.predict(X_test)

    metrics = {
        "Acc": accuracy_score(y_test, y_pred),
        # "BalancedAcc": balanced_accuracy_score(y_test, y_pred),
        # "F-score": f1_score(y_test, y_pred),
        "EO": equal_opportunity_score(sensitive_column=fair_feature)(model__, X_test, y_test),
        "DP": p_percent_score(sensitive_column=fair_feature)(model__, X_test),
        "CV": coefficient_of_variation(model__, X_test, y_test),
    }
    metrics["SingleClass"] = False
    if len(np.unique(y_pred)) == 1:
        #raise Exception("Model classifies every point to the same class")
        metrics["SingleClass"] = True
    return metrics
Example #8
def test_p_percent_numpy(sensitive_classification_dataset):
    X, y = sensitive_classification_dataset
    X = X.values
    mod = LogisticRegression().fit(X, y)
    assert p_percent_score(1)(mod, X) == 0
Example #9
    def objective(self, trial):
        C = trial.suggest_loguniform('C', 1e-5, 1e5)
        c = trial.suggest_loguniform('c', 1e-5, 1e5)
        #print(c, C)
        try:
        #if 1==1:
            if self.base_model=='equal':
                model = EqualOpportunityClassifier(sensitive_cols=self.fair_feat, positive_target=True, covariance_threshold=c, C=C, max_iter=10**3)
                model.fit(self.X_train, self.y_train)
            elif self.base_model=='demographic':
                model = DemographicParityClassifier(sensitive_cols=self.fair_feat, covariance_threshold=c, C=C, max_iter=10**3)
                model.fit(self.X_train, self.y_train)
            elif self.base_model=='minimax':
                a_train = self.X_train[self.fair_feat].copy().astype('int')
                a_val = self.X_val[self.fair_feat].copy().astype('int')

                a_train[a_train==0] = -1
                a_val[a_val==0] = -1

                model = SKLearn_Weighted_LLR(self.X_train.values, self.y_train.values,
                             a_train.values, self.X_val.values,
                             self.y_val.values, a_val.values,
                             C_reg=C)

                mua_ini = np.ones(a_val.max() + 1)
                mua_ini /= mua_ini.sum()
                results = APSTAR(model, mua_ini, niter=200, max_patience=200, Kini=1,
                                      Kmin=20, alpha=0.5, verbose=False)
                mu_best_list = results['mu_best_list']

                mu_best = mu_best_list[-1]
                model.weighted_fit(self.X_train.values, self.y_train.values, a_train.values, mu_best)
            else:
                raise ValueError('Incorrect base_model.')

            y_pred = model.predict(self.X_val)
        except Exception:
            return float('inf')



        # Discard trials where any of the metrics collapses to zero.
        if (sklearn.metrics.accuracy_score(self.y_val, y_pred)==0 or
            equal_opportunity_score(sensitive_column=self.fair_feat)(model, self.X_val, self.y_val)==0 or
            p_percent_score(sensitive_column=self.fair_feat)(model, self.X_val)==0):
            return float('inf')


        if self.metric=='accuracy':
            perf = sklearn.metrics.accuracy_score(self.y_val, y_pred)
        elif self.metric=='equal_opportunity':
            perf = equal_opportunity_score(sensitive_column=self.fair_feat)(model, self.X_val, self.y_val)
        elif self.metric=='p_percent':
            perf = p_percent_score(sensitive_column=self.fair_feat)(model, self.X_val)
        elif self.metric=='c_variation':
            perf = 1/coefficient_of_variation(model, self.X_val, self.y_val)

        if perf>self.best_perf:
            self.best_perf = perf
            self.best_model = model

        return 1/perf if perf!=0 else float('inf')