Example #1
def test_p_percent_pandas_multiclass():
    sensitive_classification_dataset = pd.DataFrame({
        "x1": [1, 0, 1, 0, 1, 0, 1, 1],
        "x2": [0, 0, 0, 0, 0, 1, 1, 1],
        "y": [1, 1, 1, 0, 1, 0, 0, 2],
    })

    X, y = (
        sensitive_classification_dataset.drop(columns="y"),
        sensitive_classification_dataset["y"],
    )

    mod_1 = types.SimpleNamespace()

    mod_1.predict = lambda X: np.array([2, 0, 1, 0, 1, 0, 1, 2])
    assert (equal_opportunity_score(sensitive_column="x2", positive_target=2)(
        mod_1, X, np.array([2, 0, 1, 0, 1, 0, 1, 2])) == 1)

    mod_1.predict = lambda X: np.array([1, 0, 1, 0, 1, 0, 0, 1])
    assert (equal_opportunity_score(sensitive_column="x2",
                                    positive_target=2)(mod_1, X, y) == 0)

    mod_1.predict = lambda X: np.array([1, 0, 1, 0, 1, 0, 0, 0])
    assert (equal_opportunity_score(sensitive_column="x2",
                                    positive_target=2)(mod_1, X, y) == 0)
Example #2
    def objective(self, trial):
        C = trial.suggest_loguniform('C', 1e-10, 1e10)
        model = LogisticRegression(C=C, max_iter=10**3, tol=10**-6)

        model.fit(self.X_train, self.y_train, sample_weight=self.sample_weight)
        y_pred = model.predict(self.X_val)

        if (sklearn.metrics.accuracy_score(self.y_val, y_pred) == 0 or
                equal_opportunity_score(sensitive_column=self.fair_feat)(model, self.X_val, self.y_val) == 0 or
                p_percent_score(sensitive_column=self.fair_feat)(model, self.X_val) == 0):
            return float('inf')

        if self.metric=='accuracy':
            perf = sklearn.metrics.accuracy_score(self.y_val, y_pred)
        elif self.metric=='equal_opportunity':
            perf = equal_opportunity_score(sensitive_column=self.fair_feat)(model, self.X_val, self.y_val)
        elif self.metric=='p_percent':
            perf = p_percent_score(sensitive_column=self.fair_feat)(model, self.X_val)
        elif self.metric=='c_variation':
            perf = 1/coefficient_of_variation(model, self.X_val, self.y_val)

        if perf>self.best_perf:
            self.best_perf = perf
            self.best_model = model

        return 1/perf if perf!=0 else float('inf')
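
The objective above is written against Optuna's trial API (note the trial.suggest_loguniform call) and returns 1/perf, so it is meant to be minimized. A minimal sketch of how such an objective is typically driven, assuming a hypothetical tuner instance of the surrounding (unshown) class that holds the train/validation splits:

import optuna

# `tuner` is hypothetical: an instance of the unshown class that stores
# X_train/y_train/X_val/y_val, fair_feat, metric, and the objective above.
study = optuna.create_study(direction="minimize")  # objective returns 1/perf
study.optimize(tuner.objective, n_trials=100)
print(study.best_params)
print(tuner.best_perf, tuner.best_model)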
Example #3
    def tune(self, metric=None):
        self.best_perf = 0
        self.best_model = None

        if metric is not None:
            self.metric = metric
        if self.moo_ is None:
            self.moo_ = monise(weightedScalar=self.scalarization, singleScalar=self.scalarization,
                          nodeTimeLimit=2, targetSize=150,
                          targetGap=0, nodeGap=0.01, norm=False)
            self.moo_.optimize()
        for solution in self.moo_.solutionsList:
            y_pred = solution.x.predict(self.X_val)
            
            if (sklearn.metrics.accuracy_score(self.y_val, y_pred) == 0 or
                    equal_opportunity_score(sensitive_column=self.fair_feat)(solution.x, self.X_val, self.y_val) == 0 or
                    p_percent_score(sensitive_column=self.fair_feat)(solution.x, self.X_val) == 0):
                continue
            
            if self.metric=='accuracy':
                perf = sklearn.metrics.accuracy_score(self.y_val, y_pred)
            elif self.metric=='equal_opportunity':
                perf = equal_opportunity_score(sensitive_column=self.fair_feat)(solution.x, self.X_val, self.y_val)
            elif self.metric=='p_percent':
                perf = p_percent_score(sensitive_column=self.fair_feat)(solution.x, self.X_val)
            elif self.metric=='c_variation':
                perf = 1/coefficient_of_variation(solution.x, self.X_val, self.y_val)
            
            if perf>self.best_perf:
                self.best_perf = perf
                self.best_model = solution.x

        return self.best_model
Example #4
def test_warning_is_logged(sensitive_classification_dataset):
    X, y = sensitive_classification_dataset
    mod_fair = make_pipeline(ColumnSelector("x1"),
                             LogisticRegression()).fit(X, y)
    with warnings.catch_warnings(record=True) as w:
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")
        # Trigger a warning.
        equal_opportunity_score("x2", positive_target=2)(mod_fair, X, y)
        assert issubclass(w[-1].category, RuntimeWarning)
Example #5
def simulate_eo(data, columnNames, label, model, numOfSimulation=100):
    """
    description:
        runs simulation based on the number of simulations

    args:
        data (list of list): list of attributes and values
        label (string): classification label / ground truth
        model (classification model): sckit classification model
        numOfSimulation (int): number of simulations to run

    returns:
        equalOpportunities (list): list of equal opportunities
    """
    equalOpportunities = list()
    sensitiveVariableCount = list()
    for _ in tqdm(range(numOfSimulation)):
        windowData = create_window(data, columnNames)
        X, y = create_Xy(windowData, label)
        sensitiveVarCount = compute_sensitive_ratio(X['sex'])
        X, y = preprocess_columns(X, y)
        eqTemp = equal_opportunity_score(sensitive_column="sex")(model, X, y)
        equalOpportunities.append(eqTemp)
        sensitiveVariableCount.append(sensitiveVarCount)
    return equalOpportunities, sensitiveVariableCount
Example #6
def test_equal_opportunity_pandas():
    sensitive_classification_dataset = pd.DataFrame({
        "x1": [1, 0, 1, 0, 1, 0, 1, 1],
        "x2": [0, 0, 0, 0, 0, 1, 1, 1],
        "y": [1, 1, 1, 0, 1, 0, 0, 1]
    })

    X, y = sensitive_classification_dataset.drop(
        columns='y'), sensitive_classification_dataset['y']

    mod_1 = types.SimpleNamespace()

    mod_1.predict = lambda X: np.array([1, 0, 1, 0, 1, 0, 1, 1])
    assert equal_opportunity_score(sensitive_column="x2")(mod_1, X, y) == 0.75

    mod_1.predict = lambda X: np.array([1, 0, 1, 0, 1, 0, 0, 1])
    assert equal_opportunity_score(sensitive_column="x2")(mod_1, X, y) == 0.75

    mod_1.predict = lambda X: np.array([1, 0, 1, 0, 1, 0, 0, 0])
    assert equal_opportunity_score(sensitive_column="x2")(mod_1, X, y) == 0
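
The 0.75 values asserted above can be reproduced by hand: restrict to the rows whose true label is the positive target, compute the positive-prediction rate within each x2 group, and take the ratio of the smaller rate to the larger one. A minimal sketch assuming that definition (it matches the asserted values, though the exact sklego implementation may handle edge cases such as zero rates differently):

import numpy as np

def equal_opportunity_by_hand(y_true, y_pred, sensitive, positive_target=1):
    # Only rows whose true label is the positive target count.
    mask = y_true == positive_target
    tpr_1 = np.mean(y_pred[mask & (sensitive == 1)] == positive_target)
    tpr_0 = np.mean(y_pred[mask & (sensitive == 0)] == positive_target)
    # Ratio of the smaller true-positive rate to the larger one.
    return min(tpr_1, tpr_0) / max(tpr_1, tpr_0)

y_true = np.array([1, 1, 1, 0, 1, 0, 0, 1])
x2 = np.array([0, 0, 0, 0, 0, 1, 1, 1])
print(equal_opportunity_by_hand(y_true, np.array([1, 0, 1, 0, 1, 0, 1, 1]), x2))  # 0.75
print(equal_opportunity_by_hand(y_true, np.array([1, 0, 1, 0, 1, 0, 0, 1]), x2))  # 0.75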
Example #7
def test_fairness(sensitive_classification_dataset):
    """tests whether fairness (measured by p percent score) increases as we decrease the covariance threshold"""
    X, y = sensitive_classification_dataset
    scorer = equal_opportunity_score("x1")

    prev_fairness = -np.inf
    for cov_threshold in [None, 10, 0.5, 0.1]:
        fair = EqualOpportunityClassifier(
            covariance_threshold=cov_threshold,
            positive_target=True,
            sensitive_cols=["x1"],
            penalty="none",
            train_sensitive_cols=False,
        ).fit(X, y)
        fairness = scorer(fair, X, y)
        assert fairness >= prev_fairness
        prev_fairness = fairness
Example #8
def evaluate_model_test(model__, fair_feature, X_test, y_test):
    y_pred = model__.predict(X_test)

    metrics = {
        "Acc": accuracy_score(y_test, y_pred),
        # "BalancedAcc": balanced_accuracy_score(y_test, y_pred),
        # "F-score": f1_score(y_test, y_pred),
        "EO": equal_opportunity_score(sensitive_column=fair_feature)(model__, X_test, y_test),
        "DP": p_percent_score(sensitive_column=fair_feature)(model__, X_test),
        "CV": coefficient_of_variation(model__, X_test, y_test),
    }
    metrics["SingleClass"] = False
    if len(np.unique(y_pred)) == 1:
        #raise Exception("Model classifies every point to the same class")
        metrics["SingleClass"] = True
    return metrics
Example #9
def test_p_percent_numpy(sensitive_classification_dataset):
    X, y = sensitive_classification_dataset
    X = X.values
    mod = LogisticRegression().fit(X, y)
    assert equal_opportunity_score(1)(mod, X, y) == 0
Example #10
    def objective(self, trial):
        C = trial.suggest_loguniform('C', 1e-5, 1e5)
        c = trial.suggest_loguniform('c', 1e-5, 1e5)
        #print(c, C)
        try:
        #if 1==1:
            if self.base_model=='equal':
                model = EqualOpportunityClassifier(sensitive_cols=self.fair_feat, positive_target=True, covariance_threshold=c, C=C, max_iter=10**3)
                model.fit(self.X_train, self.y_train)
            elif self.base_model=='demographic':
                model = DemographicParityClassifier(sensitive_cols=self.fair_feat, covariance_threshold=c, C=C, max_iter=10**3)
                model.fit(self.X_train, self.y_train)
            elif self.base_model=='minimax':
                a_train = self.X_train[self.fair_feat].copy().astype('int')
                a_val = self.X_val[self.fair_feat].copy().astype('int')

                a_train[a_train==0] = -1
                a_val[a_val==0] = -1

                model = SKLearn_Weighted_LLR(self.X_train.values, self.y_train.values,
                             a_train.values, self.X_val.values,
                             self.y_val.values, a_val.values,
                             C_reg=C)

                mua_ini = np.ones(a_val.max() + 1)
                mua_ini /= mua_ini.sum()
                results = APSTAR(model, mua_ini, niter=200, max_patience=200, Kini=1,
                                      Kmin=20, alpha=0.5, verbose=False)
                mu_best_list = results['mu_best_list']

                mu_best = mu_best_list[-1]
                model.weighted_fit(self.X_train.values, self.y_train.values, a_train.values, mu_best)
            else:
                raise ValueError('Incorrect base_model.')

            y_pred = model.predict(self.X_val)
        except Exception:
            return float('inf')



        if (sklearn.metrics.accuracy_score(self.y_val, y_pred) == 0 or
                equal_opportunity_score(sensitive_column=self.fair_feat)(model, self.X_val, self.y_val) == 0 or
                p_percent_score(sensitive_column=self.fair_feat)(model, self.X_val) == 0):
            return float('inf')


        if self.metric=='accuracy':
            perf = sklearn.metrics.accuracy_score(self.y_val, y_pred)
        elif self.metric=='equal_opportunity':
            perf = equal_opportunity_score(sensitive_column=self.fair_feat)(model, self.X_val, self.y_val)
        elif self.metric=='p_percent':
            perf = p_percent_score(sensitive_column=self.fair_feat)(model, self.X_val)
        elif self.metric=='c_variation':
            perf = 1/coefficient_of_variation(model, self.X_val, self.y_val)

        if perf>self.best_perf:
            self.best_perf = perf
            self.best_model = model

        return 1/perf if perf!=0 else float('inf')
Example #11
#mod_unfair = LogisticRegression(solver='lbfgs').fit(X, y)

from sklego.metrics import equal_opportunity_score
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np
import types

sensitive_classification_dataset = pd.DataFrame({
    "x1": [1, 0, 1, 0, 1, 0, 1, 1],
    "x2": [0, 0, 0, 0, 0, 1, 1, 1],
    "y": [1, 1, 1, 0, 1, 0, 0, 1]
})

X, y = sensitive_classification_dataset.drop(
    columns='y'), sensitive_classification_dataset['y']

mod_1 = types.SimpleNamespace()

mod_1.predict = lambda X: np.array([1, 0, 1, 0, 1, 0, 1, 1])
print(mod_1)
print('equal_opportunity_score:',
      equal_opportunity_score(sensitive_column="x2")(mod_1, X, y))

mod_1.predict = lambda X: np.array([1, 0, 1, 0, 1, 0, 0, 1])
print('equal_opportunity_score:',
      equal_opportunity_score(sensitive_column="x2")(mod_1, X, y))

mod_1.predict = lambda X: np.array([1, 0, 1, 0, 1, 0, 0, 0])

print('equal_opportunity_score:',
      equal_opportunity_score(sensitive_column="x2")(mod_1, X, y))
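
The commented-out mod_unfair line at the top of this snippet can be completed in the same way to score a real fitted estimator instead of a hand-built SimpleNamespace; a minimal sketch continuing from the X and y defined above:

# Sketch: score an actual fitted LogisticRegression the same way (continues the
# commented-out mod_unfair idea above; the value depends on the fitted model).
mod_unfair = LogisticRegression(solver='lbfgs').fit(X, y)
print('equal_opportunity_score:',
      equal_opportunity_score(sensitive_column="x2")(mod_unfair, X, y))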