import numpy as np
from sklearn.linear_model import LogisticRegression
from sklego.linear_model import EqualOpportunityClassifier


def _test_same(dataset):
    X, y = dataset
    if X.shape[1] == 1:
        # If the only column is the sensitive one, there is nothing left to fit on.
        return True
    sensitive_cols = [0]
    X_without_sens = np.delete(X, sensitive_cols, axis=1)
    lr = LogisticRegression(penalty="none", solver="lbfgs")
    fair = EqualOpportunityClassifier(
        covariance_threshold=None,
        sensitive_cols=sensitive_cols,
        penalty="none",
        positive_target=True,
    )
    fair.fit(X, y)
    lr.fit(X_without_sens, y)

    # With no covariance threshold, the fair classifier should behave like a
    # plain logistic regression trained without the sensitive column.
    normal_pred = lr.predict_proba(X_without_sens)
    fair_pred = fair.predict_proba(X)
    np.testing.assert_almost_equal(normal_pred, fair_pred, decimal=2)
    assert np.sum(lr.predict(X_without_sens) != fair.predict(X)) / len(X) < 0.01
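
# A minimal sketch of how _test_same might be invoked, assuming a synthetic
# binary-classification dataset whose first column plays the role of the
# sensitive attribute. The helper below is illustrative only and not part of
# the original test suite.
from sklearn.datasets import make_classification


def _make_toy_dataset(n_samples=200, n_features=4, seed=0):
    # Hypothetical helper: build a dataset and binarize column 0 so it can
    # serve as the sensitive column referenced by sensitive_cols=[0].
    X, y = make_classification(n_samples=n_samples, n_features=n_features,
                               random_state=seed)
    X[:, 0] = (X[:, 0] > 0).astype(float)
    return X, y


_test_same(_make_toy_dataset())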
def objective(self, trial):
    C = trial.suggest_loguniform('C', 1e-5, 1e5)
    c = trial.suggest_loguniform('c', 1e-5, 1e5)
    try:
        if self.base_model == 'equal':
            model = EqualOpportunityClassifier(
                sensitive_cols=self.fair_feat,
                positive_target=True,
                covariance_threshold=c,
                C=C,
                max_iter=10**3,
            )
            model.fit(self.X_train, self.y_train)
        elif self.base_model == 'demographic':
            model = DemographicParityClassifier(
                sensitive_cols=self.fair_feat,
                covariance_threshold=c,
                C=C,
                max_iter=10**3,
            )
            model.fit(self.X_train, self.y_train)
        elif self.base_model == 'minimax':
            # Recode the sensitive attribute from {0, 1} to {-1, 1}, as
            # expected by the minimax (APSTAR) routines.
            a_train = self.X_train[self.fair_feat].copy().astype('int')
            a_val = self.X_val[self.fair_feat].copy().astype('int')
            a_train[a_train == 0] = -1
            a_val[a_val == 0] = -1
            model = SKLearn_Weighted_LLR(
                self.X_train.values, self.y_train.values, a_train.values,
                self.X_val.values, self.y_val.values, a_val.values,
                C_reg=C,
            )
            mua_ini = np.ones(a_val.max() + 1)
            mua_ini /= mua_ini.sum()
            results = APSTAR(model, mua_ini, niter=200, max_patience=200,
                             Kini=1, Kmin=20, alpha=0.5, verbose=False)
            mu_best = results['mu_best_list'][-1]
            model.weighted_fit(self.X_train.values, self.y_train.values,
                               a_train.values, mu_best)
        else:
            raise ValueError('Incorrect base_model.')
        y_pred = model.predict(self.X_val)
    except Exception:
        # Failed fits (e.g. infeasible fairness constraints) get the worst score.
        return float('inf')

    # Discard degenerate models before computing the objective.
    if (sklearn.metrics.accuracy_score(self.y_val, y_pred) == 0
            or equal_opportunity_score(sensitive_column=self.fair_feat)(model, self.X_val, self.y_val) == 0
            or p_percent_score(sensitive_column=self.fair_feat)(model, self.X_val) == 0):
        return float('inf')

    if self.metric == 'accuracy':
        perf = sklearn.metrics.accuracy_score(self.y_val, y_pred)
    elif self.metric == 'equal_opportunity':
        perf = equal_opportunity_score(sensitive_column=self.fair_feat)(model, self.X_val, self.y_val)
    elif self.metric == 'p_percent':
        perf = p_percent_score(sensitive_column=self.fair_feat)(model, self.X_val)
    elif self.metric == 'c_variation':
        perf = 1 / coefficient_of_variation(model, self.X_val, self.y_val)

    if perf > self.best_perf:
        self.best_perf = perf
        self.best_model = model
    # Optuna minimizes, so return the inverse of the metric being maximized.
    return 1 / perf if perf != 0 else float('inf')
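
# A minimal sketch of how this objective could be wired into an Optuna study,
# assuming it is a method on a tuner object that holds the train/validation
# splits plus the best_perf and best_model attributes used above. The class
# name FairModelTuner and its constructor arguments are hypothetical.
import optuna

tuner = FairModelTuner(...)  # hypothetical owner of objective(); fill in splits/config
tuner.best_perf = 0.0        # objective() updates these as trials run
tuner.best_model = None

study = optuna.create_study(direction='minimize')  # objective returns 1/perf
study.optimize(tuner.objective, n_trials=50)
print(study.best_params, tuner.best_perf)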