    def decision_function(self, X):
        return self.project(X)

    def predict(self, X):
        return np.sign(self.project(X))

    def score(self, X_test, y_test):
        predict = self.predict(X_test)
        acc = accuracy_score(y_test, predict)
        return acc


if __name__ == "__main__":
    # Load Adult dataset (a smaller version!)
    dataset_train, dataset_test = load_adult(smaller=True)
    sensible_feature = 9  # GENDER
    sensible_feature_values = sorted(
        list(set(dataset_train.data[:, sensible_feature])))
    print('Different values of the sensible feature', sensible_feature, ':',
          sensible_feature_values)
    ntrain = len(dataset_train.target)

    # Standard SVM - Train an SVM using the training set
    print('Grid search for SVM...')
    grid_search_complete = 1
    if grid_search_complete:
        param_grid = [{'C': [0.1, 1, 10.0],
                       'gamma': [0.1, 0.01],
                       'kernel': ['rbf']}]
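        # --- Hedged sketch (not part of the original excerpt, which is truncated
        # above): a typical completion wires `param_grid` into scikit-learn's
        # GridSearchCV over an SVC. The names `svc` and `clf` below are hypothetical.
        from sklearn.svm import SVC
        from sklearn.model_selection import GridSearchCV
        svc = SVC()  # base estimator for the grid search (illustrative)
        clf = GridSearchCV(svc, param_grid, n_jobs=1)
        clf.fit(dataset_train.data, dataset_train.target)
        print('Best SVM parameters found:', clf.best_params_)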
        # self.coef_ = self.model.coef_
        # self.intercept_ = self.model.intercept_


if __name__ == "__main__":
    experiment_number = 0
    if experiment_number == 0:
        dataset_train = load_binary_diabetes_uci()
        dataset_test = load_binary_diabetes_uci()
        sensible_feature = 1  # sex
    elif experiment_number == 1:
        dataset_train = load_heart_uci()
        dataset_test = load_heart_uci()
        sensible_feature = 1  # sex
    elif experiment_number == 2:
        dataset_train, dataset_test = load_adult(smaller=False)
        sensible_feature = 9  # sex
        print('Different values of the sensible feature', sensible_feature, ':',
              set(dataset_train.data[:, sensible_feature]))
    elif experiment_number == 3:
        dataset_train, dataset_test = load_adult_race(smaller=False)
        sensible_feature = 8  # race
        print('Different values of the sensible feature', sensible_feature, ':',
              set(dataset_train.data[:, sensible_feature]))

    if experiment_number in [0, 1]:
        # % for train
        ntrain = 5 * len(dataset_train.target) // 10
        dataset_train.data = dataset_train.data[:ntrain, :]
        dataset_train.target = dataset_train.target[:ntrain]
        dataset_test.data = dataset_test.data[ntrain:, :]
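        # --- Hedged note (not in the original, which is truncated here): the symmetric
        # step keeps the remaining rows as the test targets; a quick sanity check on
        # the split sizes is shown below. All names are as in the excerpt above.
        dataset_test.target = dataset_test.target[ntrain:]
        print('Train size:', len(dataset_train.target),
              '- Test size:', len(dataset_test.target))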
def experiment(dataset, frac, eval_objective, eps, rho_list, rho, eps_list,
               criteria, classifier, trials, include_sensible, filename,
               learner_name='lsq', mode='four', verbose=False):
    '''
    dataset: one of ['compas', 'bank', 'adult', 'law', 'german']. Default is 'compas'.
    frac: real number in the interval [0, 1]. The fraction of the data points in the chosen dataset to use.
    eval_objective: one of ['test_tau', 'test_rho_est_err'].
        'test_tau' runs experiments between tau and error/fairness violation.
        'test_rho_est_err' runs experiments between the estimated rho and error/fairness violation.
    eps: a number specifying the desired fairness level. Valid when eval_objective='test_rho_est_err'.
    rho_list: a list of (rho_plus, rho_minus) pairs. Valid when eval_objective='test_rho_est_err'.
    rho: [a, b] where a and b are in the interval [0, 0.5].
    eps_list: a list of non-negative real numbers. Valid when eval_objective='test_eps'.
    criteria: one of ['DP', 'EO'].
    classifier: one of ['Agarwal', 'Zafar']. Agarwal is the default.
    trials: the number of trials to run.
    include_sensible: boolean. Whether to include the sensitive attribute as a feature when
        optimizing the original loss. Used only for debugging purposes; it is hard-coded to False below.
    filename: the file name used to store the log of the experiment(s).
    learner_name: one of ['lsq', 'LR', 'SVM']. SVM is the slowest. lsq does not work for the
        law school dataset but works reasonably well on all other datasets.
    mode: ['four']. Currently, only 'four' is supported. Valid when eval_objective='test_eps'.
    verbose: boolean. Whether to print info at each run.
    '''
    # We hard-code mode and classifier.
    mode = 'four'

    # classifier
    if classifier not in ['Agarwal', 'Zafar']:
        classifier = 'Agarwal'

    # We hard-code include_sensible to False.
    include_sensible = False

    sensible_name = None
    sensible_feature = None
    learner = None

    print('input dataset:', dataset)
    if dataset == 'adult':
        datamat = load_adult(frac)
        sensible_name = 'gender'
        sensible_feature = 9
    elif dataset == 'law':
        datamat = load_law(frac)
        sensible_name = 'racetxt'
        sensible_feature = 9
        # lsq does not work for law
        learner_name = 'LR'
    elif dataset == 'german':
        datamat = load_german(frac)
        sensible_name = 'Foreign'
        sensible_feature = 21
    elif dataset == 'bank':
        datamat = load_bank(frac)
        sensible_name = 'Middle_Aged'
        sensible_feature = 7
    else:
        datamat = load_compas(frac)
        sensible_name = 'race'
        sensible_feature = 4

    if learner_name == 'LR':
        learner = LR()
    elif learner_name == 'SVM':
        learner = SVM()
    else:
        learner = LeastSquaresLearner()

    print('eval_objective', eval_objective)
    print('learner_name:', learner_name)

    if eval_objective == 'test_rho_est_err':
        eps_list = [eps for _ in range(len(rho_list))]

    if criteria == 'EO':
        tests = [{"cons_class": moments.EO, "eps": eps} for eps in eps_list]
    else:
        tests = [{"cons_class": moments.DP, "eps": eps} for eps in eps_list]

    if eval_objective == 'test_rho_est_err':
        all_data = _experiment_est_error(datamat, tests, rho, rho_list, trials,
                                         sensible_name, sensible_feature,
                                         criteria, classifier, include_sensible,
                                         learner, mode, verbose)
        _save_all_data(filename, all_data, rho_list)
    else:
        all_data = _experiment(datamat, tests, rho, trials, sensible_name,
                               sensible_feature, criteria, classifier,
                               include_sensible, learner, mode, verbose)
        _save_all_data(filename, all_data, eps_list)

    return all_data
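# --- Hedged usage sketch (not in the original): an illustrative invocation of
# experiment() under the 'test_tau' objective on the compas dataset. All argument
# values below are examples chosen to match the docstring, not values taken from
# the repository.
if __name__ == "__main__":
    experiment(dataset='compas', frac=1.0, eval_objective='test_tau',
               eps=None, rho_list=None, rho=[0.2, 0.2],
               eps_list=[0.01, 0.05, 0.1], criteria='DP',
               classifier='Agarwal', trials=5, include_sensible=False,
               filename='compas_dp_log.pkl', learner_name='lsq',
               mode='four', verbose=True)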