Example #1
    def decision_function(self, X):
        return self.project(X)

    def predict(self, X):
        return np.sign(self.project(X))

    def score(self, X_test, y_test):
        predict = self.predict(X_test)
        acc = accuracy_score(y_test, predict)
        return acc


if __name__ == "__main__":
    # Load Adult dataset (a smaller version!)
    dataset_train, dataset_test = load_adult(smaller=True)
    sensible_feature = 9  # GENDER
    sensible_feature_values = sorted(
        list(set(dataset_train.data[:, sensible_feature])))
    print('Different values of the sensible feature', sensible_feature, ':',
          sensible_feature_values)
    ntrain = len(dataset_train.target)

    # Standard SVM - Train an SVM using the training set
    print('Grid search for SVM...')
    grid_search_complete = True
    if grid_search_complete:
        param_grid = [{
            'C': [0.1, 1, 10.0],
            'gamma': [0.1, 0.01],
            'kernel': ['rbf']
        }]
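
The snippet is cut off after the parameter grid. A minimal, self-contained sketch (not the repository's own code) of how such a grid search is typically completed with scikit-learn's GridSearchCV over an RBF SVM, with synthetic data standing in for the Adult loader:

# Hedged sketch: a generic scikit-learn grid search over the same parameter
# grid, using synthetic data in place of the Adult dataset loaded above.
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X, y = make_classification(n_samples=300, n_features=10, random_state=0)
param_grid = [{'C': [0.1, 1, 10.0], 'gamma': [0.1, 0.01], 'kernel': ['rbf']}]

search = GridSearchCV(SVC(), param_grid, cv=3, scoring='accuracy')
search.fit(X, y)
print('Best parameters:', search.best_params_)
print('Best CV accuracy:', search.best_score_)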
Example #2
        #self.coef_ = self.model.coef_
        #self.intercept_ = self.model.intercept_


if __name__ == "__main__":
    experiment_number = 0
    if experiment_number == 0:
        dataset_train = load_binary_diabetes_uci()
        dataset_test = load_binary_diabetes_uci()
        sensible_feature = 1  # sex
    elif experiment_number == 1:
        dataset_train = load_heart_uci()
        dataset_test = load_heart_uci()
        sensible_feature = 1  # sex
    elif experiment_number == 2:
        dataset_train, dataset_test = load_adult(smaller=False)
        sensible_feature = 9  # sex
        print('Different values of the sensible feature', sensible_feature,
              ':', set(dataset_train.data[:, sensible_feature]))
    elif experiment_number == 3:
        dataset_train, dataset_test = load_adult_race(smaller=False)
        sensible_feature = 8  # race
        print('Different values of the sensible feature', sensible_feature,
              ':', set(dataset_train.data[:, sensible_feature]))

    if experiment_number in [0, 1]:
        # use the first 50% of the samples for training
        ntrain = 5 * len(dataset_train.target) // 10
        dataset_train.data = dataset_train.data[:ntrain, :]
        dataset_train.target = dataset_train.target[:ntrain]
        dataset_test.data = dataset_test.data[ntrain:, :]
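
This snippet is also cut off mid-split. A minimal standalone sketch of the 50/50 split pattern these lines set up, assuming the test targets are sliced the same way as the test features (toy arrays stand in for the dataset object):

# Hedged sketch of the half/half split above with toy arrays; the assumption
# is that dataset_test.target would be sliced with [ntrain:] as well.
import numpy as np

data = np.arange(20).reshape(10, 2)   # stand-in for dataset.data
target = np.arange(10)                # stand-in for dataset.target

ntrain = 5 * len(target) // 10        # first 50% of samples for training
train_data, train_target = data[:ntrain, :], target[:ntrain]
test_data, test_target = data[ntrain:, :], target[ntrain:]

print(train_data.shape, test_data.shape)  # (5, 2) (5, 2)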
Example #3
def experiment(dataset,
               frac,
               eval_objective,
               eps,
               rho_list,
               rho,
               eps_list,
               criteria,
               classifier,
               trials,
               include_sensible,
               filename,
               learner_name='lsq',
               mode='four',
               verbose=False):
    '''
    dataset: one of ['compas', 'bank', 'adult', 'law', 'german']. Default is 'compas'.
    frac: real number in the interval [0, 1]. The fraction of the data points in the chosen dataset to use.

    eval_objective: one of ['test_tau', 'test_rho_est_err']. 'test_tau' runs experiments on the trade-off between tau and error/fairness violation; 'test_rho_est_err' runs experiments on the trade-off between the estimated rho and error/fairness violation.
    eps: a number specifying the desired fairness level. Only used when eval_objective='test_rho_est_err'.
    rho_list: a list of (rho_plus, rho_minus) pairs. Only used when eval_objective='test_rho_est_err'.
    rho: [a, b] where a and b lie in the interval [0, 0.5].
    eps_list: a list of non-negative real numbers. Only used when eval_objective='test_eps'.

    criteria: one of ['DP','EO']
    classifier: one of ['Agarwal', 'Zafar']. Agarwal is the default.
    trials: the number of trials to run.
    include_sensible: boolean. Whether to include the sensitive attribute as a feature when optimizing the original loss. Used only for debugging purposes; it is currently hard-coded to False.
    filename: the file name in which to store the log of the experiment(s).
    learner_name: one of ['lsq', 'LR', 'SVM']. SVM is the slowest. lsq does not work for the law school dataset but works reasonably well on all other datasets.
    mode: ['four']. Currently only 'four' is supported. Only used when eval_objective='test_eps'.
    verbose: boolean. Whether to print info at each run.
    '''

    # mode is hard-coded to 'four'.
    mode = 'four'

    # Fall back to the default classifier ('Agarwal') if an unknown value is given.
    if classifier not in ['Agarwal', 'Zafar']:
        classifier = 'Agarwal'

    # We hard-code include_sensible to False.
    include_sensible = False

    sensible_name = None
    sensible_feature = None
    learner = None
    print('input dataset:', dataset)
    if dataset == 'adult':
        datamat = load_adult(frac)
        sensible_name = 'gender'
        sensible_feature = 9
    elif dataset == 'law':
        datamat = load_law(frac)
        sensible_name = 'racetxt'
        sensible_feature = 9
        # lsq does not work for law
        learner_name = 'LR'
    elif dataset == 'german':
        datamat = load_german(frac)
        sensible_name = 'Foreign'
        sensible_feature = 21
    elif dataset == 'bank':
        datamat = load_bank(frac)
        sensible_name = 'Middle_Aged'
        sensible_feature = 7
    else:
        datamat = load_compas(frac)
        sensible_name = 'race'
        sensible_feature = 4

    if learner_name == 'LR':
        learner = LR()
    elif learner_name == 'SVM':
        learner = SVM()
    else:
        learner = LeastSquaresLearner()

    print('eval_objective', eval_objective)
    print('learner_name:', learner_name)

    if eval_objective == 'test_rho_est_err':
        eps_list = [eps for _ in range(len(rho_list))]

    if criteria == 'EO':
        tests = [{"cons_class": moments.EO, "eps": eps} for eps in eps_list]
    else:
        tests = [{"cons_class": moments.DP, "eps": eps} for eps in eps_list]

    if eval_objective == 'test_rho_est_err':
        all_data = _experiment_est_error(datamat, tests, rho, rho_list, trials,
                                         sensible_name, sensible_feature,
                                         criteria, classifier,
                                         include_sensible, learner, mode,
                                         verbose)
        _save_all_data(filename, all_data, rho_list)
    else:
        all_data = _experiment(datamat, tests, rho, trials, sensible_name,
                               sensible_feature, criteria, classifier,
                               include_sensible, learner, mode, verbose)
        _save_all_data(filename, all_data, eps_list)

    return all_data
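
For reference, a hedged usage sketch of experiment() based only on the docstring above; every argument value here is illustrative and the log file name is hypothetical, not a value used by the original experiments:

# Illustrative call only; argument values are assumptions.
if __name__ == "__main__":
    all_data = experiment(
        dataset='compas',            # one of 'compas', 'bank', 'adult', 'law', 'german'
        frac=1.0,                    # use the full dataset
        eval_objective='test_tau',   # sweep tau vs. error / fairness violation
        eps=0.05,                    # fairness level (used for 'test_rho_est_err')
        rho_list=[(0.1, 0.2)],       # (rho_plus, rho_minus) pairs (used for 'test_rho_est_err')
        rho=[0.1, 0.2],              # each entry in [0, 0.5]
        eps_list=[0.01, 0.05, 0.1],  # fairness levels to sweep
        criteria='DP',               # 'DP' -> moments.DP, 'EO' -> moments.EO
        classifier='Agarwal',
        trials=3,
        include_sensible=False,      # hard-coded to False inside experiment()
        filename='compas_dp_log',    # hypothetical log file name
        learner_name='lsq',
        verbose=True)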