def run_clean(fairness_constraints):
    """Baseline: train on the clean labels and trace the best accuracy per violation budget."""
    print("Start running experiment with clean data.")
    unmitigated_predictor = LogisticRegression(solver='liblinear', fit_intercept=True)
    unmitigated_predictor.fit(X_train, Y_train)
    sweep = GridSearch(LogisticRegression(solver='liblinear', fit_intercept=True),
                       constraints=EqualizedOdds(),
                       grid_size=71)
    sweep.fit(X_train, Y_train, sensitive_features=A_train)
    predictors = [unmitigated_predictor] + [z.predictor for z in sweep.all_results]

    all_results_train, all_results_test = [], []
    for predictor in predictors:
        prediction_train = predictor.predict(X_train)
        prediction_test = predictor.predict(X_test)
        all_results_train.append({
            'accuracy': accuracy(prediction_train, Y_train),
            'violation': violation(prediction_train, Y_train, A_train)
        })
        all_results_test.append({
            'accuracy': accuracy(prediction_test, Y_test),
            'violation': violation(prediction_test, Y_test, A_test)
        })

    # For each fairness constraint, keep the best accuracy among predictors
    # whose violation stays within that constraint.
    best_train, best_test = [], []
    for constraint in fairness_constraints:
        best = 0.0
        for result in all_results_train:
            if result['violation'] <= constraint and result['accuracy'] > best:
                best = result['accuracy']
        best_train.append(best)
        best = 0.0
        for result in all_results_test:
            if result['violation'] <= constraint and result['accuracy'] > best:
                best = result['accuracy']
        best_test.append(best)
    return best_train, best_test
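# NOTE: `accuracy` and `violation` are helper metrics assumed to be defined
# elsewhere in this script (every runner below relies on them). The functions
# here are only a hypothetical sketch of what they likely compute -- plain 0/1
# accuracy and an equalized-odds gap -- and are prefixed with `_sketch_` so they
# never shadow the real implementations. `np` is numpy, already imported at the
# top of the script (it is used by the original code below).
def _sketch_accuracy(pred, y):
    # Fraction of predictions that match the clean labels.
    return float(np.mean(np.asarray(pred) == np.asarray(y)))

def _sketch_violation(pred, y, a, grp=None):
    # Largest deviation of a group's label-conditional positive rate from the
    # overall label-conditional positive rate; `grp` restricts to one group.
    pred, y, a = np.asarray(pred), np.asarray(y), np.asarray(a)
    gaps = []
    for label in (0, 1):
        mask = y == label
        if not mask.any():
            continue
        overall = pred[mask].mean()
        groups = [grp] if grp is not None else np.unique(a)
        for g in groups:
            gmask = mask & (a == g)
            if gmask.any():
                gaps.append(abs(pred[gmask].mean() - overall))
    return max(gaps) if gaps else 0.0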
def run_corrupt(fairness_constraints):
    """Train on the noisy labels with the standard EqualizedOdds constraint (no correction)."""
    print("[INFO][RUN] Corrupt")
    all_results = {
        'eps': fairness_constraints,
        'accuracy': {'train': [], 'test': []},
        'violation': {'train': [], 'test': []},
        'violation_male': {'train': [], 'test': []},
        'violation_female': {'train': [], 'test': []},
    }
    for eps in fairness_constraints:
        begin = time.time()
        sweep = ExponentiatedGradient(
            LogisticRegression(solver='liblinear', fit_intercept=True),
            constraints=EqualizedOdds(),
            eps=eps)
        try:
            sweep.fit(X_train, Y_noised, sensitive_features=A_train)
            prediction_train = sweep.predict(X_train)
            prediction_test = sweep.predict(X_test)
        except Exception:
            # Skip this constraint rather than recording stale predictions.
            print(f"Fairlearn can't fit at fairness constraint {eps}")
            continue
        all_results['accuracy']['train'].append(accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(accuracy(prediction_test, Y_test))
        all_results['violation']['train'].append(violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(violation(prediction_test, Y_test, A_test))
        all_results['violation_male']['train'].append(violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(violation(prediction_test, Y_test, A_test, grp=1))
        all_results['violation_female']['train'].append(violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(violation(prediction_test, Y_test, A_test, grp=0))
        print(f"Running fairness constraint: {eps}, "
              f"Training Accuracy: {all_results['accuracy']['train'][-1]}, "
              f"Test Accuracy: {all_results['accuracy']['test'][-1]}, "
              f"Training Violation: {all_results['violation']['train'][-1]}, "
              f"Test Violation: {all_results['violation']['test'][-1]}, "
              f"Time cost: {time.time() - begin}")
    acc = np.array(all_results['accuracy']['test'])
    v = np.array(all_results['violation']['test'])
    all_results['accuracy']['mean'] = acc.mean()
    all_results['accuracy']['std'] = acc.std()
    all_results['violation']['mean'] = v.mean()
    all_results['violation']['std'] = v.std()
    return all_results
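# NOTE: `Y_noised` and `error_rate` are produced earlier in the script. The
# helper below is only a hypothetical sketch of how group-dependent label noise
# could be injected, assuming error_rate[g] = (e_plus, e_minus) gives the flip
# probabilities P(flip | Y=1, A=g) and P(flip | Y=0, A=g) for binary labels.
def _sketch_add_label_noise(y, a, error_rate, seed=0):
    rng = np.random.RandomState(seed)
    y_orig = np.asarray(y)
    a = np.asarray(a)
    noised = y_orig.copy()
    u = rng.rand(len(y_orig))
    for g, (e_plus, e_minus) in enumerate(error_rate):
        # Compute flip masks on the original labels so a flipped label
        # cannot be flipped back within the same pass.
        flip_pos = (a == g) & (y_orig == 1) & (u < e_plus)
        flip_neg = (a == g) & (y_orig == 0) & (u < e_minus)
        noised[flip_pos] = 0
        noised[flip_neg] = 1
    return noised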
def run(fairness_constraints, use_proxy=False):
    """Train on the noisy labels, optionally with the proxy equalized-odds constraint."""
    print(f"Start running experiment with Proxy: {use_proxy}.")
    all_results = {
        'eps': fairness_constraints,
        'accuracy': {'train': [], 'test': []},
        'violation': {'train': [], 'test': []},
        'violation_male': {'train': [], 'test': []},
        'violation_female': {'train': [], 'test': []},
    }
    for eps in fairness_constraints:
        begin = time.time()
        if use_proxy:
            constraints = ProxyEqualizedOdds(error_rate=error_rate)
        else:
            constraints = EqualizedOdds()
        sweep = ExponentiatedGradient(
            LogisticRegression(solver='liblinear', fit_intercept=True),
            constraints=constraints,
            eps=eps)
        try:
            sweep.fit(X_train, Y_noised, sensitive_features=A_train)
            prediction_train = sweep.predict(X_train)
            prediction_test = sweep.predict(X_test)
        except Exception:
            # Skip this constraint rather than recording stale predictions.
            print(f"Fairlearn can't fit at fairness constraint {eps}")
            continue
        all_results['accuracy']['train'].append(accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(accuracy(prediction_test, Y_test))
        all_results['violation']['train'].append(violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(violation(prediction_test, Y_test, A_test))
        all_results['violation_male']['train'].append(violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(violation(prediction_test, Y_test, A_test, grp=1))
        all_results['violation_female']['train'].append(violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(violation(prediction_test, Y_test, A_test, grp=0))
        print(f"Running fairness constraint: {eps}, "
              f"Training Accuracy: {all_results['accuracy']['train'][-1]}, "
              f"Test Accuracy: {all_results['accuracy']['test'][-1]}, "
              f"Training Violation: {all_results['violation']['train'][-1]}, "
              f"Test Violation: {all_results['violation']['test'][-1]}, "
              f"Time cost: {time.time() - begin}")
    return all_results
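# Usage sketch (hypothetical): the runners above share an eps grid so their
# accuracy/violation curves can be compared side by side. The grid values below
# are illustrative only, not the grid used in the original experiment.
# fairness_constraints = np.linspace(0.005, 0.1, 20)
# results_proxy = run(fairness_constraints, use_proxy=True)    # proxy constraint on noisy labels
# results_plain = run(fairness_constraints, use_proxy=False)   # standard constraint on noisy labels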
def run_surrogate(fairness_constraints, est=False):
    """Train with the surrogate (noise-corrected) loss under the proxy equalized-odds constraint."""
    print("[INFO][RUN] Surrogate Loss.")
    all_results = {
        'eps': fairness_constraints,
        'accuracy': {'train': [], 'test': []},
        'violation': {'train': [], 'test': []},
        'violation_male': {'train': [], 'test': []},
        'violation_female': {'train': [], 'test': []},
    }
    for eps in fairness_constraints:
        begin = time.time()
        if not est:
            # est=False: pass the known noise matrix explicitly.
            surrogate_clf = SurrogateLoss(
                clf=LogisticRegression(solver='liblinear', fit_intercept=True),
                noise_matrix=noise_matrix)
        else:
            # est=True: no noise_matrix is passed to SurrogateLoss.
            surrogate_clf = SurrogateLoss(
                clf=LogisticRegression(solver='liblinear', fit_intercept=True))
        sweep = ExponentiatedGradient(
            surrogate_clf,
            constraints=ProxyEqualizedOdds(error_rate=error_rate),
            eps=eps)
        sweep.fit(X_train, Y_noised, sensitive_features=A_train)
        prediction_train = sweep.predict(X_train)
        prediction_test = sweep.predict(X_test)
        all_results['accuracy']['train'].append(accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(accuracy(prediction_test, Y_test))
        all_results['violation']['train'].append(violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(violation(prediction_test, Y_test, A_test))
        all_results['violation_male']['train'].append(violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(violation(prediction_test, Y_test, A_test, grp=1))
        all_results['violation_female']['train'].append(violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(violation(prediction_test, Y_test, A_test, grp=0))
        print(f"Running fairness constraint: {eps}, "
              f"Training Accuracy: {all_results['accuracy']['train'][-1]}, "
              f"Test Accuracy: {all_results['accuracy']['test'][-1]}, "
              f"Training Violation: {all_results['violation']['train'][-1]}, "
              f"Test Violation: {all_results['violation']['test'][-1]}, "
              f"Time cost: {time.time() - begin}")
    acc = np.array(all_results['accuracy']['test'])
    v = np.array(all_results['violation']['test'])
    all_results['accuracy']['mean'] = acc.mean()
    all_results['accuracy']['std'] = acc.std()
    all_results['violation']['mean'] = v.mean()
    all_results['violation']['std'] = v.std()
    return all_results
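# NOTE: `noise_matrix` is built elsewhere and its exact layout is defined by the
# project's custom SurrogateLoss class. Purely as a sketch, a common convention
# (assumed here, not confirmed by the source) is one row-stochastic 2x2 flip
# matrix per group with T[g][i][j] = P(noisy label = j | clean label = i, A = g),
# again taking error_rate[g] = (e_plus, e_minus):
def _sketch_noise_matrix(error_rate):
    return np.array([[[1 - e_minus, e_minus],
                      [e_plus, 1 - e_plus]] for e_plus, e_minus in error_rate])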
def run_peerloss(fairness_constraints, alpha=0.5, est=False):
    """Train with the peer loss (weight alpha) under the standard EqualizedOdds constraint."""
    print(f"[INFO][RUN] Peer Loss with alpha = {alpha}")
    all_results = {
        'eps': fairness_constraints,
        'accuracy': {'train': [], 'test': []},
        'violation': {'train': [], 'test': []},
        'violation_male': {'train': [], 'test': []},
        'violation_female': {'train': [], 'test': []},
    }
    # Per-group delta = 1 minus the two label-flip rates, from estimated or known error rates.
    if est:
        delta = [1 - e[0] - e[1] for e in est_error_rate]
    else:
        delta = [1 - e[0] - e[1] for e in error_rate]
    for eps in fairness_constraints:
        begin = time.time()
        sweep = ExponentiatedGradient(
            PeerLoss(A_train, delta=delta, alpha=alpha),
            constraints=EqualizedOdds(),
            eps=eps)
        sweep.fit(X_train, Y_noised, sensitive_features=A_train)
        prediction_train = sweep.predict(X_train)
        prediction_test = sweep.predict(X_test)
        all_results['accuracy']['train'].append(accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(accuracy(prediction_test, Y_test))
        all_results['violation']['train'].append(violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(violation(prediction_test, Y_test, A_test))
        all_results['violation_male']['train'].append(violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(violation(prediction_test, Y_test, A_test, grp=1))
        all_results['violation_female']['train'].append(violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(violation(prediction_test, Y_test, A_test, grp=0))
        print(f"Running fairness constraint: {eps}, "
              f"Training Accuracy: {all_results['accuracy']['train'][-1]}, "
              f"Test Accuracy: {all_results['accuracy']['test'][-1]}, "
              f"Training Violation: {all_results['violation']['train'][-1]}, "
              f"Test Violation: {all_results['violation']['test'][-1]}, "
              f"Time cost: {time.time() - begin}")
    acc = np.array(all_results['accuracy']['test'])
    v = np.array(all_results['violation']['test'])
    all_results['accuracy']['mean'] = acc.mean()
    all_results['accuracy']['std'] = acc.std()
    all_results['violation']['mean'] = v.mean()
    all_results['violation']['std'] = v.std()
    return all_results
def run_estimation(fairness_constraints, isEstimate=True):
    """Proxy constraint with delta either estimated from the noisy data or computed from known rates."""

    def NearestNeighbor(X, A, i):
        # Brute-force nearest same-group neighbor of point i.
        distance = max(np.linalg.norm(X[i] - X[0]), np.linalg.norm(X[i] - X[1]))
        nn = 0
        for j in range(len(X)):
            if i == j:
                continue
            if A[i] == A[j] and np.linalg.norm(X[i] - X[j]) < distance:
                distance = np.linalg.norm(X[i] - X[j])
                nn = j
        return nn

    def estimate_delta(X, A, Y):
        # Estimate delta per group from label agreement between nearest same-group neighbors.
        c1 = np.array([0., 0.])
        t = np.array([0., 0.])
        num = np.array([0., 0.])
        for i in range(len(X)):
            num[int(A[i])] += 1.
            if Y[i] == 1:
                j = NearestNeighbor(X, A, i)
                t[int(A[i])] += Y[i] == Y[j]
                c1[int(A[i])] += 1
        c1 = 2 * c1 / num
        c2 = 2 * t / num
        print(f"c1: {c1}, c2: {c2}")
        return np.sqrt(2 * c2 - c1 * c1)

    if isEstimate:
        print("Start running proxy fairness constraint with estimated delta.")
        delta = estimate_delta(X_train.values, A_train.values, Y_noised)
        print(f"Estimated delta is {delta}.")
    else:
        print("Start running proxy fairness constraint with known delta.")
        delta = np.array([
            1 - error_rate[0][0] - error_rate[0][1],
            1 - error_rate[1][0] - error_rate[1][1]
        ])
        print(f"The known delta is {delta}.")

    all_results = {
        'eps': fairness_constraints,
        'accuracy': {'train': [], 'test': []},
        'violation': {'train': [], 'test': []},
        'violation_male': {'train': [], 'test': []},
        'violation_female': {'train': [], 'test': []},
    }
    for eps in fairness_constraints:
        begin = time.time()
        sweep = ExponentiatedGradient(
            LogisticRegression(solver='liblinear', fit_intercept=True),
            constraints=ProxyEqualizedOdds2(delta=delta),
            eps=eps)
        try:
            sweep.fit(X_train, Y_noised, sensitive_features=A_train)
            prediction_train = sweep.predict(X_train)
            prediction_test = sweep.predict(X_test)
        except Exception:
            # Skip this constraint rather than recording stale predictions.
            print(f"Fairlearn can't fit at fairness constraint {eps}")
            continue
        all_results['accuracy']['train'].append(accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(accuracy(prediction_test, Y_test))
        all_results['violation']['train'].append(violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(violation(prediction_test, Y_test, A_test))
        all_results['violation_male']['train'].append(violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(violation(prediction_test, Y_test, A_test, grp=1))
        all_results['violation_female']['train'].append(violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(violation(prediction_test, Y_test, A_test, grp=0))
        print(f"Running fairness constraint: {eps}, "
              f"Training Accuracy: {all_results['accuracy']['train'][-1]}, "
              f"Test Accuracy: {all_results['accuracy']['test'][-1]}, "
              f"Training Violation: {all_results['violation']['train'][-1]}, "
              f"Test Violation: {all_results['violation']['test'][-1]}, "
              f"Time cost: {time.time() - begin}")
    return all_results
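# Worked check (hypothetical numbers): for a group whose two label-flip rates are
# 0.2 and 0.1, the known delta is 1 - 0.2 - 0.1 = 0.7; estimate_delta approximates
# the same quantity from the noisy labels as sqrt(2*c2 - c1**2), so the estimated
# and known branches of run_estimation are directly comparable.
_example_error_rate = [(0.2, 0.1), (0.3, 0.1)]  # illustrative values only
_example_delta = np.array([1 - e[0] - e[1] for e in _example_error_rate])  # -> [0.7, 0.6]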
for i, alpha in enumerate(alphas):
    for eps in fairness_constraints:
        sweep = ExponentiatedGradient(
            PeerLoss(A_train, delta, alpha=alpha),
            constraints=ProxyEqualizedOdds(error_rate=error_rate),
            # constraints=EqualizedOdds(),
            eps=eps)
        sweep.fit(X_train, Y_noised, sensitive_features=A_train)
        prediction_train = sweep.predict(X_train)
        prediction_test = sweep.predict(X_test)
        accuracy_train = accuracy(prediction_train, Y_train)
        accuracy_test = accuracy(prediction_test, Y_test)
        violation_train = violation(prediction_train, Y_train, A_train)
        violation_test = violation(prediction_test, Y_test, A_test)
        all_results_train[i].append(accuracy_train)
        all_results_test[i].append(accuracy_test)
        print(f"Running alpha {alpha}, fairness constraint {eps}, "
              f"Train Accuracy {accuracy_train}, Test Accuracy {accuracy_test}, "
              f"Train Violation {violation_train}, Test Violation {violation_test}.")
        fp.write(f"{alpha},{eps},{accuracy_train},{accuracy_test},{violation_train},{violation_test}\n")
fp.close()

plt.style.use('seaborn')
for i in range(len(alphas)):
    plt.plot(fairness_constraints,