def synth():
    """Run the influence-based fairness experiment on the synthetic data.

    Steps, in order:

    1. Load the synthetic split and print the fairness measure of the raw
       labels (labelled as a KL divergence in the output).
    2. Train a logistic regression and print its test accuracy and the
       fairness measure of its predictions on train and test.
    3. Score every training point with ``calc_influences`` and hand the
       positively-scored indices to ``Trainer.retrain_leave_one_out``.
    4. Print the same metrics for the retrained model.
    5. Fit a depth-2 decision tree predicting which training points scored
       positive, render it to ``high_influence`` and print the index of its
       most important feature.
    """
    rn.seed(1)
    splits = get_train_test_synth(n=10000)
    X_train, y_train, X_test, y_test = (torch.tensor(part) for part in splits)

    print('train: KL p(y | nonwhite), p(y | white)',
          calc_fairness(X_train, y_train))
    print('test: KL p(y | nonwhite), p(y | white)',
          calc_fairness(X_test, y_test))

    def build_model():
        # One input per feature column of the training matrix.
        return TorchLogisticRegression(X_train.shape[1])

    trainer = Trainer(build_model, nn.BCELoss())
    trainer.train(X_train,
                  y_train,
                  batch_size=1000,
                  num_epochs=500,
                  reg=0.0,
                  verbose=False)
    unfair_model = trainer.model

    print('acc:', eval_acc(unfair_model, X_test, y_test))
    print('model: KL p(y | nonwhite), p(y | white)',
          eval_fairness(unfair_model, X_train))
    print('model: KL p(y | nonwhite), p(y | white)',
          eval_fairness(unfair_model, X_test))

    def hvp_inverse(model, grads, reg):
        # The inverse Hessian-vector product is always taken over X_train.
        return calc_log_reg_hvp_inverse(model, X_train, grads, reg=reg)

    s_tests = calc_s_tests(unfair_model,
                           hvp_inverse,
                           calc_log_reg_dkl_grad,
                           X_test,
                           y_test,
                           reg=0.05)
    influences = calc_influences(unfair_model, calc_log_reg_grad, s_tests,
                                 X_train, y_train).squeeze()

    positive = influences > 0
    idxs_to_drop = positive.nonzero()[:, 0]
    idxs_to_keep = (influences <= 0).nonzero()[:, 0]  # not used below

    trainer.retrain_leave_one_out(X_train,
                                  y_train,
                                  idxs_to_drop,
                                  reg=0.0,
                                  batch_size=1000,
                                  num_epochs=500,
                                  verbose=False)
    fair_model = trainer.model

    print('model retrain: KL p(y | nonwhite), p(y | white)',
          eval_fairness(fair_model, X_train))
    print('model retrain: KL p(y | nonwhite), p(y | white)',
          eval_fairness(fair_model, X_test))
    print('acc:', eval_acc(fair_model, X_test, y_test))

    # Shallow tree describing which training points scored positive.
    explainer = DecisionTreeClassifier(max_depth=2)
    explainer.fit(X_train, np.float32(positive))
    dot_source = tree.export_graphviz(explainer, out_file=None)
    graphviz.Source(dot_source).render("high_influence")
    print('most important feature is',
          np.argmax(explainer.feature_importances_))
def saf_predict_inf():
    """Run the influence experiment on the SAF data, holding out precinct 106.

    Steps, in order:

    1. Prepare the SAF features/labels for the concatenated train and test
       frames.
    2. Print the per-precinct fairness of the raw labels, sorted ascending.
    3. Split the rows by precinct: every ``raw_train`` row whose ``pct`` is
       not 106 is used for training, the precinct-106 rows for testing.
    4. Train a logistic regression and print accuracy and the fairness
       measure (labelled as a KL divergence in the output).
    5. Score the training points with ``calc_influences``, pass the
       positively-scored indices to ``Trainer.retrain_leave_one_out`` and
       print the same metrics for the retrained model.
    """
    seed(1)
    raw_train, raw_test = get_train_test_saf()
    data = pd.concat([raw_train, raw_test])
    X, y = [
        torch.tensor(arr) for arr in prepare_saf(
            data,
            target=[
                'contrabn', 'adtlrept', 'pistol', 'riflshot', 'asltweap',
                'knifcuti', 'machgun', 'othrweap'
            ],
            protected='race',
            primary='W',
            no_cats=True)
    ]

    # Fairness of the raw labels, one entry per precinct.
    # `.values.nonzero()` is used because `Series.nonzero()` no longer
    # exists in pandas >= 1.0; it returns the same tuple of positions.
    fairness_by_pct = []
    for pct_num in data.pct.unique():
        rows = (raw_train.pct == pct_num).values.nonzero()
        fairness_by_pct.append(
            (pct_num, calc_fairness(X[rows], y[rows]).item()))
    print(sorted(fairness_by_pct, key=itemgetter(1)))

    # Train on everything except precinct 106, test on precinct 106.
    # NOTE: a positional train/test split and a row filter used to precede
    # this split but were overwritten here before any use, so they are gone.
    # Alternative splits tried previously: `raw_train.pct < 70` for train,
    # `raw_train.pct == 77` for test.
    in_train = ~(raw_train.pct == 106)
    train_rows = in_train.values.nonzero()[0]
    test_rows = (~in_train).values.nonzero()[0]
    X_train = X[train_rows]
    y_train = y[train_rows]
    X_test = X[test_rows]
    y_test = y[test_rows]

    print('train: KL p(y | nonwhite), p(y | white)',
          calc_fairness(X_train, y_train))
    print('test: KL p(y | nonwhite), p(y | white)',
          calc_fairness(X_test, y_test))

    def get_model():
        return TorchLogisticRegression(X_train.shape[1])

    trainer = Trainer(get_model, nn.BCELoss())
    trainer.train(X_train,
                  y_train,
                  batch_size=1000,
                  num_epochs=100,
                  reg=0.0,
                  verbose=False)
    unfair_model = trainer.model

    print('acc:', eval_acc(unfair_model, X_test, y_test))
    print('model: KL p(y | nonwhite), p(y | white)',
          eval_fairness(unfair_model, X_train))
    print('model: KL p(y | nonwhite), p(y | white)',
          eval_fairness(unfair_model, X_test))

    def calc_hvp_inv(model, grads, reg):
        # The inverse Hessian-vector product is taken over the training rows.
        return calc_log_reg_hvp_inverse(model, X_train, grads, reg=reg)

    s_tests = calc_s_tests(unfair_model,
                           calc_hvp_inv,
                           calc_log_reg_dkl_grad,
                           X_test,
                           y_test,
                           reg=0.05)
    influences = calc_influences(unfair_model, calc_log_reg_grad, s_tests,
                                 X_train, y_train).squeeze()

    idxs_to_drop = (influences > 0).nonzero()[:, 0]
    trainer.retrain_leave_one_out(X_train,
                                  y_train,
                                  idxs_to_drop,
                                  reg=0.0,
                                  batch_size=1000,
                                  num_epochs=100,
                                  verbose=False)
    fair_model = trainer.model

    print('model retrain: KL p(y | nonwhite), p(y | white)',
          eval_fairness(fair_model, X_train))
    print('model retrain: KL p(y | nonwhite), p(y | white)',
          eval_fairness(fair_model, X_test))
    print('acc:', eval_acc(fair_model, X_test, y_test))
def saf_active_learning():
    """Compare withholding rows flagged in the last feature column vs. random rows.

    For each withheld count in ``[1000, 800, 500, 10]`` two hold-out sets
    are built: the last ``num`` rows whose final feature column is nonzero
    (method ``'white'``) and an equally sized random sample of row indices
    starting at the first of those rows (method ``'all'``). For each set the
    function trains a logistic regression on the remaining rows, prints
    accuracy and the fairness measure (plus its log) on those rows, scores
    them with ``calc_influences`` and saves a histogram of log-absolute
    influence to ``withhold_<num><method>.png``, split by the final feature
    column.
    """
    seed(1)
    raw_train, raw_test = get_train_test_saf()
    data = pd.concat([raw_train, raw_test])
    X, y = [
        torch.tensor(arr) for arr in prepare_saf(
            data,
            target=[
                'contrabn', 'adtlrept', 'pistol', 'riflshot', 'asltweap',
                'knifcuti', 'machgun', 'othrweap'
            ],
            protected='race',
            primary='W',
            no_cats=True)
    ]

    # Positions whose last feature column is nonzero; the histograms below
    # label these rows 'W'. This does not depend on `num`, so compute once.
    white_rows = X[:, -1].nonzero()

    for num in [1000, 800, 500, 10]:
        first_holdout_idx = int(white_rows[-num])
        white_holdout = white_rows[-num:].squeeze()
        all_holdout = np.array(
            sample(range(first_holdout_idx, len(X)), len(white_holdout)))

        for method, holdout_idxs in (('white', white_holdout),
                                     ('all', all_holdout)):
            # Order is left exactly as the set difference yields it, since
            # it determines the row order seen by the trainer.
            keep_idxs = np.array(
                list(set(range(len(X))) - set(holdout_idxs.tolist())))
            X_sampled = X[keep_idxs]
            y_sampled = y[keep_idxs]

            def get_model():
                return TorchLogisticRegression(X_sampled.shape[1])

            trainer = Trainer(get_model, nn.BCELoss())
            trainer.train(X_sampled,
                          y_sampled,
                          batch_size=1000,
                          num_epochs=100,
                          reg=0.0,
                          verbose=False)
            unfair_model = trainer.model

            print('sampling', method)
            print('num W withheld', num)
            print('acc:', eval_acc(unfair_model, X_sampled, y_sampled))
            model_kl = eval_fairness(unfair_model, X_sampled)
            print('model: KL p(y | nonwhite), p(y | white)', model_kl,
                  np.log(model_kl))

            # Bind this iteration's data as a default argument so the
            # callable never sees a later iteration's X_sampled.
            def calc_hvp_inv(model, grads, reg, X_sampled=X_sampled):
                return calc_log_reg_hvp_inverse(model,
                                                X_sampled,
                                                grads,
                                                reg=reg)

            s_tests = calc_s_tests(unfair_model,
                                   calc_hvp_inv,
                                   calc_log_reg_dkl_grad,
                                   X_sampled,
                                   y_sampled,
                                   reg=0.05)
            influences = calc_influences(unfair_model, calc_log_reg_grad,
                                         s_tests, X_sampled,
                                         y_sampled).squeeze()

            # `~` on a uint8 tensor is a bitwise NOT in PyTorch >= 1.2 and
            # would mark every row as nonzero; compare against 0 instead to
            # get the complement of the 'W' rows on every version.
            white_idx = X_sampled[:, -1].nonzero()
            other_idx = (X_sampled[:, -1].byte() == 0).nonzero()

            plt.figure()
            plt.hist(np.log(np.abs(influences[white_idx].squeeze())),
                     bins=100,
                     label='W',
                     density=True)
            plt.hist(np.log(np.abs(influences[other_idx].squeeze())),
                     bins=100,
                     label='~W',
                     density=True)
            plt.legend()
            plt.savefig('withhold_' + str(num) + method + '.png')
            plt.close()

    # NOTE(review): the file's indentation was lost, so whether this
    # full-data influence computation ran inside the loops or after them is
    # a reconstruction. Its results are not used in this function either
    # way; here it runs once with the last trained model - confirm.
    def calc_hvp_inv_full(model, grads, reg, X=X):
        return calc_log_reg_hvp_inverse(model, X, grads, reg=reg)

    s_tests = calc_s_tests(unfair_model,
                           calc_hvp_inv_full,
                           calc_log_reg_dkl_grad,
                           X,
                           y,
                           reg=0.05)
    influences = calc_influences(unfair_model, calc_log_reg_grad, s_tests, X,
                                 y).squeeze()
def adult_analysis():
    """Run the fairness/influence experiment on the Adult data (sex protected).

    Steps, in order:

    1. Train a logistic regression on a fixed subset of columns and print
       accuracy and the fairness measure (labelled as a KL divergence in the
       output).
    2. Choose which training indices to drop:

       * with ``--retrain`` in ``sys.argv``: those whose impact returned by
         ``make_more_fair_retrain`` exceeds the model's test fairness value;
       * otherwise: those with a positive ``calc_influences`` score.

       The model is retrained through ``Trainer.retrain_leave_one_out`` with
       those indices and its metrics are printed.
    3. Retrain with an equally sized baseline set of indices (rows where the
       last feature column and the label are both set) for comparison.
    4. Save a scatter plot of the Fisher embeddings of dropped vs. kept
       points to ``./fish_pca_hurtful.png``.
    5. Compute per-feature label correlations before and after dropping.
    """
    columns = [
        'age', 'fnlwgt', 'education-num', 'race', 'capital-gain',
        'capital-loss', 'hours-per-week', 'sex', 'target'
    ]
    raw_train, raw_test = get_train_test_adult()
    raw_train = raw_train[columns]
    raw_test = raw_test[columns]

    X, y = [
        torch.tensor(arr)
        for arr in prepare_adult(pd.concat([raw_train, raw_test]),
                                 protected='sex',
                                 primary='Male')
    ]
    n_train = len(raw_train)
    n_test = len(raw_test)
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_test = X[n_train:n_train + n_test]
    y_test = y[n_train:n_train + n_test]

    def get_model():
        return TorchLogisticRegression(X_train.shape[1])

    trainer = Trainer(get_model, nn.BCELoss())
    trainer.train(X_train,
                  y_train,
                  batch_size=1000,
                  num_epochs=100,
                  reg=0.0,
                  verbose=False)
    unfair_model = trainer.model

    print('acc:', eval_acc(unfair_model, X_test, y_test))
    print('train: KL p(y | female), p(y | male)',
          calc_fairness(X_train, y_train))
    print('test: KL p(y | female), p(y | male)',
          calc_fairness(X_test, y_test))
    print('model: KL p(y | female), p(y | male)',
          eval_fairness(unfair_model, X_test))

    if '--retrain' in sys.argv:
        impacts = make_more_fair_retrain(trainer,
                                         X_train,
                                         y_train,
                                         X_test,
                                         reg=0.0,
                                         batch_size=1000,
                                         num_epochs=1,
                                         verbose=False)
        idxs_to_drop = (impacts > eval_fairness(unfair_model,
                                                X_test)).nonzero()[:, 0]
        trainer.retrain_leave_one_out(X_train,
                                      y_train,
                                      idxs_to_drop,
                                      reg=0.0,
                                      batch_size=1000,
                                      num_epochs=100,
                                      verbose=False)
        neg_retrain_fair_model = trainer.model
        print('model loo retrain: KL p(y | female), p(y | male)',
              eval_fairness(neg_retrain_fair_model, X_test))
        print('acc:', eval_acc(neg_retrain_fair_model, X_test, y_test))
    else:

        def calc_hvp_inv(model, grads, reg):
            return calc_log_reg_hvp_inverse(model, X_train, grads, reg=reg)

        # The gradient helpers are passed directly, as the other experiments
        # in this file do; the old wrappers only forwarded their arguments.
        s_tests = calc_s_tests(unfair_model,
                               calc_hvp_inv,
                               calc_log_reg_dkl_grad,
                               X_test,
                               y_test,
                               reg=0.06)
        influences = calc_influences(unfair_model, calc_log_reg_grad,
                                     s_tests, X_train, y_train).squeeze()
        idxs_to_drop = (influences > 0).nonzero()[:, 0]
        trainer.retrain_leave_one_out(X_train,
                                      y_train,
                                      idxs_to_drop,
                                      reg=0.0,
                                      batch_size=1000,
                                      num_epochs=100,
                                      verbose=False)
        fair_model = trainer.model
        print('model retrain: KL p(y | female), p(y | male)',
              eval_fairness(fair_model, X_test))
        print('acc:', eval_acc(fair_model, X_test, y_test))

    # NOTE(review): the file's indentation was lost. Everything below only
    # needs `idxs_to_drop`, which both branches define, so it is placed at
    # function level rather than inside the `else` - confirm.

    # Baseline: drop the same number of rows, chosen by the last feature
    # column and the label rather than by influence.
    # NOTE(review): with primary='Male' the last column may flag the primary
    # group rather than the protected one, despite the name - confirm.
    female = ((X_train[:, -1].byte()).int()
              & y_train.int()).nonzero().squeeze()
    trainer.retrain_leave_one_out(X_train,
                                  y_train,
                                  female[:len(idxs_to_drop)],
                                  reg=0.0,
                                  batch_size=1000,
                                  num_epochs=100,
                                  verbose=False)
    female_model = trainer.model
    print('female model retrain: KL p(y | female), p(y | male)',
          eval_fairness(female_model, X_test))
    print('acc:', eval_acc(female_model, X_test, y_test))

    fisher_vectors = get_fisher_vectors(unfair_model, X_train, y_train)
    mask = torch.ones(len(X_train), dtype=torch.uint8)
    mask[idxs_to_drop] = 0
    helpful, hurtful = scatter_dists(fisher_vectors[idxs_to_drop],
                                     fisher_vectors[mask])

    plt.close()
    plt.figure()
    # Plot a random ~10% subsample of each group. x/y are passed by keyword
    # because seaborn >= 0.12 no longer accepts them positionally.
    helpful_xy = helpful[torch.empty(
        len(helpful)).bernoulli(0.1).nonzero().squeeze()].T
    sns.scatterplot(x=helpful_xy[0],
                    y=helpful_xy[1],
                    label='helpful',
                    palette="Set2",
                    marker='+',
                    s=30)
    hurtful_xy = hurtful[torch.empty(
        len(hurtful)).bernoulli(0.1).nonzero().squeeze()].T
    sns.scatterplot(x=hurtful_xy[0],
                    y=hurtful_xy[1],
                    label='hurtful',
                    palette="Set2",
                    marker='x',
                    s=30)
    plt.title(
        'First two principal directions of Fisher embeddings of training points '
    )
    plt.legend()
    plt.savefig('./fish_pca_hurtful.png')

    # Indices that survive the drop, built once so the frame, features and
    # labels below are guaranteed to refer to the same rows.
    keep = sorted(set(range(len(X_train))) - set(idxs_to_drop.tolist()))
    remain = raw_train.iloc[keep]
    remain_X = X_train[keep]
    remain_y = y_train[keep]

    # Correlation of each feature with the label, before and after dropping.
    # NOTE(review): these results are neither printed nor returned here.
    orig_corrs = [
        np.corrcoef(X_train[:, i], y_train)[0, 1]
        for i in range(X_train.shape[1])
    ]
    remain_corrs = [
        np.corrcoef(remain_X[:, i], remain_y)[0, 1]
        for i in range(remain_X.shape[1])
    ]
def saf_provenance_analysis():
    """Attribute the SAF model's unfairness to precincts and to columns.

    Steps, in order:

    1. Train a logistic regression on all prepared rows and print accuracy
       and the fairness measure (labelled as a KL divergence in the output).
    2. For each precinct, sum the ``calc_influences`` scores of that
       precinct's rows, using the remaining rows as the evaluation set.
    3. Rank precincts by that sum and by a per-precinct white-and-positive
       rate, and print the Kendall tau between the two rankings.
    4. Print positive/negative label rates for races 'W' and 'B', overall and
       within precinct 106.
    5. Compute influences over all rows, fit a depth-3 decision tree that
       predicts positive influence from features plus label (rendered to
       ``iris``), and show influence histograms split by the last feature
       column.
    6. Compute the Spearman correlation of each column's indicator with the
       influences.
    """
    seed(1)
    raw_train, raw_test = get_train_test_saf()
    data = pd.concat([raw_train, raw_test])
    X, y = [
        torch.tensor(arr) for arr in prepare_saf(
            data,
            target=[
                'contrabn', 'adtlrept', 'pistol', 'riflshot', 'asltweap',
                'knifcuti', 'machgun', 'othrweap'
            ],
            protected='race',
            primary='W',
            no_cats=True)
    ]

    def get_model():
        return TorchLogisticRegression(X.shape[1])

    trainer = Trainer(get_model, nn.BCELoss())
    trainer.train(X,
                  y,
                  batch_size=1000,
                  num_epochs=100,
                  reg=0.0,
                  verbose=False)
    unfair_model = trainer.model
    print('acc:', eval_acc(unfair_model, X, y))
    print('model: KL p(y | nonwhite), p(y | white)',
          eval_fairness(unfair_model, X))

    influences_by_pct = {}
    pct_nums = data.pct.unique()
    for pct_num in pct_nums:
        in_pct = raw_train.pct == pct_num
        # `.values.nonzero()` replaces `Series.nonzero()`, which no longer
        # exists in pandas >= 1.0; both return a tuple of positions.
        pct_rows = in_pct.values.nonzero()[0]
        other_rows = (~in_pct).values.nonzero()[0]
        X_train_pct = X[pct_rows]
        y_train_pct = y[pct_rows]
        X_test_pct = X[other_rows]
        y_test_pct = y[other_rows]

        # Bind this precinct's rows as a default so the callable cannot see
        # a later iteration's data.
        def calc_hvp_inv(model, grads, reg, X_train_pct=X_train_pct):
            return calc_log_reg_hvp_inverse(model,
                                            X_train_pct,
                                            grads,
                                            reg=reg)

        s_tests = calc_s_tests(unfair_model,
                               calc_hvp_inv,
                               calc_log_reg_dkl_grad,
                               X_test_pct,
                               y_test_pct,
                               reg=0.05)
        influences = calc_influences(unfair_model, calc_log_reg_grad,
                                     s_tests, X_train_pct,
                                     y_train_pct).squeeze()
        influences_by_pct[pct_num] = influences.sum().item()

    frisk_white_by_pct = {
        pct_num:
        ((data.race[data.pct == pct_num] == 'W')
         & y[(data.pct == pct_num).values.nonzero()].byte()).mean()
        for pct_num in pct_nums
    }

    pct_ranked_by_influence = sorted(influences_by_pct.items(),
                                     key=itemgetter(1))
    pct_ranked_by_frisk_white = sorted(frisk_white_by_pct.items(),
                                       key=itemgetter(1))
    print(
        kendalltau(*map(colgetter(0), (pct_ranked_by_influence,
                                       pct_ranked_by_frisk_white))))

    # `~` on a uint8 tensor is a bitwise NOT in PyTorch >= 1.2 (0 -> 255,
    # 1 -> 254), so the negated labels are obtained by comparing with 0.
    pct = 106
    for r in ['W', 'B']:
        # NOTE(review): the file's indentation was lost; all four prints are
        # assumed to belong to this loop because each one uses `r`.
        print(r, 'y overall', ((data.race == r) & y.byte()).values.sum() /
              (data.race == r).values.sum())
        print(r, 'not y overall',
              ((data.race == r) & pd.Series(y.byte() == 0)).values.sum() /
              (data.race == r).values.sum())
        print(r, f'y pct {pct}',
              ((data.pct == pct) & (data.race == r)
               & pd.Series(y.byte())).values.sum() /
              ((data.pct == pct) & (data.race == r)).values.sum())
        print(r, f'not y pct {pct}',
              ((data.pct == pct) & (data.race == r)
               & pd.Series(y.byte() == 0)).values.sum() /
              ((data.pct == pct) & (data.race == r)).values.sum())

    def calc_hvp_inv_full(model, grads, reg, X=X):
        return calc_log_reg_hvp_inverse(model, X, grads, reg=reg)

    s_tests = calc_s_tests(unfair_model,
                           calc_hvp_inv_full,
                           calc_log_reg_dkl_grad,
                           X,
                           y,
                           reg=0.05)
    influences = calc_influences(unfair_model, calc_log_reg_grad, s_tests, X,
                                 y).squeeze()

    # Tree predicting "positive influence" from the features plus the label.
    # (Fitting on Fisher vectors instead was tried previously.)
    model = DecisionTreeClassifier(max_depth=3)
    model.fit(np.concatenate([X, y[:, np.newaxis]], 1), influences > 0)
    dot_data = tree.export_graphviz(model, out_file=None)
    graph = graphviz.Source(dot_data)
    graph.render("iris")

    plt.hist(influences[X[:, -1].nonzero()].squeeze(),
             bins=100,
             label='W',
             density=True)
    plt.hist(influences[(X[:, -1].byte() == 0).nonzero()].squeeze(),
             bins=100,
             label='~W',
             density=True)
    plt.legend()
    plt.show()

    corrs = {}
    for col_name in data.columns:
        if col_name == 'race':
            corr, _ = spearmanr(data[col_name] == 'W', influences)
            corrs[col_name] = 0.0 if np.isnan(corr) else corr
            # Skip the generic 'Y' test below: it used to overwrite this
            # value with the correlation of an all-False indicator.
            continue
        try:
            corr, _ = spearmanr(data[col_name] == 'Y', influences)
        except Exception:
            # Best effort: columns that cannot be correlated are skipped.
            continue
        corrs[col_name] = 0.0 if np.isnan(corr) else corr
    # NOTE(review): the ranking is neither printed nor returned here.
    cols_by_corr = sorted(corrs.items(), key=itemgetter(1))
def saf_analysis():
    """Run the influence experiment on SAF with 'frisked' as the target.

    Steps, in order:

    1. Split the rows by precinct: every ``raw_train`` row whose ``pct`` is
       not 40 is used for training, the precinct-40 rows for testing.
    2. Train a logistic regression and print accuracy and the fairness
       measure (labelled as a KL divergence in the output).
    3. Score the training points with ``calc_influences``, pass the
       positively-scored indices to ``Trainer.retrain_leave_one_out`` and
       print the metrics again.
    4. Repeat the retraining with an equally sized baseline set of indices
       chosen from the last feature column and the label.
    5. Look up the raw rows of the 100 highest-influence training points.
    """
    seed(1)
    raw_train, raw_test = get_train_test_saf()
    data = pd.concat([raw_train, raw_test])
    X, y = [
        torch.tensor(arr) for arr in prepare_saf(
            data, target='frisked', protected='race', primary='W')
    ]

    # Train on everything except precinct 40, test on precinct 40.
    # Alternatives tried previously: the positional raw_train/raw_test split,
    # and `raw_train.pct < 70` as the training mask.
    # `.values.nonzero()` replaces `Series.nonzero()`, which no longer exists
    # in pandas >= 1.0; both return a tuple of positions.
    in_train = ~(raw_train.pct == 40)
    train_rows = in_train.values.nonzero()[0]
    test_rows = (~in_train).values.nonzero()[0]
    X_train = X[train_rows]
    y_train = y[train_rows]
    X_test = X[test_rows]
    y_test = y[test_rows]

    print('train: KL p(y | nonwhite), p(y | white)',
          calc_fairness(X_train, y_train))
    print('test: KL p(y | nonwhite), p(y | white)',
          calc_fairness(X_test, y_test))

    def get_model():
        return TorchLogisticRegression(X_train.shape[1])

    trainer = Trainer(get_model, nn.BCELoss())
    trainer.train(X_train,
                  y_train,
                  batch_size=1000,
                  num_epochs=100,
                  reg=0.0,
                  verbose=False)
    unfair_model = trainer.model

    print('acc:', eval_acc(unfair_model, X_test, y_test))
    print('model: KL p(y | nonwhite), p(y | white)',
          eval_fairness(unfair_model, X_train))
    print('model: KL p(y | nonwhite), p(y | white)',
          eval_fairness(unfair_model, X_test))

    def calc_hvp_inv(model, grads, reg):
        return calc_log_reg_hvp_inverse(model, X_train, grads, reg=reg)

    s_tests = calc_s_tests(unfair_model,
                           calc_hvp_inv,
                           calc_log_reg_dkl_grad,
                           X_test,
                           y_test,
                           reg=0.05)
    influences = calc_influences(unfair_model, calc_log_reg_grad, s_tests,
                                 X_train, y_train).squeeze()

    idxs_to_drop = (influences > 0).nonzero()[:, 0]
    trainer.retrain_leave_one_out(X_train,
                                  y_train,
                                  idxs_to_drop,
                                  reg=0.0,
                                  batch_size=1000,
                                  num_epochs=100,
                                  verbose=False)
    fair_model = trainer.model

    print('model retrain: KL p(y | nonwhite), p(y | white)',
          eval_fairness(fair_model, X_train))
    print('model retrain: KL p(y | nonwhite), p(y | white)',
          eval_fairness(fair_model, X_test))
    print('acc:', eval_acc(fair_model, X_test, y_test))

    # Baseline: drop the same number of rows, chosen by the last feature
    # column and the label rather than by influence.
    # NOTE(review): elsewhere in this file rows with a nonzero last column
    # are plotted as 'W', so this mask may select the primary group despite
    # the "nonwhite" naming - confirm.
    nonwhite = ((X_train[:, -1].byte()).int()
                & y_train.int()).nonzero().squeeze()
    trainer.retrain_leave_one_out(X_train,
                                  y_train,
                                  nonwhite[:len(idxs_to_drop)],
                                  reg=0.0,
                                  batch_size=1000,
                                  num_epochs=100,
                                  verbose=False)
    nonwhite_model = trainer.model

    print('nonwhite model retrain: KL p(y | nonwhite), p(y | white)',
          eval_fairness(nonwhite_model, X_train))
    print('nonwhite model retrain: KL p(y | nonwhite), p(y | white)',
          eval_fairness(nonwhite_model, X_test))
    print('acc:', eval_acc(nonwhite_model, X_test, y_test))

    # Raw rows of the 100 highest-influence training points. `order` indexes
    # X_train, which omits precinct 40, so it is mapped back to raw_train
    # positions through `train_rows` before the lookup.
    # NOTE(review): the result is neither printed nor returned here.
    _, order = torch.sort(influences, descending=True)
    top_influence_rows = raw_train.iloc[train_rows[order[:100].tolist()]]