def RL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
       data_option, flag_demo):
    """Random-labeling baseline: label ``b`` random pool points, fit once.

    Args:
        path, response, sensitive, atr, r, rnd, data_option, flag_demo:
            Forwarded unchanged to ``pr.data_prep``.
        demo_option: Forwarded to ``dm.Demo`` as ``option``.
        b: Labeling budget, i.e. how many pool points are moved into the
            labeled set.
        alpha: Unused here; kept so the signature matches the other
            strategies.

    Returns:
        Tuple ``(demo_test, _Xl, clf, score)``: the ``dm.Demo`` value on the
        test split (wrapped in a 1-D array), the enlarged labeled features,
        the fitted ``LogisticRegression`` and its test accuracy.
    """
    # NOTE(review): judging by the names, data_prep yields a labeled set
    # (_Xl/_yl), an unlabeled pool (_Xu/_yu), a test set (_Xt/_yt) and a
    # separate _Cset -- confirm against pr.data_prep.
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt,
     _Cset, _Cset_s, _Cset_y) = pr.data_prep(
         path, response, sensitive, atr, r, rnd, data_option, flag_demo)

    # Sample WITHOUT replacement: np.random.choice defaults to replace=True,
    # which could label the same pool point several times and silently
    # shrink the effective budget. The size is clamped so that a budget
    # larger than the pool labels the whole pool instead of raising.
    pool_size = len(_Xu)
    rnd_id = np.random.choice(pool_size, min(b, pool_size), replace=False)
    _Xl = np.append(_Xl, _Xu[rnd_id], axis=0)
    _yl = np.append(_yl, _yu[rnd_id], axis=0)

    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
    score = clf.score(_Xt, _yt)
    demo_test = np.append(
        [], dm.Demo(_Xt, _Xt_s, _yt, clf=clf, option=demo_option))
    return demo_test, _Xl, clf, score
def AL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
       data_option):
    """Entropy-based active learning (uncertainty sampling).

    On each of ``b`` iterations the pool point with the highest predictive
    entropy under the current model is moved to the labeled set and the
    model is refit.

    Args:
        path, response, sensitive, atr, r, rnd, data_option:
            Forwarded unchanged to ``pr.data_prep``.
        demo_option: Forwarded to ``dm.Demo`` as ``option``.
        b: Number of points to label (loop iterations).
        alpha: Unused here; kept so the signature matches the other
            strategies.

    Returns:
        Tuple ``(demo_test, _Xl, _Xl_s, _yl, clf, overall_score)`` where
        ``demo_test`` and ``overall_score`` hold one entry per iteration,
        recorded *before* that iteration's point is added.
    """
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt,
     _Cset, _Cset_s, _Cset_y) = pr.data_prep(
         path, response, sensitive, atr, r, rnd, data_option)
    demo_test = []
    overall_score = []

    # train the model for the first time
    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)

    for _ in range(b):
        # record stats to be reported -- these lines are added for the
        # purpose of experiments and are not part of the algorithm
        overall_score = np.append(overall_score, clf.score(_Xt, _yt))
        demo_test = np.append(
            demo_test,
            dm.Demo(_Xt, _Xt_s, _yt, clf=clf, option=demo_option))

        # compute the entropy for all instances in U; a probability of
        # exactly 0 gives 0 * log2(0) = NaN, which nansum treats as the
        # correct limit 0 (otherwise the NaN row would win the argmax)
        probas_val = clf.predict_proba(_Xu)
        with np.errstate(divide='ignore', invalid='ignore'):
            e = -np.nansum(probas_val * np.log2(probas_val), axis=1)

        # find the argmax and label it
        selection = np.argsort(e)[::-1][0]
        _Xl = np.append(_Xl, [_Xu[selection]], axis=0)
        _Xl_s = np.append(_Xl_s, [_Xu_s[selection]], axis=0)
        _yl = np.append(_yl, [_yu[selection]], axis=0)

        # update the model and U; _Xu_s must shrink together with _Xu and
        # _yu or later _Xu_s[selection] lookups hit the wrong row
        clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
        _Xu = np.delete(_Xu, selection, 0)
        _yu = np.delete(_yu, selection, 0)
        _Xu_s = np.delete(_Xu_s, selection, 0)

    return demo_test, _Xl, _Xl_s, _yl, clf, overall_score
def RL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
       data_option, flag_demo):
    """Random-labeling baseline trained with ``DemographicParityClassifier``.

    ``b`` random pool points are moved into the labeled set, then a single
    ``DemographicParityClassifier`` is fit on the features with the
    sensitive attribute appended as an extra column.

    Args:
        path, response, sensitive, atr, r, rnd, data_option, flag_demo:
            Forwarded unchanged to ``pr.data_prep``.
        demo_option: Forwarded to ``dm.Demo`` as ``option``.
        b: Labeling budget.
        alpha: Unused here; kept so the signature matches the other
            strategies.

    Returns:
        Tuple ``(demo_test, _Xl, _Xl_s, _yl, clf, score, f1score)``.
        ``f1score`` is always an empty list; it is never filled in here.
    """
    f1score = []
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt,
     _Cset, _Cset_s, _Cset_y) = pr.data_prep(
         path, response, sensitive, atr, r, rnd, data_option, flag_demo)

    # Sample WITHOUT replacement: np.random.choice defaults to replace=True,
    # which could label the same pool point several times. The size is
    # clamped so a budget larger than the pool labels the whole pool.
    pool_size = len(_Xu)
    rnd_id = np.random.choice(pool_size, min(b, pool_size), replace=False)
    _Xl = np.append(_Xl, _Xu[rnd_id], axis=0)
    _Xl_s = np.append(_Xl_s, _Xu_s[rnd_id], axis=0)
    _yl = np.append(_yl, _yu[rnd_id], axis=0)

    # The sensitive attribute is appended as the LAST column, which is the
    # column sensitive_cols=-1 is presumably meant to designate.
    clf = DemographicParityClassifier(sensitive_cols=-1,
                                      covariance_threshold=0.5)
    clf.fit(np.append(_Xl, _Xl_s[:, None], axis=1), _yl)

    _Xt_with_s = np.append(_Xt, _Xt_s[:, None], axis=1)
    score = clf.score(_Xt_with_s, _yt)
    demo_test = dm.Demo(_Xt_with_s, _Xt_s, _yt, clf=clf, option=demo_option)
    return demo_test, _Xl, _Xl_s, _yl, clf, score, f1score
def FAL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
        data_option, flag_demo):
    """Fair active learning by exhaustive expected-fairness estimation.

    Each iteration scores every pool point by a convex combination of its
    normalised predictive entropy and its normalised expected ``dm.Demo``
    value on ``_Cset``. The expectation is taken over both candidate labels
    (0 and 1) by refitting a model per label. The highest-scoring point is
    labeled and the model refit.

    Args:
        path, response, sensitive, atr, r, rnd, data_option, flag_demo:
            Forwarded unchanged to ``pr.data_prep``.
        demo_option: Forwarded to ``dm.Demo`` as ``option``.
        b: Number of points to label (loop iterations).
        alpha: Indexable schedule of entropy weights; entries 0..10 are
            used, one per block of ``b // 11`` iterations.

    Returns:
        Tuple ``(demo_test, _Xl, _Xl_s, _yl, clf, overall_score, f1score,
        Time)``. ``demo_test``/``overall_score`` are recorded before each
        selection, ``Time`` holds cumulative seconds after each iteration,
        and ``f1score`` is always an empty list.
    """
    demo_test = []
    overall_score = []
    f1score = []
    Time = np.zeros(b)
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt,
     _Cset, _Cset_s, _Cset_y) = pr.data_prep(
         path, response, sensitive, atr, r, rnd, data_option, flag_demo)

    # train the model for the first time
    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)

    # Iterations per alpha entry; at least 1 so that b < 11 no longer
    # divides by zero (identical to int(b / 11) whenever b >= 11).
    step = max(1, b // 11)

    t1 = time()
    for Iter in range(b):
        u = len(_Xu)
        a = alpha[min(10, Iter // step)]

        # stats for reporting only, not part of the algorithm
        overall_score = np.append(overall_score, clf.score(_Xt, _yt))
        demo_test = np.append(
            demo_test,
            dm.Demo(_Xt, _Xt_s, _yt, clf=clf, option=demo_option))

        # Entropy of every pool point; 0 * log2(0) = NaN is treated as its
        # limit 0 so a fully confident prediction does not poison min/max.
        probas_val = clf.predict_proba(_Xu)
        with np.errstate(divide='ignore', invalid='ignore'):
            e = -np.nansum(probas_val * np.log2(probas_val), axis=1)

        # Expected Demo value on _Cset if pool point j were labeled.
        E_corr = np.zeros(u)
        for j in range(u):
            _Xl_tmp = np.append(_Xl, [_Xu[j]], axis=0)
            f_tmp = []
            for k in range(2):
                _yl_tmp = np.append(_yl, [k], axis=0)
                clf_tmp = LogisticRegression(solver='liblinear').fit(
                    _Xl_tmp, _yl_tmp)
                f_tmp = np.append(
                    f_tmp,
                    dm.Demo(_Cset, _Cset_s, _Cset_y, clf=clf_tmp,
                            option=demo_option))
            f_tmp[np.isnan(f_tmp)] = 0
            # First predict_proba column weights the k=0 outcome
            # (assumes classes are ordered 0, 1 -- TODO confirm).
            p = probas_val[j][0]
            E_corr[j] = f_tmp.dot([p, 1 - p])

        # Normalise both criteria to [0, 1]; a constant criterion carries no
        # information and contributes 0 instead of 0/0 = NaN. E_corr is
        # flipped so that a LOWER expected Demo value scores higher.
        corr_range = E_corr.max() - E_corr.min()
        if corr_range > 0:
            E_corr_scaled = (E_corr.max() - E_corr) / corr_range
        else:
            E_corr_scaled = np.zeros(u)
        e_range = e.max() - e.min()
        if e_range > 0:
            e_scaled = (e - e.min()) / e_range
        else:
            e_scaled = np.zeros(u)
        e_all = a * e_scaled + (1 - a) * E_corr_scaled

        # label the best-scoring point and move it out of the pool
        selection = np.argsort(e_all)[::-1][0]
        _Xl = np.append(_Xl, [_Xu[selection]], axis=0)
        _Xl_s = np.append(_Xl_s, [_Xu_s[selection]], axis=0)
        _yl = np.append(_yl, [_yu[selection]], axis=0)
        clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
        _Xu = np.delete(_Xu, selection, 0)
        _yu = np.delete(_yu, selection, 0)
        _Xu_s = np.delete(_Xu_s, selection, 0)
        Time[Iter] = time() - t1

    return (demo_test, _Xl, _Xl_s, _yl, clf, overall_score, f1score, Time)
def FAL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
        data_option, flag_demo):
    """Fair active learning driven by ``fbc`` covariance scores.

    Each iteration scores every pool point by
    ``a * entropy + (1 - a) * fbc.efi(...)`` (both min-max normalised when
    they are not constant), labels the highest-scoring point, refits the
    model and updates the ``fbc`` aggregates.

    Args:
        path, response, sensitive, atr, r, rnd, data_option, flag_demo:
            Forwarded unchanged to ``pr.data_prep``.
        demo_option: Forwarded to ``dm.Demo`` as ``option``.
        b: Number of points to label (loop iterations).
        alpha: Indexable schedule of entropy weights; entries 0..10 are
            used, one per block of ``b // 11`` iterations.

    Returns:
        Tuple ``(demo, _Xl, _Xl_s, _yl, theta, clf, overall_score, Time)``.
        ``demo``/``overall_score`` are length-``b`` arrays recorded before
        each selection; ``Time`` holds cumulative seconds per iteration;
        ``theta`` is the transposed coefficient matrix of the final model.
    """
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt,
     _Cset, _Cset_s, _Cset_y) = pr.data_prep(
         path, response, sensitive, atr, r, rnd, data_option, flag_demo)
    overall_score = np.zeros(b)
    Time = np.zeros(b)
    demo = np.zeros(b)

    # train the model for the first time
    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
    theta = clf.coef_.T

    # Covariance of each of the first m feature columns with the sensitive
    # attribute, estimated once on the unlabeled pool (the original author
    # notes that estimating it on the test set "is not correct").
    pool_with_s = np.concatenate(
        (_Xu, _Xu_s.reshape(_Xu_s.shape[0], 1)), 1)
    covXS = np.cov(pool_with_s.T)[0:m, -1].reshape(m, 1)
    fbc.init(_Xl, _yl, covXS, theta)

    # Iterations per alpha entry; at least 1 so that b < 11 no longer
    # divides by zero (identical to int(b / 11) whenever b >= 11).
    step = max(1, b // 11)

    t1 = time()
    for Iter in range(b):
        u = len(_Xu)
        a = alpha[min(10, Iter // step)]

        # record stats to be reported -- these lines are added for the
        # purpose of experiments and are not part of the algorithm
        overall_score[Iter] = clf.score(_Xt, _yt)
        demo[Iter] = dm.Demo(_Xt, _Xt_s, _yt, clf=clf, option=demo_option)

        # compute the entropy and expected covariance improvement for all
        # instances in U; 0 * log2(0) = NaN is treated as its limit 0
        probas_val = clf.predict_proba(_Xu)
        with np.errstate(divide='ignore', invalid='ignore'):
            e = -np.nansum(probas_val * np.log2(probas_val), axis=1)

        ECov = np.zeros(u)
        for j in range(u):
            # class probabilities of point j as a column vector, taken from
            # the batch prediction instead of one predict_proba call per row
            ECov[j] = fbc.efi(_Xu[j], probas_val[j].reshape(-1, 1))

        # normalize the values (left untouched when constant)
        Emax = ECov.max()
        Emin = ECov.min()
        if Emax > Emin:
            ECov = (ECov - Emin) / (Emax - Emin)
        emin = e.min()
        emax = e.max()
        if emax > emin:
            e = (e - emin) / (emax - emin)
        e_all = a * e + (1 - a) * ECov

        # find the argmax and label it
        selection = np.argsort(e_all)[::-1][0]
        _Xl = np.append(_Xl, [_Xu[selection]], axis=0)
        # keep the returned sensitive column aligned with _Xl / _yl
        _Xl_s = np.append(_Xl_s, [_Xu_s[selection]], axis=0)
        _yl = np.append(_yl, [_yu[selection]], axis=0)

        # update the model, then the fbc aggregates (needs the new theta and
        # the selected row, so it must run before the row is deleted)
        clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
        theta = clf.coef_.T
        fbc.updateAggs(_Xu[selection], _yu[selection], theta)
        _Xu = np.delete(_Xu, selection, 0)
        _yu = np.delete(_yu, selection, 0)
        _Xu_s = np.delete(_Xu_s, selection, 0)
        Time[Iter] = time() - t1

    return demo, _Xl, _Xl_s, _yl, theta, clf, overall_score, Time
def FAL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
        data_option, flag_demo, k):
    """Fair active learning: top-``k`` entropy filter, then ``fbc`` score.

    Each iteration keeps the ``k`` pool points with the highest predictive
    entropy, scores only those with ``fbc.efi`` and labels the one with the
    largest score.

    Args:
        path, response, sensitive, atr, r, rnd, data_option, flag_demo:
            Forwarded unchanged to ``pr.data_prep``.
        demo_option: Forwarded to ``dm.Demo`` as ``option``.
        b: Number of points to label (loop iterations).
        alpha: Unused by this variant; kept for signature compatibility.
        k: Number of highest-entropy candidates scored per iteration.

    Returns:
        Tuple ``(demo, _Xl, _Xl_s, _yl, theta, clf, overall_score, Time)``.
        ``demo``/``overall_score`` are length-``b`` arrays recorded before
        each selection; ``Time`` holds cumulative seconds per iteration;
        ``theta`` is the transposed coefficient matrix of the final model.
    """
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt,
     _Cset, _Cset_s, _Cset_y) = pr.data_prep(
         path, response, sensitive, atr, r, rnd, data_option, flag_demo)
    overall_score = np.zeros(b)
    Time = np.zeros(b)
    demo = np.zeros(b)

    # train the model for the first time
    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
    theta = clf.coef_.T

    # Covariance of each of the first m feature columns with the sensitive
    # attribute, estimated once on the unlabeled pool.
    pool_with_s = np.concatenate(
        (_Xu, _Xu_s.reshape(_Xu_s.shape[0], 1)), 1)
    covXS = np.cov(pool_with_s.T)[0:m, -1].reshape(m, 1)
    fbc.init(_Xl, _yl, covXS, theta)

    t1 = time()
    for Iter in range(b):
        # record stats to be reported -- these lines are added for the
        # purpose of experiments and are not part of the algorithm
        overall_score[Iter] = clf.score(_Xt, _yt)
        demo[Iter] = dm.Demo(_Xt, _Xt_s, _yt, clf=clf, option=demo_option)

        # entropy of every pool point; 0 * log2(0) = NaN is treated as its
        # limit 0 so confident predictions are not ranked as most uncertain
        probas_val = clf.predict_proba(_Xu)
        with np.errstate(divide='ignore', invalid='ignore'):
            e = -np.nansum(probas_val * np.log2(probas_val), axis=1)
        idx = np.argsort(e)[::-1][0:k]

        # Size by len(idx), not k: when the pool holds fewer than k points
        # zero padding could win the argmax and index past the end of idx.
        ECov = np.zeros(len(idx))
        for j, cand in enumerate(idx):
            ECov[j] = fbc.efi(_Xu[cand], probas_val[cand].reshape(-1, 1))

        # find the argmax among the candidates and label it
        selection = idx[np.argsort(ECov)[::-1][0]]
        _Xl = np.append(_Xl, [_Xu[selection]], axis=0)
        # keep the returned sensitive column aligned with _Xl / _yl
        _Xl_s = np.append(_Xl_s, [_Xu_s[selection]], axis=0)
        _yl = np.append(_yl, [_yu[selection]], axis=0)

        # update the model, then the fbc aggregates (needs the new theta and
        # the selected row, so it must run before the row is deleted)
        clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
        theta = clf.coef_.T
        fbc.updateAggs(_Xu[selection], _yu[selection], theta)
        _Xu = np.delete(_Xu, selection, 0)
        _yu = np.delete(_yu, selection, 0)
        _Xu_s = np.delete(_Xu_s, selection, 0)
        Time[Iter] = time() - t1

    return demo, _Xl, _Xl_s, _yl, theta, clf, overall_score, Time
def FAL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
        data_option, flag_demo, kk):
    """Fair active learning: top-``kk`` entropy filter with replication.

    Each iteration keeps the ``kk`` pool points with the highest predictive
    entropy, estimates for each the expected ``dm.Demo`` value on ``_Cset``
    over both candidate labels, and labels the candidate with the smallest
    expectation. If the ``dm.Demo`` value obtained with the candidate's true
    label is lower than the current model's, the point is added to the
    labeled set twice.

    Args:
        path, response, sensitive, atr, r, rnd, data_option, flag_demo:
            Forwarded unchanged to ``pr.data_prep``.
        demo_option: Forwarded to ``dm.Demo`` as ``option``.
        b: Number of selection rounds (loop iterations).
        alpha: Unused by this variant; kept for signature compatibility.
        kk: Number of highest-entropy candidates evaluated per iteration.

    Returns:
        Tuple ``(demo_test, _Xl, _Xl_s, _yl, clf, overall_score, f1score)``.
        ``demo_test``/``overall_score`` are recorded before each selection;
        ``f1score`` is always an empty list.
    """
    demo_test = []
    overall_score = []
    f1score = []
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt,
     _Cset, _Cset_s, _Cset_y) = pr.data_prep(
         path, response, sensitive, atr, r, rnd, data_option, flag_demo)

    # train the model for the first time
    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)

    for _ in range(b):
        # stats for reporting only, not part of the algorithm
        overall_score = np.append(overall_score, clf.score(_Xt, _yt))
        demo_test = np.append(
            demo_test,
            dm.Demo(_Xt, _Xt_s, _yt, clf=clf, option=demo_option))
        demo_cset1 = dm.Demo(_Cset, _Cset_s, _Cset_y, clf=clf,
                             option=demo_option)

        # entropy of every pool point; 0 * log2(0) = NaN is treated as its
        # limit 0 so confident predictions are not ranked as most uncertain
        probas_val = clf.predict_proba(_Xu)
        with np.errstate(divide='ignore', invalid='ignore'):
            e = -np.nansum(probas_val * np.log2(probas_val), axis=1)
        idx = np.argsort(e)[::-1][0:kk]

        # Size by len(idx), not kk: when the pool holds fewer than kk points
        # zero padding could win the argmin and index past the end of idx.
        E_corr = np.zeros(len(idx))
        demo_f = np.zeros((len(idx), 2))
        for j, cand in enumerate(idx):
            _Xl_tmp = np.append(_Xl, [_Xu[cand]], axis=0)
            f_tmp = []
            for k in range(2):
                _yl_tmp = np.append(_yl, [k], axis=0)
                clf_tmp = LogisticRegression(solver='liblinear').fit(
                    _Xl_tmp, _yl_tmp)
                f_tmp = np.append(
                    f_tmp,
                    dm.Demo(_Cset, _Cset_s, _Cset_y, clf=clf_tmp,
                            option=demo_option))
            f_tmp[np.isnan(f_tmp)] = 0
            demo_f[j] = f_tmp
            # First predict_proba column weights the k=0 outcome
            # (assumes classes are ordered 0, 1 -- TODO confirm).
            p = probas_val[cand][0]
            E_corr[j] = f_tmp.dot([p, 1 - p])

        # pick the candidate with the smallest expected Demo value
        best = np.argsort(E_corr)[0]
        selection = idx[best]
        # Demo value that was simulated for the candidate's true label
        demo_cset2 = demo_f[best, int(_yu[selection])]

        # replicate the points that improve unfairness reduction after
        # labeling them: such a point is appended twice, otherwise once
        copies = 2 if demo_cset2 - demo_cset1 < 0 else 1
        for _copy in range(copies):
            _Xl = np.append(_Xl, [_Xu[selection]], axis=0)
            _Xl_s = np.append(_Xl_s, [_Xu_s[selection]], axis=0)
            _yl = np.append(_yl, [_yu[selection]], axis=0)

        clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
        _Xu = np.delete(_Xu, selection, 0)
        _yu = np.delete(_yu, selection, 0)
        _Xu_s = np.delete(_Xu_s, selection, 0)

    return demo_test, _Xl, _Xl_s, _yl, clf, overall_score, f1score
def AL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
       data_option, flag_demo):
    """Entropy-based active learning (uncertainty sampling).

    On each of ``b`` iterations the pool point with the highest predictive
    entropy under the current model is moved to the labeled set and the
    model is refit.

    Args:
        path, response, sensitive, atr, r, rnd, data_option, flag_demo:
            Forwarded unchanged to ``pr.data_prep``.
        demo_option: Forwarded to ``dm.Demo`` as ``option``.
        b: Number of points to label (loop iterations).
        alpha: Unused here; kept so the signature matches the other
            strategies.

    Returns:
        Tuple ``(demo_test, _Xl, _Xl_s, _yl, clf, overall_score)`` where
        ``demo_test`` and ``overall_score`` hold one entry per iteration,
        recorded *before* that iteration's point is added.
    """
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt,
     _Cset, _Cset_s, _Cset_y) = pr.data_prep(
         path, response, sensitive, atr, r, rnd, data_option, flag_demo)
    demo_test = []
    overall_score = []

    # train the model for the first time
    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)

    for _ in range(b):
        # stats for reporting only, not part of the algorithm
        overall_score = np.append(overall_score, clf.score(_Xt, _yt))
        demo_test = np.append(
            demo_test,
            dm.Demo(_Xt, _Xt_s, _yt, clf=clf, option=demo_option))

        # Entropy of every pool point; a probability of exactly 0 gives
        # 0 * log2(0) = NaN, which nansum treats as the correct limit 0
        # (otherwise the NaN row would win the argmax below).
        probas_val = clf.predict_proba(_Xu)
        with np.errstate(divide='ignore', invalid='ignore'):
            e = -np.nansum(probas_val * np.log2(probas_val), axis=1)

        # label the most uncertain point
        selection = np.argsort(e)[::-1][0]
        _Xl = np.append(_Xl, [_Xu[selection]], axis=0)
        _Xl_s = np.append(_Xl_s, [_Xu_s[selection]], axis=0)
        _yl = np.append(_yl, [_yu[selection]], axis=0)

        # Refit and shrink the pool; _Xu_s must shrink together with _Xu and
        # _yu or later _Xu_s[selection] lookups hit the wrong row.
        clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
        _Xu = np.delete(_Xu, selection, 0)
        _yu = np.delete(_yu, selection, 0)
        _Xu_s = np.delete(_Xu_s, selection, 0)

    return demo_test, _Xl, _Xl_s, _yl, clf, overall_score
def FAL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
        data_option):
    """Fair active learning by exhaustive expected-fairness estimation.

    Each iteration scores every pool point by a convex combination of its
    normalised predictive entropy and its normalised expected ``dm.Demo``
    value on ``_Cset``. The expectation is taken over both candidate labels
    (0 and 1) by refitting a model per label. The highest-scoring point is
    labeled and the model refit.

    Args:
        path, response, sensitive, atr, r, rnd, data_option:
            Forwarded unchanged to ``pr.data_prep``.
        demo_option: Forwarded to ``dm.Demo`` as ``option``.
        b: Number of points to label (loop iterations).
        alpha: Indexable schedule of entropy weights; entries 0..10 are
            used, one per block of ``b // 11`` iterations.

    Returns:
        Tuple ``(demo_test, _Xl, _Xl_s, _yl, clf, overall_score)`` where
        ``demo_test`` and ``overall_score`` hold one entry per iteration,
        recorded *before* that iteration's point is added.
    """
    demo_test = []
    overall_score = []
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt,
     _Cset, _Cset_s, _Cset_y) = pr.data_prep(
         path, response, sensitive, atr, r, rnd, data_option)

    # train the model for the first time
    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)

    # Iterations per alpha entry; at least 1 so that b < 11 no longer
    # divides by zero (identical to int(b / 11) whenever b >= 11).
    step = max(1, b // 11)

    for Iter in range(b):
        u = len(_Xu)
        a = alpha[min(10, Iter // step)]

        # record stats to be reported -- these lines are added for the
        # purpose of experiments and are not part of the algorithm
        overall_score = np.append(overall_score, clf.score(_Xt, _yt))
        demo_test = np.append(
            demo_test,
            dm.Demo(_Xt, _Xt_s, _yt, clf=clf, option=demo_option))

        # compute the entropy and expected fairness for all instances in U;
        # 0 * log2(0) = NaN is treated as its limit 0 so a fully confident
        # prediction does not poison the min/max normalisation
        probas_val = clf.predict_proba(_Xu)
        with np.errstate(divide='ignore', invalid='ignore'):
            e = -np.nansum(probas_val * np.log2(probas_val), axis=1)

        E_corr = np.zeros(u)
        for j in range(u):
            _Xl_tmp = np.append(_Xl, [_Xu[j]], axis=0)
            f_tmp = []
            for k in range(2):
                _yl_tmp = np.append(_yl, [k], axis=0)
                clf_tmp = LogisticRegression(solver='liblinear').fit(
                    _Xl_tmp, _yl_tmp)
                f_tmp = np.append(
                    f_tmp,
                    dm.Demo(_Cset, _Cset_s, _Cset_y, clf=clf_tmp,
                            option=demo_option))
            f_tmp[np.isnan(f_tmp)] = 0
            # First predict_proba column weights the k=0 outcome
            # (assumes classes are ordered 0, 1 -- TODO confirm).
            p = probas_val[j][0]
            E_corr[j] = f_tmp.dot([p, 1 - p])

        # normalize the values; a constant criterion carries no information
        # and contributes 0 instead of 0/0 = NaN. E_corr is flipped so that
        # a LOWER expected Demo value scores higher.
        corr_range = E_corr.max() - E_corr.min()
        if corr_range > 0:
            E_corr_scaled = (E_corr.max() - E_corr) / corr_range
        else:
            E_corr_scaled = np.zeros(u)
        e_range = e.max() - e.min()
        if e_range > 0:
            e_scaled = (e - e.min()) / e_range
        else:
            e_scaled = np.zeros(u)
        e_all = a * e_scaled + (1 - a) * E_corr_scaled

        # find the argmax and label it
        selection = np.argsort(e_all)[::-1][0]
        _Xl = np.append(_Xl, [_Xu[selection]], axis=0)
        _Xl_s = np.append(_Xl_s, [_Xu_s[selection]], axis=0)
        _yl = np.append(_yl, [_yu[selection]], axis=0)

        # update the model and U; _Xu_s must shrink together with _Xu and
        # _yu or later _Xu_s[selection] lookups hit the wrong row
        clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
        _Xu = np.delete(_Xu, selection, 0)
        _yu = np.delete(_yu, selection, 0)
        _Xu_s = np.delete(_Xu_s, selection, 0)

    return demo_test, _Xl, _Xl_s, _yl, clf, overall_score