예제 #1
0
def RL(path,response,sensitive,atr,demo_option,r,b,alpha,rnd,data_option,flag_demo):
    """Random-labeling baseline: move random pool rows to the labeled set, fit once.

    ``path``, ``response``, ``sensitive``, ``atr``, ``r``, ``rnd``,
    ``data_option`` and ``flag_demo`` are forwarded unchanged to
    ``pr.data_prep``; ``demo_option`` is forwarded to ``dm.Demo`` as
    ``option``.  ``alpha`` is accepted but not used by this function.

    Args:
        b: Number of rows to draw from the unlabeled pool.  Rows are drawn
            without replacement; if ``b`` exceeds the pool size the whole
            pool is used.

    Returns:
        ``(demo_test, _Xl, clf, score)`` where ``demo_test`` is a 1-D array
        holding the ``dm.Demo`` result on the test split, ``_Xl`` is the
        enlarged labeled feature matrix, ``clf`` is the fitted
        ``LogisticRegression`` and ``score`` is ``clf.score`` on the test
        split.
    """
    # NOTE(review): the meaning of each split returned by pr.data_prep is
    # inferred from the variable names (labeled / unlabeled / test) -- confirm.
    _Xl,_Xl_s,n,m,_yl,_Xu,_Xu_s,_yu,_Xt,_Xt_s,_yt,_Cset,_Cset_s,_Cset_y=pr.data_prep(path,response,sensitive,atr,r,rnd,data_option,flag_demo)

    # Draw distinct rows: sampling with replacement would spend part of the
    # budget on duplicates of the same pool row.
    pool_size = len(_Xu)
    rnd_id = np.random.choice(pool_size, size=min(b, pool_size), replace=False)

    _Xl = np.append(_Xl,_Xu[rnd_id],axis=0)
    _Xl_s = np.append(_Xl_s,_Xu_s[rnd_id],axis=0)
    _yl = np.append(_yl,_yu[rnd_id],axis=0)

    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
    score = clf.score(_Xt,_yt)
    demo_test = np.append([],dm.Demo(_Xt,_Xt_s,_yt,clf=clf,option=demo_option))
    return demo_test,_Xl,clf,score
예제 #2
0
def AL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
       data_option):
    """Pool-based active learning that labels the highest-entropy row each round.

    ``path``, ``response``, ``sensitive``, ``atr``, ``r``, ``rnd`` and
    ``data_option`` are forwarded unchanged to ``pr.data_prep``;
    ``demo_option`` is forwarded to ``dm.Demo`` as ``option``.  ``alpha`` is
    accepted for signature parity but is not used by this function.

    Args:
        b: Number of labeling rounds (one pool row is labeled per round).

    Returns:
        ``(demo_test, _Xl, _Xl_s, _yl, clf, overall_score)``.  ``demo_test``
        and ``overall_score`` hold one entry per round, recorded *before*
        that round's row is added; the remaining items are the final labeled
        set and the last fitted ``LogisticRegression``.
    """
    demo_test = []
    overall_score = []
    # NOTE(review): split semantics (labeled / unlabeled / test) are inferred
    # from the variable names returned by pr.data_prep -- confirm.
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt, _Cset, _Cset_s,
     _Cset_y) = pr.data_prep(path, response, sensitive, atr, r, rnd,
                             data_option)

    # Initial model on the seed labeled set.
    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)

    for _ in range(b):
        # Bookkeeping for the experiments; not part of the selection rule.
        overall_score = np.append(overall_score, clf.score(_Xt, _yt))
        demo_test = np.append(
            demo_test, dm.Demo(_Xt, _Xt_s, _yt, clf=clf, option=demo_option))

        # Shannon entropy of the predicted class distribution per pool row.
        # Clipping the log argument keeps 0 * log2(0) at 0 instead of NaN;
        # a NaN would sort last and be picked as the "most uncertain" row.
        probas_val = clf.predict_proba(_Xu)
        safe_p = np.clip(probas_val, np.finfo(float).tiny, 1.0)
        e = (-probas_val * np.log2(safe_p)).sum(axis=1)

        # Label the most uncertain row.
        selection = np.argsort(e)[::-1][0]
        _Xl = np.append(_Xl, [_Xu[selection]], axis=0)
        _Xl_s = np.append(_Xl_s, [_Xu_s[selection]], axis=0)
        _yl = np.append(_yl, [_yu[selection]], axis=0)

        # Refit and remove the row from every pool array.  _Xu_s must shrink
        # together with _Xu, otherwise later _Xu_s[selection] lookups read the
        # sensitive value of a different row.
        clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
        _Xu = np.delete(_Xu, selection, 0)
        _yu = np.delete(_yu, selection, 0)
        _Xu_s = np.delete(_Xu_s, selection, 0)
    return demo_test, _Xl, _Xl_s, _yl, clf, overall_score
def RL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
       data_option, flag_demo):
    """Random-labeling baseline trained with ``DemographicParityClassifier``.

    ``path``, ``response``, ``sensitive``, ``atr``, ``r``, ``rnd``,
    ``data_option`` and ``flag_demo`` are forwarded unchanged to
    ``pr.data_prep``; ``demo_option`` is forwarded to ``dm.Demo`` as
    ``option``.  ``alpha`` is accepted but not used by this function.

    Args:
        b: Number of rows to draw from the unlabeled pool.  Rows are drawn
            without replacement; if ``b`` exceeds the pool size the whole
            pool is used.

    Returns:
        ``(demo_test, _Xl, _Xl_s, _yl, clf, score, f1score)``.  ``demo_test``
        is the raw ``dm.Demo`` result on the test split, ``score`` is
        ``clf.score`` on the test split, and ``f1score`` is always an empty
        list (nothing in this function fills it).
    """
    f1score = []
    # NOTE(review): split semantics (labeled / unlabeled / test) are inferred
    # from the variable names returned by pr.data_prep -- confirm.
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt, _Cset, _Cset_s,
     _Cset_y) = pr.data_prep(path, response, sensitive, atr, r, rnd,
                             data_option, flag_demo)

    # Draw distinct rows: sampling with replacement would spend part of the
    # budget on duplicates of the same pool row.
    pool_size = len(_Xu)
    rnd_id = np.random.choice(pool_size, size=min(b, pool_size), replace=False)

    _Xl = np.append(_Xl, _Xu[rnd_id], axis=0)
    _Xl_s = np.append(_Xl_s, _Xu_s[rnd_id], axis=0)
    _yl = np.append(_yl, _yu[rnd_id], axis=0)

    # The classifier is given the sensitive attribute as the last column
    # (sensitive_cols=-1), so it is appended to both train and test features.
    clf = DemographicParityClassifier(sensitive_cols=-1,
                                      covariance_threshold=0.5)
    _Xl_with_s = np.append(_Xl, _Xl_s[:, None], axis=1)
    clf.fit(_Xl_with_s, _yl)

    _Xt_with_s = np.append(_Xt, _Xt_s[:, None], axis=1)
    score = clf.score(_Xt_with_s, _yt)
    demo_test = dm.Demo(_Xt_with_s,
                        _Xt_s,
                        _yt,
                        clf=clf,
                        option=demo_option)
    return demo_test, _Xl, _Xl_s, _yl, clf, score, f1score
def FAL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
        data_option, flag_demo):
    """Active learning mixing entropy with the expected ``dm.Demo`` value.

    Each round, every pool row is scored by a weighted sum of (i) its
    normalised prediction entropy and (ii) a normalised "expected metric"
    term: the row is tentatively added with label 0 and with label 1, a model
    is refit for each, ``dm.Demo`` is evaluated on the ``_Cset`` split, and
    the two values are averaged with the current model's class probabilities.
    Lower expected metric gives a higher score.  The best-scoring row is
    labeled with its true label.

    ``path``, ``response``, ``sensitive``, ``atr``, ``r``, ``rnd``,
    ``data_option`` and ``flag_demo`` are forwarded unchanged to
    ``pr.data_prep``; ``demo_option`` is forwarded to ``dm.Demo``.

    Args:
        b: Number of labeling rounds.
        alpha: Indexable sequence of entropy weights; indices 0..10 are read
            as the rounds progress.

    Returns:
        ``(demo_test, _Xl, _Xl_s, _yl, clf, overall_score, f1score, Time)``.
        ``demo_test`` / ``overall_score`` hold one entry per round, recorded
        before that round's row is added.  ``Time[i]`` is the elapsed time
        since the loop started, measured at the end of round ``i``.
        ``f1score`` is always an empty list.
    """
    demo_test = []
    overall_score = []
    f1score = []
    Time = np.zeros(b)
    # NOTE(review): split semantics (labeled / unlabeled / test / Cset) are
    # inferred from the variable names returned by pr.data_prep -- confirm.
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt, _Cset, _Cset_s,
     _Cset_y) = pr.data_prep(path, response, sensitive, atr, r, rnd,
                             data_option, flag_demo)

    # Initial model on the seed labeled set.
    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)

    # The budget is cut into 11 stages, each using the next alpha entry.
    # max(1, ...) avoids a division by zero when b < 11.
    step = max(1, b // 11)
    t1 = time()
    for Iter in range(b):
        u = len(_Xu)
        a = alpha[min(10, Iter // step)]

        # Bookkeeping for the experiments; not part of the selection rule.
        overall_score = np.append(overall_score, clf.score(_Xt, _yt))
        demo_test = np.append(
            demo_test, dm.Demo(_Xt, _Xt_s, _yt, clf=clf, option=demo_option))

        # Entropy per pool row; clipping keeps 0 * log2(0) at 0, not NaN.
        probas_val = clf.predict_proba(_Xu)
        safe_p = np.clip(probas_val, np.finfo(float).tiny, 1.0)
        e = (-probas_val * np.log2(safe_p)).sum(axis=1)

        # Expected metric after labeling each candidate.
        E_corr = np.zeros(u)
        for j in range(u):
            _Xl_tmp = np.append(_Xl, [_Xu[j]], axis=0)
            f_tmp = []
            for k in range(2):
                _yl_tmp = np.append(_yl, [k], axis=0)
                clf_tmp = LogisticRegression(solver='liblinear').fit(
                    _Xl_tmp, _yl_tmp)
                f_tmp = np.append(
                    f_tmp,
                    dm.Demo(_Cset,
                            _Cset_s,
                            _Cset_y,
                            clf=clf_tmp,
                            option=demo_option))
            f_tmp[np.isnan(f_tmp)] = 0
            # Column 0 of predict_proba weights the label-0 hypothesis
            # (assumes clf.classes_ is [0, 1] -- TODO confirm).  Reuses the
            # batch prediction instead of re-predicting the whole pool.
            p = probas_val[j][0]
            E_corr[j] = f_tmp.dot([p, 1 - p])

        # Min-max normalise both terms; a constant term contributes zeros
        # rather than NaNs, which would otherwise corrupt the argsort.
        corr_span = E_corr.max() - E_corr.min()
        if corr_span > 0:
            E_corr_scaled = (E_corr.max() - E_corr) / corr_span
        else:
            E_corr_scaled = np.zeros(u)
        e_span = e.max() - e.min()
        if e_span > 0:
            e_scaled = (e - e.min()) / e_span
        else:
            e_scaled = np.zeros(u)
        e_all = a * e_scaled + (1 - a) * E_corr_scaled

        # Label the best-scoring row, refit, and drop it from the pool.
        selection = np.argsort(e_all)[::-1][0]
        _Xl = np.append(_Xl, [_Xu[selection]], axis=0)
        _Xl_s = np.append(_Xl_s, [_Xu_s[selection]], axis=0)
        _yl = np.append(_yl, [_yu[selection]], axis=0)
        clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
        _Xu = np.delete(_Xu, selection, 0)
        _yu = np.delete(_yu, selection, 0)
        _Xu_s = np.delete(_Xu_s, selection, 0)
        Time[Iter] = time() - t1
    return demo_test, _Xl, _Xl_s, _yl, clf, overall_score, f1score, Time
예제 #5
0
def FAL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
        data_option, flag_demo):
    """Active learning mixing entropy with the ``fbc.efi`` score of each pool row.

    Each round, every pool row gets a weighted sum of its min-max normalised
    prediction entropy and its min-max normalised ``fbc.efi`` value; the
    highest-scoring row is labeled, the model is refit, and ``fbc`` is
    updated via ``fbc.updateAggs``.

    ``path``, ``response``, ``sensitive``, ``atr``, ``r``, ``rnd``,
    ``data_option`` and ``flag_demo`` are forwarded unchanged to
    ``pr.data_prep``; ``demo_option`` is forwarded to ``dm.Demo``.

    Args:
        b: Number of labeling rounds.
        alpha: Indexable sequence of entropy weights; indices 0..10 are read
            as the rounds progress.

    Returns:
        ``(demo, _Xl, _Xl_s, _yl, theta, clf, overall_score, Time)``.
        ``demo`` / ``overall_score`` are length-``b`` arrays recorded before
        each round's row is added; ``theta`` is ``clf.coef_.T`` of the final
        model; ``Time[i]`` is the elapsed time since the loop started,
        measured at the end of round ``i``.
    """
    # NOTE(review): split semantics (labeled / unlabeled / test) are inferred
    # from the variable names returned by pr.data_prep -- confirm.
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt, _Cset, _Cset_s,
     _Cset_y) = pr.data_prep(path, response, sensitive, atr, r, rnd,
                             data_option, flag_demo)
    overall_score = np.zeros(b)
    Time = np.zeros(b)
    demo = np.zeros(b)

    # Initial model on the seed labeled set.
    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
    theta = clf.coef_.T

    # Covariance of each of the first m feature columns with the sensitive
    # attribute, estimated once on the initial unlabeled pool.
    pool_with_s = np.concatenate((_Xu, _Xu_s.reshape(_Xu_s.shape[0], 1)), 1)
    covXS = np.cov(pool_with_s.T)[0:m, -1].reshape(m, 1)
    fbc.init(_Xl, _yl, covXS, theta)

    # The budget is cut into 11 stages, each using the next alpha entry.
    # max(1, ...) avoids a division by zero when b < 11.
    step = max(1, b // 11)
    t1 = time()
    for Iter in range(b):
        u = len(_Xu)
        a = alpha[min(10, Iter // step)]

        # Bookkeeping for the experiments; not part of the selection rule.
        overall_score[Iter] = clf.score(_Xt, _yt)
        demo[Iter] = dm.Demo(_Xt, _Xt_s, _yt, clf=clf, option=demo_option)

        # Entropy per pool row; clipping keeps 0 * log2(0) at 0, not NaN.
        probas_val = clf.predict_proba(_Xu)
        safe_p = np.clip(probas_val, np.finfo(float).tiny, 1.0)
        e = (-probas_val * np.log2(safe_p)).sum(axis=1)

        # fbc.efi receives the row and its class probabilities as a column
        # vector; the batch prediction above is reused for every row.
        ECov = np.zeros(u)
        for j in range(u):
            ECov[j] = fbc.efi(_Xu[j], probas_val[j].reshape(-1, 1))

        # Min-max normalise each term; a constant term is left as-is.
        Emax = ECov.max()
        Emin = ECov.min()
        if Emax > Emin:
            ECov = (ECov - Emin) / (Emax - Emin)
        emin = e.min()
        emax = e.max()
        if emax > emin:
            e = (e - emin) / (emax - emin)
        e_all = a * e + (1 - a) * ECov

        # Label the best-scoring row.  _Xl_s is extended too so that the
        # returned sensitive vector stays aligned with the returned _Xl.
        selection = np.argsort(e_all)[::-1][0]
        _Xl = np.append(_Xl, [_Xu[selection]], axis=0)
        _Xl_s = np.append(_Xl_s, [_Xu_s[selection]], axis=0)
        _yl = np.append(_yl, [_yu[selection]], axis=0)

        # Refit, update the fbc aggregates, then drop the row from the pool.
        clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
        theta = clf.coef_.T
        fbc.updateAggs(_Xu[selection], _yu[selection], theta)
        _Xu = np.delete(_Xu, selection, 0)
        _yu = np.delete(_yu, selection, 0)
        _Xu_s = np.delete(_Xu_s, selection, 0)
        Time[Iter] = time() - t1
    return demo, _Xl, _Xl_s, _yl, theta, clf, overall_score, Time
def FAL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
        data_option, flag_demo, k):
    """Active learning: shortlist by entropy, then pick by ``fbc.efi``.

    Each round the ``k`` highest-entropy pool rows are shortlisted, and the
    shortlisted row with the largest ``fbc.efi`` value is labeled.  The model
    is then refit and ``fbc`` is updated via ``fbc.updateAggs``.

    ``path``, ``response``, ``sensitive``, ``atr``, ``r``, ``rnd``,
    ``data_option`` and ``flag_demo`` are forwarded unchanged to
    ``pr.data_prep``; ``demo_option`` is forwarded to ``dm.Demo``.
    ``alpha`` is accepted for signature parity but is not used here.

    Args:
        b: Number of labeling rounds.
        k: Shortlist size.  If fewer than ``k`` rows remain in the pool the
            shortlist is the whole pool.

    Returns:
        ``(demo, _Xl, _Xl_s, _yl, theta, clf, overall_score, Time)``.
        ``demo`` / ``overall_score`` are length-``b`` arrays recorded before
        each round's row is added; ``theta`` is ``clf.coef_.T`` of the final
        model; ``Time[i]`` is the elapsed time since the loop started,
        measured at the end of round ``i``.
    """
    # NOTE(review): split semantics (labeled / unlabeled / test) are inferred
    # from the variable names returned by pr.data_prep -- confirm.
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt, _Cset, _Cset_s,
     _Cset_y) = pr.data_prep(path, response, sensitive, atr, r, rnd,
                             data_option, flag_demo)
    overall_score = np.zeros(b)
    Time = np.zeros(b)
    demo = np.zeros(b)

    # Initial model on the seed labeled set.
    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
    theta = clf.coef_.T

    # Covariance of each of the first m feature columns with the sensitive
    # attribute, estimated once on the initial unlabeled pool.
    pool_with_s = np.concatenate((_Xu, _Xu_s.reshape(_Xu_s.shape[0], 1)), 1)
    covXS = np.cov(pool_with_s.T)[0:m, -1].reshape(m, 1)
    fbc.init(_Xl, _yl, covXS, theta)

    t1 = time()
    for Iter in range(b):
        # Bookkeeping for the experiments; not part of the selection rule.
        overall_score[Iter] = clf.score(_Xt, _yt)
        demo[Iter] = dm.Demo(_Xt, _Xt_s, _yt, clf=clf, option=demo_option)

        # Entropy per pool row; clipping keeps 0 * log2(0) at 0, not NaN.
        probas_val = clf.predict_proba(_Xu)
        safe_p = np.clip(probas_val, np.finfo(float).tiny, 1.0)
        e = (-probas_val * np.log2(safe_p)).sum(axis=1)

        # Shortlist the k most uncertain rows and score only those.  The
        # score array is sized by the shortlist, not by k: padding zeros
        # could otherwise win the argmax and index past the end of idx when
        # the pool holds fewer than k rows.
        idx = np.argsort(e)[::-1][0:k]
        ECov = np.zeros(len(idx))
        for j, cand in enumerate(idx):
            ECov[j] = fbc.efi(_Xu[cand], probas_val[cand].reshape(-1, 1))

        # Label the shortlisted row with the largest efi value.  _Xl_s is
        # extended too so the returned sensitive vector stays aligned with
        # the returned _Xl.
        selection = idx[np.argsort(ECov)[::-1][0]]
        _Xl = np.append(_Xl, [_Xu[selection]], axis=0)
        _Xl_s = np.append(_Xl_s, [_Xu_s[selection]], axis=0)
        _yl = np.append(_yl, [_yu[selection]], axis=0)

        # Refit, update the fbc aggregates, then drop the row from the pool.
        clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
        theta = clf.coef_.T
        fbc.updateAggs(_Xu[selection], _yu[selection], theta)
        _Xu = np.delete(_Xu, selection, 0)
        _yu = np.delete(_yu, selection, 0)
        _Xu_s = np.delete(_Xu_s, selection, 0)
        Time[Iter] = time() - t1
    return demo, _Xl, _Xl_s, _yl, theta, clf, overall_score, Time
예제 #7
0
def FAL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
        data_option, flag_demo, kk):
    """Active learning: shortlist by entropy, pick by expected ``dm.Demo``, replicate.

    Each round the ``kk`` highest-entropy pool rows are shortlisted.  For each
    one, a model is refit with the row tentatively labeled 0 and 1,
    ``dm.Demo`` is evaluated on the ``_Cset`` split for both, and the two
    values are averaged with the current model's class probabilities.  The
    row with the smallest expected value is labeled with its true label.  If
    the ``dm.Demo`` value computed for that true label is lower than the
    current model's value on ``_Cset``, the row is added to the labeled set
    twice instead of once.

    ``path``, ``response``, ``sensitive``, ``atr``, ``r``, ``rnd``,
    ``data_option`` and ``flag_demo`` are forwarded unchanged to
    ``pr.data_prep``; ``demo_option`` is forwarded to ``dm.Demo``.
    ``alpha`` is accepted for signature parity but is not used here.

    Args:
        b: Number of labeling rounds.
        kk: Shortlist size.  If fewer than ``kk`` rows remain in the pool the
            shortlist is the whole pool.

    Returns:
        ``(demo_test, _Xl, _Xl_s, _yl, clf, overall_score, f1score)``.
        ``demo_test`` / ``overall_score`` hold one entry per round, recorded
        before that round's row is added.  ``f1score`` is always an empty
        list.
    """
    demo_test = []
    overall_score = []
    f1score = []
    # NOTE(review): split semantics (labeled / unlabeled / test / Cset) are
    # inferred from the variable names returned by pr.data_prep -- confirm.
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt, _Cset, _Cset_s,
     _Cset_y) = pr.data_prep(path, response, sensitive, atr, r, rnd,
                             data_option, flag_demo)

    # Initial model on the seed labeled set.
    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)

    for Iter in range(b):
        # Bookkeeping for the experiments; not part of the selection rule.
        overall_score = np.append(overall_score, clf.score(_Xt, _yt))
        demo_test = np.append(
            demo_test, dm.Demo(_Xt, _Xt_s, _yt, clf=clf, option=demo_option))

        # Current model's metric on _Cset, used for the replication test.
        demo_cset1 = dm.Demo(_Cset,
                             _Cset_s,
                             _Cset_y,
                             clf=clf,
                             option=demo_option)

        # Entropy per pool row; clipping keeps 0 * log2(0) at 0, not NaN.
        probas_val = clf.predict_proba(_Xu)
        safe_p = np.clip(probas_val, np.finfo(float).tiny, 1.0)
        e = (-probas_val * np.log2(safe_p)).sum(axis=1)

        # Shortlist the kk most uncertain rows.  Score arrays are sized by
        # the shortlist, not by kk: padding zeros could otherwise win the
        # argmin and index past the end of idx when the pool is small.
        idx = np.argsort(e)[::-1][0:kk]
        E_corr = np.zeros(len(idx))
        demo_f = np.zeros((len(idx), 2))
        for j, cand in enumerate(idx):
            _Xl_tmp = np.append(_Xl, [_Xu[cand]], axis=0)
            f_tmp = []
            for k in range(2):
                _yl_tmp = np.append(_yl, [k], axis=0)
                clf_tmp = LogisticRegression(solver='liblinear').fit(
                    _Xl_tmp, _yl_tmp)
                f_tmp = np.append(
                    f_tmp,
                    dm.Demo(_Cset,
                            _Cset_s,
                            _Cset_y,
                            clf=clf_tmp,
                            option=demo_option))
            f_tmp[np.isnan(f_tmp)] = 0
            demo_f[j] = f_tmp
            # Column 0 of predict_proba weights the label-0 hypothesis
            # (assumes clf.classes_ is [0, 1] -- TODO confirm).  Reuses the
            # batch prediction instead of re-predicting the whole pool.
            p = probas_val[cand][0]
            E_corr[j] = f_tmp.dot([p, 1 - p])

        # Pick the shortlisted row with the smallest expected metric.
        best = np.argsort(E_corr)[0]
        selection = idx[best]
        demo_cset2 = demo_f[best, int(_yu[selection])]

        # Add the row once, or twice when labeling it with its true label
        # lowered the metric relative to the current model.
        copies = 2 if demo_cset2 - demo_cset1 < 0 else 1
        for _ in range(copies):
            _Xl = np.append(_Xl, [_Xu[selection]], axis=0)
            _Xl_s = np.append(_Xl_s, [_Xu_s[selection]], axis=0)
            _yl = np.append(_yl, [_yu[selection]], axis=0)

        # Refit and drop the row from every pool array.
        clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
        _Xu = np.delete(_Xu, selection, 0)
        _yu = np.delete(_yu, selection, 0)
        _Xu_s = np.delete(_Xu_s, selection, 0)
    return demo_test, _Xl, _Xl_s, _yl, clf, overall_score, f1score
예제 #8
0
def AL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
       data_option, flag_demo):
    """Pool-based active learning that labels the highest-entropy row each round.

    ``path``, ``response``, ``sensitive``, ``atr``, ``r``, ``rnd``,
    ``data_option`` and ``flag_demo`` are forwarded unchanged to
    ``pr.data_prep``; ``demo_option`` is forwarded to ``dm.Demo`` as
    ``option``.  ``alpha`` is accepted for signature parity but is not used
    by this function.

    Args:
        b: Number of labeling rounds (one pool row is labeled per round).

    Returns:
        ``(demo_test, _Xl, _Xl_s, _yl, clf, overall_score)``.  ``demo_test``
        and ``overall_score`` hold one entry per round, recorded *before*
        that round's row is added; the remaining items are the final labeled
        set and the last fitted ``LogisticRegression``.
    """
    demo_test = []
    overall_score = []
    # NOTE(review): split semantics (labeled / unlabeled / test) are inferred
    # from the variable names returned by pr.data_prep -- confirm.
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt, _Cset, _Cset_s,
     _Cset_y) = pr.data_prep(path, response, sensitive, atr, r, rnd,
                             data_option, flag_demo)

    # Initial model on the seed labeled set.
    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)

    for _ in range(b):
        # Bookkeeping for the experiments; not part of the selection rule.
        overall_score = np.append(overall_score, clf.score(_Xt, _yt))
        demo_test = np.append(
            demo_test, dm.Demo(_Xt, _Xt_s, _yt, clf=clf, option=demo_option))

        # Shannon entropy of the predicted class distribution per pool row.
        # Clipping the log argument keeps 0 * log2(0) at 0 instead of NaN;
        # a NaN would sort last and be picked as the "most uncertain" row.
        probas_val = clf.predict_proba(_Xu)
        safe_p = np.clip(probas_val, np.finfo(float).tiny, 1.0)
        e = (-probas_val * np.log2(safe_p)).sum(axis=1)

        # Label the most uncertain row.
        selection = np.argsort(e)[::-1][0]
        _Xl = np.append(_Xl, [_Xu[selection]], axis=0)
        _Xl_s = np.append(_Xl_s, [_Xu_s[selection]], axis=0)
        _yl = np.append(_yl, [_yu[selection]], axis=0)

        # Refit and remove the row from every pool array.  _Xu_s must shrink
        # together with _Xu, otherwise later _Xu_s[selection] lookups read the
        # sensitive value of a different row.
        clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
        _Xu = np.delete(_Xu, selection, 0)
        _yu = np.delete(_yu, selection, 0)
        _Xu_s = np.delete(_Xu_s, selection, 0)
    return demo_test, _Xl, _Xl_s, _yl, clf, overall_score
예제 #9
0
def FAL(path, response, sensitive, atr, demo_option, r, b, alpha, rnd,
        data_option):
    """Active learning mixing entropy with the expected ``dm.Demo`` value.

    Each round, every pool row is scored by a weighted sum of (i) its
    normalised prediction entropy and (ii) a normalised "expected metric"
    term: the row is tentatively added with label 0 and with label 1, a model
    is refit for each, ``dm.Demo`` is evaluated on the ``_Cset`` split, and
    the two values are averaged with the current model's class probabilities.
    Lower expected metric gives a higher score.  The best-scoring row is
    labeled with its true label.

    ``path``, ``response``, ``sensitive``, ``atr``, ``r``, ``rnd`` and
    ``data_option`` are forwarded unchanged to ``pr.data_prep``;
    ``demo_option`` is forwarded to ``dm.Demo``.

    Args:
        b: Number of labeling rounds.
        alpha: Indexable sequence of entropy weights; indices 0..10 are read
            as the rounds progress.

    Returns:
        ``(demo_test, _Xl, _Xl_s, _yl, clf, overall_score)``.  ``demo_test``
        and ``overall_score`` hold one entry per round, recorded before that
        round's row is added.
    """
    demo_test = []
    overall_score = []
    # NOTE(review): split semantics (labeled / unlabeled / test / Cset) are
    # inferred from the variable names returned by pr.data_prep -- confirm.
    (_Xl, _Xl_s, n, m, _yl, _Xu, _Xu_s, _yu, _Xt, _Xt_s, _yt, _Cset, _Cset_s,
     _Cset_y) = pr.data_prep(path, response, sensitive, atr, r, rnd,
                             data_option)

    # Initial model on the seed labeled set.
    clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)

    # The budget is cut into 11 stages, each using the next alpha entry.
    # max(1, ...) avoids a division by zero when b < 11.
    step = max(1, b // 11)
    for Iter in range(b):
        u = len(_Xu)
        a = alpha[min(10, Iter // step)]

        # Bookkeeping for the experiments; not part of the selection rule.
        overall_score = np.append(overall_score, clf.score(_Xt, _yt))
        demo_test = np.append(
            demo_test, dm.Demo(_Xt, _Xt_s, _yt, clf=clf, option=demo_option))

        # Entropy per pool row; clipping keeps 0 * log2(0) at 0, not NaN.
        probas_val = clf.predict_proba(_Xu)
        safe_p = np.clip(probas_val, np.finfo(float).tiny, 1.0)
        e = (-probas_val * np.log2(safe_p)).sum(axis=1)

        # Expected metric after labeling each candidate.
        E_corr = np.zeros(u)
        for j in range(u):
            _Xl_tmp = np.append(_Xl, [_Xu[j]], axis=0)
            f_tmp = []
            for k in range(2):
                _yl_tmp = np.append(_yl, [k], axis=0)
                clf_tmp = LogisticRegression(solver='liblinear').fit(
                    _Xl_tmp, _yl_tmp)
                f_tmp = np.append(
                    f_tmp,
                    dm.Demo(_Cset,
                            _Cset_s,
                            _Cset_y,
                            clf=clf_tmp,
                            option=demo_option))
            f_tmp[np.isnan(f_tmp)] = 0
            # Column 0 of predict_proba weights the label-0 hypothesis
            # (assumes clf.classes_ is [0, 1] -- TODO confirm).  Reuses the
            # batch prediction instead of re-predicting the whole pool.
            p = probas_val[j][0]
            E_corr[j] = f_tmp.dot([p, 1 - p])

        # Min-max normalise both terms; a constant term contributes zeros
        # rather than NaNs, which would otherwise corrupt the argsort.
        corr_span = E_corr.max() - E_corr.min()
        if corr_span > 0:
            E_corr_scaled = (E_corr.max() - E_corr) / corr_span
        else:
            E_corr_scaled = np.zeros(u)
        e_span = e.max() - e.min()
        if e_span > 0:
            e_scaled = (e - e.min()) / e_span
        else:
            e_scaled = np.zeros(u)
        e_all = a * e_scaled + (1 - a) * E_corr_scaled

        # Label the best-scoring row.
        selection = np.argsort(e_all)[::-1][0]
        _Xl = np.append(_Xl, [_Xu[selection]], axis=0)
        _Xl_s = np.append(_Xl_s, [_Xu_s[selection]], axis=0)
        _yl = np.append(_yl, [_yu[selection]], axis=0)

        # Refit and remove the row from every pool array.  _Xu_s must shrink
        # together with _Xu, otherwise later _Xu_s[selection] lookups read the
        # sensitive value of a different row.
        clf = LogisticRegression(solver='liblinear').fit(_Xl, _yl)
        _Xu = np.delete(_Xu, selection, 0)
        _yu = np.delete(_yu, selection, 0)
        _Xu_s = np.delete(_Xu_s, selection, 0)
    return demo_test, _Xl, _Xl_s, _yl, clf, overall_score