Example #1
import networkx as nx  # read_instagram_genrace, get_degree_dist and read_nxgraph are assumed to come from the project's own utility modules


def glassCeil(DATAPATH2,
              binss,
              edgelist,
              gc_File,
              organic,
              type2=False,
              ccdfflag=False):

    gender, race = read_instagram_genrace(DATAPATH2 + 'london.genrace')

    if not organic:

        for i in range(5):
            print "Iteration ", i
            DATAPATH = DATAPATH2 + str(i) + "/"
            nxgraph = nx.read_edgelist(DATAPATH + edgelist,
                                       nodetype=int)  #, delimiter=',',

            get_degree_dist(DATAPATH, nxgraph, binss, gender, gc_File, type2,
                            ccdfflag)

    else:

        nxgraph = read_nxgraph(DATAPATH2 + edgelist)  # , delimiter=',',
        get_degree_dist(DATAPATH2, nxgraph, binss, gender, gc_File, type2,
                        ccdfflag)
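
The non-organic branch expects five numbered run folders (0/ through 4/), while the organic branch reads a single edge list. A minimal driver sketch follows; every path, file name, and bin edge in it is an assumption rather than something taken from the source:

# Hypothetical driver for glassCeil; all names and values below are assumptions.
binss = [1, 2, 4, 8, 16, 32, 64, 128, 256]      # illustrative degree-bin edges
glassCeil("../../data/london/",                 # assumed directory holding london.genrace and the 0/ ... 4/ run folders
          binss,
          edgelist="grown.edgelist",            # hypothetical edge-list name inside each run folder
          gc_File="glass_ceiling_gender",       # hypothetical output name passed through to get_degree_dist
          organic=False)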
Example #2
import networkx as nx  # the read_* and get_degree_dist_race helpers are assumed to come from the project's utility modules


def glassCeil_race(DATAPATH2, binss, edgelist, gc_File, organic):
    genderdict, racedict = read_instagram_genrace(DATAPATH2 + 'london.genrace')

    if not organic:
        for it in range(5):
            print "Iteration ", it
            DATAPATH = DATAPATH2 + str(it) + "/"

            nxgraph = nx.read_edgelist(DATAPATH + edgelist,
                                       nodetype=int)  # , delimiter=',',
            get_degree_dist_race(DATAPATH2, nxgraph, binss, racedict, gc_File)

    else:

        nxgraph = read_nxgraph(DATAPATH2 + edgelist)
        print(DATAPATH2 + edgelist)
        print("nxgraph.number_of_edges", nxgraph.number_of_edges())

        get_degree_dist_race(DATAPATH2, nxgraph, binss, racedict, gc_File)
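
For the organic (observed) graph the per-run loop is skipped; a hedged sketch of that call, with all names assumed:

# Hypothetical call for the organic branch of glassCeil_race; paths and file names are assumptions.
glassCeil_race("../../data/london/",                       # assumed directory holding london.genrace
               binss=[1, 2, 4, 8, 16, 32, 64, 128, 256],   # illustrative degree-bin edges
               edgelist="london.edgelist",                  # hypothetical organic edge list
               gc_File="glass_ceiling_race",                # hypothetical output name
               organic=True)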
Example #3
import sys
sys.path.append("../../src/")
from graph_utils import read_instagram_genrace
from dataset_utils import make_5_recofiles  #, make_5_trainfilesmake_5_testfiles,
from bias_metrics import sort_Reco, write_topRecos, count_race_groups  # ,  getROCs_race, grow, glassCeil,  get_degree_dist  # , get_bias_genrace,get_degree_dist, get_accuracies, get_Equality_opportunity, get_Disparate_impact

DATAPATH = "../../data/la/"

gender, race = read_instagram_genrace(DATAPATH + 'la.genrace')

#make_5_trainfiles(gender, DATAPATH)

##### Branch 1 ##########

#make_5_recofiles(gender, DATAPATH,embfile='known_80_80_20_128.emb', pickfile_frn="known_80_top100.pick", pickfile_str="known_80_rand.pick", testfile_suffix= "hada_100_test.csv")
make_5_recofiles(gender,
                 DATAPATH,
                 embfile='known_80_raceeq_80_20_128.emb',
                 pickfile_frn="known_80_raceeq_top100.pick",
                 pickfile_str="known_80_rand.pick",
                 testfile_suffix="hada_100_test_raceeq.csv")

#sort_Reco(DATAPATH, testfile= "hada_100_test.csv", reco_file= "recommendations_1.csv")
sort_Reco(DATAPATH,
          testfile="hada_100_test_raceeq.csv",
          reco_file="recommendations_raceeq.csv")

#write_topRecos(DATAPATH, growtharr=[0.20], testfile="hada_100_test_2.csv", reco_file= "recommendations.csv", toprecofile="_topRecos.edgelist")
write_topRecos(DATAPATH,
               growtharr=[0.20],
               testfile="hada_100_test_raceeq.csv",
Example #4
import pandas as pd  # read_instagram_genrace is assumed importable from graph_utils


def get_bias_genrace(DATAPATH, recofile, k_arr):

    df = pd.read_csv(DATAPATH + recofile)

    gender, race = read_instagram_genrace(DATAPATH + "london.genrace")

    grouped = df.groupby(['u'])

    arr = []
    count_df_arr = []

    for u, group in grouped:
        in_arr = [u, gender[u], race[u]]

        for k in k_arr:

            if gender[u] == -1 and race[u] == -1:
                in_arr.extend([-1, -1, -1, -1, -1, -1])
                arr.append(in_arr)
                continue

            gen_K, race_K = 0, 0
            same_gen_ctr, fem_ctr, same_race_ctr = 0.0, 0.0, 0.0
            race_ctr_0, race_ctr_1, race_ctr_2 = 0.0, 0.0, 0.0

            for u2 in group.v:

                if gender[u2] != -1:
                    gen_K += 1

                    if (gen_K == k):
                        break

                    if (gender[u] == gender[u2]):
                        same_gen_ctr += 1

                    if (gender[u2] == 1):  #if female friend
                        fem_ctr += 1

                if race[u2] != -1:
                    race_K += 1

                    if (race_K == k):
                        break

                    if (race[u] == race[u2]):
                        same_race_ctr += 1

                    if (race[u2] == 0):
                        race_ctr_0 += 1

                    if (race[u2] == 1):
                        race_ctr_1 += 1

                    if (race[u2] == 2):
                        race_ctr_2 += 1

            if gen_K == 0 or gender[u] == -1:
                same_gen_ctr, fem_ctr = -1, -1
            else:
                same_gen_ctr /= gen_K
                fem_ctr /= gen_K

            if race_K == 0 or race[u] == -1:
                same_race_ctr, race_ctr_0, race_ctr_1, race_ctr_2 = -1, -1, -1, -1
            else:
                same_race_ctr /= race_K
                race_ctr_0 /= race_K
                race_ctr_1 /= race_K
                race_ctr_2 /= race_K

            in_arr.extend([
                same_gen_ctr, fem_ctr, same_race_ctr, race_ctr_0, race_ctr_1,
                race_ctr_2
            ])
            count_df_arr.append([u, k, gen_K, race_K])

        arr.append(in_arr)

    bias_df = pd.DataFrame(
        data=arr,
        columns=[
            'u', 'gender[u]', 'race[u]', 'sup_same_gen_bias100',
            'sup_fem_gen_bias100', 'sup_same_race_bias100',
            'sup_0_race_bias100', 'sup_1_race_bias100', 'sup_2_race_bias100',
            'sup_same_gen_bias50', 'sup_fem_gen_bias50',
            'sup_same_race_bias50', 'sup_0_race_bias50', 'sup_1_race_bias50',
            'sup_2_race_bias50', 'sup_same_gen_bias10', 'sup_fem_gen_bias10',
            'sup_same_race_bias10', 'sup_0_race_bias10', 'sup_1_race_bias10',
            'sup_2_race_bias10'
        ])

    bias_df.to_csv(DATAPATH + "sup_bias_250.csv")

    count_df = pd.DataFrame(data=count_df_arr,
                            columns=['u', 'top_k', 'gen_K', 'race_K'])
    count_df.to_csv(DATAPATH + "size_non-neg_users_250.csv")
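
The bias-column suffixes (…bias100, …bias50, …bias10) indicate the function is meant to be called with k_arr = [100, 50, 10]. A minimal usage sketch follows; the recommendations file name is an assumption, and the file only needs columns u (source user) and v (recommended user), as used by the groupby above:

# Hypothetical call; k_arr follows the column names, the file name is an assumption.
get_bias_genrace("../../data/london/",
                 recofile="recommendations_raceeq.csv",
                 k_arr=[100, 50, 10])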
Example #5
import pandas as pd  # read_json and read_instagram_genrace are assumed to be project helpers


def get_bias_json(DATAPATH, k_arr):

    for filename in ["top100cos.json", "top100euc.json"]:
        arr = read_json(DATAPATH + filename)

        gender, race = read_instagram_genrace(DATAPATH + "london.genrace")

        outer_arr = []
        count_df_arr = []
        for u in range(len(arr)):
            in_arr = [u, gender[u], race[u]]
            for k in k_arr:
                gen_K, race_K = 0, 0
                same_gen_ctr, fem_ctr, same_race_ctr = 0.0, 0.0, 0.0
                race_ctr_0, race_ctr_1, race_ctr_2 = 0.0, 0.0, 0.0
                for u2, prob in arr[u]:

                    if gender[u2] != -1:
                        gen_K += 1

                        if (gen_K == k):
                            break

                        if (gender[u] == gender[u2]):
                            same_gen_ctr += 1

                        if (gender[u2] == 1):  # if female friend
                            fem_ctr += 1

                    if race[u2] != -1:
                        race_K += 1

                        if (race_K == k):
                            break

                        if (race[u] == race[u2]):
                            same_race_ctr += 1

                        if (race[u2] == 0):
                            race_ctr_0 += 1

                        if (race[u2] == 1):
                            race_ctr_1 += 1

                        if (race[u2] == 2):
                            race_ctr_2 += 1

                same_gen_ctr /= gen_K
                fem_ctr /= gen_K
                same_race_ctr /= race_K
                race_ctr_0 /= race_K
                race_ctr_1 /= race_K
                race_ctr_2 /= race_K
                in_arr.extend([
                    same_gen_ctr, fem_ctr, same_race_ctr, race_ctr_0,
                    race_ctr_1, race_ctr_2
                ])
                count_df_arr.append([u, k, gen_K, race_K])

            outer_arr.append(in_arr)

        bias_df = pd.DataFrame(
            data=outer_arr,  # the accumulated per-user rows; arr still holds the raw JSON neighbour lists
            columns=[
                'u', 'gender[u]', 'race[u]', 'sup_same_gen_bias100',
                'sup_fem_gen_bias100', 'sup_same_race_bias100',
                'sup_0_race_bias100', 'sup_1_race_bias100',
                'sup_2_race_bias100', 'sup_same_gen_bias50',
                'sup_fem_gen_bias50', 'sup_same_race_bias50',
                'sup_0_race_bias50', 'sup_1_race_bias50', 'sup_2_race_bias50',
                'sup_same_gen_bias10', 'sup_fem_gen_bias10',
                'sup_same_race_bias10', 'sup_0_race_bias10',
                'sup_1_race_bias10', 'sup_2_race_bias10'
            ])

        bias_df.to_csv(DATAPATH + "unsup_bias.csv")

        count_df = pd.DataFrame(data=count_df_arr,
                                columns=['u', 'top_k', 'gen_K', 'race_K'])
        count_df.to_csv(DATAPATH + "size_non-neg_users.csv")
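
get_bias_json relies on a read_json helper that does not appear in these excerpts. Below is a minimal sketch of the behaviour the loop seems to assume: a list indexed by user id, each entry a list of [neighbour_id, score] pairs, matching the "for u2, prob in arr[u]" unpacking.

import json

def read_json(path):
    # Assumed helper: load a JSON file whose top-level value is a list with one entry
    # per user, each entry being a list of [neighbour_id, score] pairs.
    with open(path) as f:
        return json.load(f)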