def calculateRRDNormalizer(_user_N, _pro_N, _gf_measure, window):
    """
    Calculate the normalizer of the input group fairness measure at the input
    user and protected-group setting.

    Uses the module constants NORM_ITERATION (max iterations per fairness
    probability) and the cut point taken from ``window``. For each fairness
    probability, an unfair ranking is generated NORM_ITERATION times and the
    non-normalized fairness value is averaged; the maximum of those averages
    across all probabilities is returned as the normalizer.

    :param _user_N: The total user number of the input ranking
    :param _pro_N: The size of the protected group in the input ranking
    :param _gf_measure: The group fairness measure to be used in calculation
    :param window: Cut point (batch size) used as NORM_CUTPOINT
    :return: the maximum averaged group fairness value across all fairness
             probabilities, used as the normalizer for this measure
    """
    NORM_CUTPOINT = window
    # Range of fairness probabilities to sweep.
    f_probs = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    avg_maximums = []  # average result per fairness probability
    for fpi in f_probs:
        iter_results = []  # results of all iterations at this probability
        for _ in range(NORM_ITERATION):
            # Rebuilt each iteration: generateUnfairRanking may reorder its
            # inputs in place — TODO confirm before hoisting out of the loop.
            input_ranking = list(range(_user_N))
            protected_group = list(range(_pro_N))
            # Generate an unfair ranking using the synthetic data generator.
            unfair_ranking = dataGenerator.generateUnfairRanking(
                input_ranking, protected_group, fpi)
            # Non-normalized group fairness value, i.e. normalizer passed as 1.
            gf = calculateNDFairness(unfair_ranking, protected_group,
                                     NORM_CUTPOINT, _gf_measure, 1)
            iter_results.append(gf)
        avg_maximums.append(np.mean(iter_results))
    return max(avg_maximums)
def getExpRR_protNormalizer(rank_len, r):
    """
    Compute [min, max] bounds of the ExpRR_prot measure over the extreme
    fairness probabilities 0.0 and 1.0.

    :param rank_len: The total length of the ranking
    :param r: The fraction of the ranking that is protected (0..1)
    :return: ``[min_e, max_e]`` — the observed minimum and maximum values
    """
    f_probs = [0.0, 1.0]
    max_e = 0
    min_e = float('inf')
    pro_N = int(rank_len * r)  # protected-group size; loop-invariant
    for fpi in f_probs:
        input_ranking = list(range(rank_len))
        protected_group = list(range(pro_N))
        # Generate an unfair ranking using the synthetic data generator.
        unfair_ranking = dataGenerator.generateUnfairRanking(
            input_ranking, protected_group, fpi)
        gf = calculaterExpRR_prot(unfair_ranking, protected_group,
                                  rank_len, pro_N)
        max_e = max(max_e, gf)
        # BUG FIX: original assigned max_e = max(min_e, gf), which set max_e
        # to infinity and never updated min_e, so [inf, inf] was returned.
        min_e = min(min_e, gf)
    return [min_e, max_e]
def getExpRRNormalizer(rank_len, r):
    """
    Compute [min, max] bounds of the ExpRR measure over a sweep of fairness
    probabilities, running NORM_ITERATION trials per probability.

    :param rank_len: The total length of the ranking
    :param r: The fraction of the ranking that is protected (0..1)
    :return: ``[min, max]`` over all per-probability extremes
    """
    f_probs = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    maxs = []  # per-probability maxima
    mins = []  # per-probability minima
    pro_N = int(rank_len * r)  # protected-group size; loop-invariant
    for fpi in f_probs:
        iter_results = []  # results of all iterations at this probability
        for _ in range(NORM_ITERATION):
            # Rebuilt each iteration: generateUnfairRanking may reorder its
            # inputs in place — TODO confirm before hoisting out of the loop.
            input_ranking = list(range(rank_len))
            protected_group = list(range(pro_N))
            # Generate an unfair ranking using the synthetic data generator.
            unfair_ranking = dataGenerator.generateUnfairRanking(
                input_ranking, protected_group, fpi)
            gf = calculaterExpRR(unfair_ranking, protected_group,
                                 rank_len, pro_N)
            iter_results.append(gf)
        maxs.append(np.max(iter_results))
        mins.append(np.min(iter_results))
    return [min(mins), max(maxs)]
def calculateSkewNormalizer(_user_N, _pro_N, _gf_measure, window):
    """
    Compute [min, max] normalizer bounds for a skew-style group fairness
    measure. Handled differently from calculateRRDNormalizer because the
    measure can take negative values, so both extremes are tracked.

    :param _user_N: The total user number of the input ranking
    :param _pro_N: The size of the protected group in the input ranking
    :param _gf_measure: The group fairness measure to be used in calculation
    :param window: Cut point (batch size) used as NORM_CUTPOINT
    :return: ``[min, max]`` over all per-probability extremes
    """
    NORM_CUTPOINT = window
    f_probs = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    maxs = []  # per-probability maxima
    mins = []  # per-probability minima
    for fpi in f_probs:
        iter_results = []  # results of all iterations at this probability
        for _ in range(NORM_ITERATION):
            # Rebuilt each iteration: generateUnfairRanking may reorder its
            # inputs in place — TODO confirm before hoisting out of the loop.
            input_ranking = list(range(_user_N))
            protected_group = list(range(_pro_N))
            # Generate an unfair ranking using the synthetic data generator.
            unfair_ranking = dataGenerator.generateUnfairRanking(
                input_ranking, protected_group, fpi)
            # Non-normalized value: [0, 1] is passed as the normalizer bounds.
            gf = calculateNDFairness(unfair_ranking, protected_group,
                                     NORM_CUTPOINT, _gf_measure, [0, 1])
            iter_results.append(gf)
        maxs.append(np.max(iter_results))
        mins.append(np.min(iter_results))
    return [np.min(mins), np.max(maxs)]
def main(_user_N, _pro_N, _gfmeasure, _cut_point, _rez_fn, _loops):
    """
    Run the group fairness experiments on synthetic unfair rankings and
    output group fairness results as CSV files.

    :param _user_N: The total user number of the input ranking
    :param _pro_N: The size of the protected group in the input ranking
    :param _gfmeasure: The group fairness measure to be used in calculation,
                       one of "rKL", "rND" and "rRD" defined as constants
                       in this py file
    :param _cut_point: The cut-off point of set-wise group fairness calculation
    :param _rez_fn: The file name to output group fairness results
    :param _loops: Number of outer experiment repetitions
    :return: no returns.
    """
    # Input mixing proportions; 0.98 stands in for the extreme case given
    # the limitation of the random generator.
    f_probs = [i / 10 for i in range(10)]
    f_probs.append(0.98)

    # Accumulators for the long-format (prob, measure) output.
    keys = []
    values = []

    base_file_name = _rez_fn + "_user" + str(_user_N) + "_pro" + str(_pro_N)
    output_fn = base_file_name + ".csv"

    # Normalizer for this user number / protected-group size.
    max_GF = measures.getNormalizer(_user_N, _pro_N, _gfmeasure)

    # Fixed input ranking and protected-group index set.
    input_ranking = list(range(_user_N))
    sensi_idx = list(range(_pro_N))

    # Single managed handle replaces the original write-then-reopen-append
    # pattern (which leaked the append handle on error).
    with open(output_fn, 'w') as rez_file:
        rez_file.write(
            "GF_Measure,MP0.0,MP0.1,MP0.2,MP0.3,MP0.4,MP0.5,MP0.6,MP0.7,MP0.8,MP0.9,MP0.98\n"
        )
        for _ in range(_loops):
            gf_results = []
            for fp in f_probs:
                gf_iters = 0
                for _ in range(NORM_ITERATION):
                    sRFair = dataGenerator.generateUnfairRanking(
                        input_ranking, sensi_idx, fp)
                    gf = measures.calculateNDFairness(
                        sRFair, sensi_idx, _cut_point, _gfmeasure, max_GF)
                    gf_iters += gf
                avg_gf = gf_iters / NORM_ITERATION  # average over iterations
                gf_results.append(avg_gf)
                keys.append(fp)
                values.append(avg_gf)
            # One CSV row per loop; no trailing comma so the field count
            # matches the header row (the original emitted an extra empty
            # field at the end of every data row).
            rez_file.write(
                _gfmeasure + "," + ",".join(str(v) for v in gf_results) + "\n")

    # Long-format dump for downstream plotting.
    df = pd.DataFrame(list(zip(keys, values)), columns=['prob', 'measure'])
    data_dir = os.path.join('.', 'PMCdata')
    os.makedirs(data_dir, exist_ok=True)  # original crashed if dir missing
    df.to_csv(os.path.join(data_dir, base_file_name + '_data' + '.csv'),
              index=False)
    return