def enforce_single_threshold(categorical_results):
    """Find one threshold, shared by every group, with the best total accuracy.

    Candidate thresholds run from the smallest to the largest first element
    found across all entries of all groups (upper end excluded), in steps
    of 0.01. The first candidate reaching the highest value reported by
    ``utils.get_total_accuracy`` wins; if no candidate scores above 0 the
    threshold stays at 0.

    Args:
        categorical_results: mapping of group name to a sequence of entries
            whose first element is the value being thresholded.

    Returns:
        A ``(single_threshold_data, thresholds)`` tuple: every group
        thresholded at the winning value, and a dict mapping every group
        name to that same value.
    """
    leading_values = [
        entry[0]
        for entries in categorical_results.values()
        for entry in entries
    ]
    upper = max(leading_values)
    lower = min(leading_values)

    best_thresh = 0
    best_acc = 0
    for candidate in np.arange(lower, upper, 0.01):
        trial = {
            group: utils.apply_threshold(entries, candidate)
            for group, entries in categorical_results.items()
        }
        trial_acc = utils.get_total_accuracy(trial)
        if trial_acc > best_acc:
            best_acc, best_thresh = trial_acc, candidate

    # Re-apply the winner: the last trial built above belongs to the final
    # candidate of the sweep, not necessarily to the best one.
    thresholds = dict.fromkeys(categorical_results, best_thresh)
    single_threshold_data = {
        group: utils.apply_threshold(entries, best_thresh)
        for group, entries in categorical_results.items()
    }
    return single_threshold_data, thresholds
def enforce_maximum_profit(categorical_results):
    """Pick, independently for each group, its most accurate threshold.

    For every group the candidates run from the group's smallest to its
    largest first element (upper end excluded) in steps of 0.01. Accuracy is
    the fraction of thresholded ``(prediction, label)`` pairs whose two
    members are equal. The first candidate reaching the best accuracy wins;
    a group with no candidates, or none scoring above 0, keeps threshold 0.

    Args:
        categorical_results: mapping of group name to a sequence of entries
            whose first element is the value being thresholded.

    Returns:
        A ``(mp_data, thresholds)`` tuple: each group thresholded at its own
        winning value, and a dict mapping each group name to that value.
    """
    mp_data = {}
    thresholds = {}
    for key, value in categorical_results.items():
        best_acc = 0
        best_thresh = 0
        # An empty group has no min/max to sweep between; it keeps the
        # default threshold instead of raising inside max()/min().
        if value:
            lower = min(value, key=lambda x: x[0])[0]
            upper = max(value, key=lambda x: x[0])[0]
            for thresh in np.arange(lower, upper, 0.01):
                result = utils.apply_threshold(value, thresh)
                hits = [prediction == label for prediction, label in result]
                acc = sum(hits) / len(hits)
                if acc > best_acc:
                    best_acc = acc
                    best_thresh = thresh
        thresholds[key] = best_thresh
        mp_data[key] = utils.apply_threshold(value, best_thresh)
    return mp_data, thresholds
def enforce_equal_opportunity(categorical_results, epsilon):
    """Choose per-group thresholds whose true positive rates agree.

    Every group is thresholded at 0.00, 0.01, ..., 0.99 and scored with
    ``utils.get_true_positive_rate``. A combination of one threshold per
    group is feasible when every pair of groups differs in rate by at most
    ``epsilon``. Among feasible combinations, the first one reaching the
    highest ``utils.get_total_accuracy`` is returned.

    Thresholds are matched to groups by the mapping's own keys, so any
    number of groups and any key order is handled.

    Args:
        categorical_results: mapping of group name to that group's entries.
        epsilon: largest allowed absolute difference between two groups'
            true positive rates.

    Returns:
        A ``(equal_opportunity_data, thresholds)`` tuple of dicts keyed by
        group name. Both are empty when the input is empty or when no
        feasible combination scores above 0.
    """
    if not categorical_results:
        return {}, {}

    keys = list(categorical_results)

    # Sweep each group once, keeping the thresholded data so the selection
    # step below never has to call apply_threshold again.
    # NOTE(review): reusing the data assumes utils.apply_threshold is
    # deterministic for a given (entries, threshold) pair.
    sweeps = {key: [] for key in keys}
    for threshold in np.arange(0, 1, 0.01):
        for key in keys:
            t_data = utils.apply_threshold(categorical_results[key], threshold)
            tpr = utils.get_true_positive_rate(t_data)
            sweeps[key].append((tpr, threshold, t_data))

    # Grow combinations one group at a time; a new entry must lie within
    # epsilon of every entry already chosen.
    combos = [(entry,) for entry in sweeps[keys[0]]]
    for key in keys[1:]:
        combos = [
            combo + (entry,)
            for combo in combos
            for entry in sweeps[key]
            if all(abs(entry[0] - chosen[0]) <= epsilon for chosen in combo)
        ]

    equal_opportunity_data = {}
    thresholds = {}
    max_acc = 0
    for combo in combos:
        candidate = {key: entry[2] for key, entry in zip(keys, combo)}
        acc = utils.get_total_accuracy(candidate)
        if acc > max_acc:
            max_acc = acc
            thresholds = {key: entry[1] for key, entry in zip(keys, combo)}
            equal_opportunity_data = candidate
    return equal_opportunity_data, thresholds
def enforce_demographic_parity(categorical_results, epsilon):
    """Choose per-group thresholds whose predicted-positive fractions agree.

    Every group is thresholded at 0.00, 0.01, ..., 0.99. Its rate is
    ``utils.get_num_predicted_positives`` divided by the number of
    thresholded entries. A combination of one threshold per group is
    feasible when every pair of groups differs in rate by at most
    ``epsilon``. Among feasible combinations, the first one reaching the
    highest ``utils.get_total_accuracy`` is returned.

    Thresholds are matched to groups by the mapping's own keys, so any
    number of groups and any key order is handled.

    Args:
        categorical_results: mapping of group name to that group's entries.
        epsilon: largest allowed absolute difference between two groups'
            predicted-positive fractions.

    Returns:
        A ``(demographic_parity_data, thresholds)`` tuple of dicts keyed by
        group name. Both are empty when the input is empty or when no
        feasible combination scores above 0.
    """
    if not categorical_results:
        return {}, {}

    keys = list(categorical_results)

    # Sweep each group once, keeping the thresholded data so the selection
    # step below never has to call apply_threshold again.
    # NOTE(review): reusing the data assumes utils.apply_threshold is
    # deterministic for a given (entries, threshold) pair.
    sweeps = {key: [] for key in keys}
    for threshold in np.arange(0, 1, 0.01):
        for key in keys:
            t_data = utils.apply_threshold(categorical_results[key], threshold)
            rate = utils.get_num_predicted_positives(t_data) / len(t_data)
            sweeps[key].append((rate, threshold, t_data))

    # Grow combinations one group at a time; a new entry must lie within
    # epsilon of every entry already chosen.
    combos = [(entry,) for entry in sweeps[keys[0]]]
    for key in keys[1:]:
        combos = [
            combo + (entry,)
            for combo in combos
            for entry in sweeps[key]
            if all(abs(entry[0] - chosen[0]) <= epsilon for chosen in combo)
        ]

    demographic_parity_data = {}
    thresholds = {}
    max_acc = 0
    for combo in combos:
        candidate = {key: entry[2] for key, entry in zip(keys, combo)}
        acc = utils.get_total_accuracy(candidate)
        if acc > max_acc:
            max_acc = acc
            thresholds = {key: entry[1] for key, entry in zip(keys, combo)}
            demographic_parity_data = candidate
    return demographic_parity_data, thresholds
def enforce_maximum_profit(categorical_results):
    """Grid-search one threshold per group for the best total accuracy.

    Each of the four groups ('African-American', 'Caucasian', 'Hispanic',
    'Other') is tried at thresholds 0.0, 0.1, ..., 0.9. Every combination is
    scored with ``u.get_total_accuracy`` and the first combination reaching
    the highest score wins.

    Args:
        categorical_results: mapping that contains the four group names
            above, each mapped to that group's entries.

    Returns:
        A ``(mp_data, thresholds)`` tuple of dicts keyed by the four group
        names: the thresholded data and the winning threshold per group.

    Raises:
        KeyError: if one of the four group names is missing.
    """
    groups = ('African-American', 'Caucasian', 'Hispanic', 'Other')
    entries = {name: categorical_results[name] for name in groups}
    candidates = [step / 10 for step in range(10)]

    # Threshold each group once per candidate value (40 calls) instead of
    # once per combination (40,000 calls).
    # NOTE(review): assumes u.apply_threshold is deterministic for a given
    # (entries, threshold) pair.
    thresholded = {
        name: [u.apply_threshold(entries[name], value) for value in candidates]
        for name in groups
    }

    mp_data = {}
    best_accuracy = None
    best_combo = None
    positions = range(len(candidates))
    for combo in itertools.product(positions, repeat=len(groups)):
        trial = {
            name: thresholded[name][pos] for name, pos in zip(groups, combo)
        }
        acc = u.get_total_accuracy(trial)
        # Accept the very first combination unconditionally so a result is
        # always available, even when no combination scores above 0.
        if best_accuracy is None or best_accuracy < acc:
            best_accuracy = acc
            best_combo = combo
            mp_data = trial

    thresholds = {
        name: candidates[pos] for name, pos in zip(groups, best_combo)
    }
    return mp_data, thresholds
def enforce_single_threshold(categorical_results):
    """Sweep one shared threshold over the four groups for best accuracy.

    Thresholds 0.00, 0.01, ..., 0.99 are each applied to 'African-American',
    'Caucasian', 'Hispanic' and 'Other'. The first threshold reaching the
    highest ``u.get_total_accuracy`` is kept.

    Args:
        categorical_results: mapping that contains the four group names
            above, each mapped to that group's entries.

    Returns:
        A ``(single_threshold_data, thresholds)`` tuple. ``thresholds`` maps
        each of the four groups to the winning threshold. If no threshold
        scores above 0, the data dict is empty and every threshold is None.
    """
    group_names = ('African-American', 'Caucasian', 'Hispanic', 'Other')
    per_group = {name: categorical_results[name] for name in group_names}

    single_threshold_data = {}
    top_score = 0
    top_cutoff = None
    for step in range(100):
        cutoff = step / 100
        trial = {
            name: u.apply_threshold(rows, cutoff)
            for name, rows in per_group.items()
        }
        score = u.get_total_accuracy(trial)
        if score > top_score:
            top_score = score
            top_cutoff = cutoff
            single_threshold_data = trial

    thresholds = dict.fromkeys(group_names, top_cutoff)
    return single_threshold_data, thresholds
def enforce_predictive_parity(categorical_results, epsilon):
    """Choose per-group thresholds whose positive predictive values agree.

    For each target value 0.01, 0.02, ..., 0.99, every group takes the
    lowest threshold in 0.01..0.99 for which ``compare_probs(ppv, target,
    epsilon)`` is truthy, where ``ppv`` comes from
    ``u.get_positive_predictive_value``. Targets for which every group found
    a threshold are scored with ``u.get_total_accuracy``, and the first
    target reaching the highest score wins.

    Args:
        categorical_results: mapping of group name to that group's entries.
        epsilon: tolerance forwarded unchanged to ``compare_probs``.

    Returns:
        A ``(predictive_parity_data, thresholds)`` tuple of dicts keyed by
        group name. Each threshold is wrapped in a one-element list. Both
        dicts are empty when the input is empty or no target scores above 0.
    """
    # Original author: Kedaar Raghavendra Rao
    if not categorical_results:
        # Nothing to threshold; also avoids scoring an empty dict below.
        return {}, {}

    import utils as u

    # (race, step) -> (thresholded predictions, PPV). Neither value depends
    # on the target, so each is computed at most once, on first use.
    # NOTE(review): assumes the u.* helpers are deterministic.
    scored = {}

    predictive_parity_data = {}
    thresholds = {}
    max_total_acc = 0
    for p in range(1, 100):
        target = p / 100
        trial_thresh = {}
        trial_pred = {}
        for race, cases in categorical_results.items():
            for step in range(1, 100):
                entry = scored.get((race, step))
                if entry is None:
                    pred = u.apply_threshold(cases, step / 100)
                    entry = (pred, u.get_positive_predictive_value(pred))
                    scored[(race, step)] = entry
                pred, ppv = entry
                if compare_probs(ppv, target, epsilon):
                    trial_thresh[race] = [step / 100]
                    trial_pred[race] = pred
                    break
        # Only score targets that every group was able to match.
        if len(trial_pred) == len(categorical_results):
            total_accuracy = u.get_total_accuracy(trial_pred)
            if total_accuracy > max_total_acc:
                max_total_acc = total_accuracy
                predictive_parity_data = trial_pred
                thresholds = trial_thresh
    return predictive_parity_data, thresholds
def enforce_equal_opportunity(categorical_results, epsilon):
    """Choose per-group thresholds whose true positive rates agree.

    For each target value 0.01, 0.02, ..., 0.99, every group takes the
    lowest threshold in 0.01..0.99 for which ``compare_probs(tpr, target,
    epsilon)`` is truthy, where ``tpr`` comes from
    ``u.get_true_positive_rate``. Targets for which every group found a
    threshold are scored with ``u.get_total_accuracy``, and the first target
    reaching the highest score wins.

    Args:
        categorical_results: mapping of group name to that group's entries.
        epsilon: tolerance forwarded unchanged to ``compare_probs``.

    Returns:
        A ``(equal_opportunity_data, thresholds)`` tuple of dicts keyed by
        group name. Both are empty when the input is empty or no target
        scores above 0.
    """
    # Original author: Rishabh Sharma
    if not categorical_results:
        # Nothing to threshold; also avoids scoring an empty dict below.
        return {}, {}

    import utils as u

    # (race, step) -> (thresholded predictions, TPR). Neither value depends
    # on the target, so each is computed at most once, on first use.
    # NOTE(review): assumes the u.* helpers are deterministic.
    scored = {}

    equal_opportunity_data = {}
    thresholds = {}
    max_total_acc = 0
    for p in range(1, 100):
        target = p / 100
        trial_thresh = {}
        trial_pred = {}
        for race, cases in categorical_results.items():
            for step in range(1, 100):
                entry = scored.get((race, step))
                if entry is None:
                    pred = u.apply_threshold(cases, step / 100)
                    entry = (pred, u.get_true_positive_rate(pred))
                    scored[(race, step)] = entry
                pred, tpr = entry
                if compare_probs(tpr, target, epsilon):
                    trial_thresh[race] = step / 100
                    trial_pred[race] = pred
                    break
        # Only score targets that every group was able to match.
        if len(trial_pred) == len(categorical_results):
            total_accuracy = u.get_total_accuracy(trial_pred)
            if total_accuracy > max_total_acc:
                max_total_acc = total_accuracy
                equal_opportunity_data = trial_pred
                thresholds = trial_thresh
    return equal_opportunity_data, thresholds
def enforce_equal_opportunity(categorical_results, epsilon):
    """Choose thresholds for the four groups from their ROC data.

    The first sequence returned by ``u.get_ROC_data`` for each group is read
    at indices 0..99, and index ``i`` is treated as threshold ``i / 100``.
    A combination is feasible when the 'Caucasian', 'Hispanic' and 'Other'
    values each lie within ``epsilon`` of the 'African-American' value.
    Among feasible combinations, the first one reaching the highest
    ``u.get_total_accuracy`` is returned.

    NOTE(review): the first sequence is presumably the true positive rate
    per threshold step -- confirm against ``u.get_ROC_data``. Groups are
    compared with the 'African-American' value only, so two of the other
    groups may differ from each other by up to ``2 * epsilon``.

    Args:
        categorical_results: mapping that contains the four group names,
            each mapped to that group's entries.
        epsilon: largest allowed absolute difference from the
            'African-American' value.

    Returns:
        A ``(equal_opportunity_data, thresholds)`` tuple of dicts keyed by
        the four group names. Both are empty when no feasible combination
        scores above 0.

    Raises:
        KeyError: if one of the four group names is missing.
    """
    groups = ('African-American', 'Caucasian', 'Hispanic', 'Other')
    entries = {name: categorical_results[name] for name in groups}

    rates = []
    for name in groups:
        rate, _, _ = u.get_ROC_data(entries[name], name)
        rates.append(rate)
    anchor, second, third, fourth = rates

    steps = range(100)
    # (group, step) -> thresholded data, filled on first use so each group
    # is thresholded at most once per step.
    # NOTE(review): assumes u.apply_threshold is deterministic.
    applied = {}

    equal_opportunity_data = {}
    best_acc = 0
    best_combo = None
    for i in steps:
        low = anchor[i] - epsilon
        high = anchor[i] + epsilon
        second_hits = [j for j in steps if low <= second[j] <= high]
        if not second_hits:
            continue
        third_hits = [k for k in steps if low <= third[k] <= high]
        fourth_hits = [m for m in steps if low <= fourth[m] <= high]
        for j in second_hits:
            for k in third_hits:
                for m in fourth_hits:
                    combo = (i, j, k, m)
                    trial = {}
                    for name, step in zip(groups, combo):
                        if (name, step) not in applied:
                            applied[(name, step)] = u.apply_threshold(
                                entries[name], step / 100.)
                        trial[name] = applied[(name, step)]
                    acc = u.get_total_accuracy(trial)
                    if best_acc < acc:
                        best_acc = acc
                        best_combo = combo
                        equal_opportunity_data = trial

    if best_combo is None:
        # No feasible combination scored above 0: report "nothing found"
        # instead of failing while indexing a missing result.
        return {}, {}

    thresholds = {
        name: step / 100. for name, step in zip(groups, best_combo)
    }
    return equal_opportunity_data, thresholds
def enforce_predictive_parity(categorical_results, epsilon):
    """Choose thresholds for the four groups from per-threshold values.

    ``func`` is called once per group and its result is read at indices
    0..99, with index ``i`` treated as threshold ``i / 100``. A combination
    is feasible when the 'Caucasian', 'Hispanic' and 'Other' values each lie
    within ``epsilon`` of the 'African-American' value. Every feasible
    combination is scored with ``u.get_total_accuracy`` and the first one
    reaching the highest score is returned.

    NOTE(review): ``func`` presumably returns the positive predictive value
    per threshold step -- confirm against its definition. Groups are
    compared with the 'African-American' value only, so two of the other
    groups may differ from each other by up to ``2 * epsilon``.

    Args:
        categorical_results: mapping that contains the four group names,
            each mapped to that group's entries.
        epsilon: largest allowed absolute difference from the
            'African-American' value.

    Returns:
        A ``(predictive_parity_data, thresholds)`` tuple of dicts keyed by
        the four group names. Both are empty when no feasible combination
        scores above 0.

    Raises:
        KeyError: if one of the four group names is missing.
    """
    groups = ('African-American', 'Caucasian', 'Hispanic', 'Other')
    entries = {name: categorical_results[name] for name in groups}
    anchor, second, third, fourth = [func(entries[name]) for name in groups]

    steps = range(100)
    # (group, step) -> thresholded data, filled on first use so each group
    # is thresholded at most once per step.
    # NOTE(review): assumes u.apply_threshold is deterministic.
    applied = {}

    predictive_parity_data = {}
    best_accuracy = 0
    best_combo = None
    for i in steps:
        low = anchor[i] - epsilon
        high = anchor[i] + epsilon
        second_hits = [j for j in steps if low <= second[j] <= high]
        if not second_hits:
            continue
        third_hits = [k for k in steps if low <= third[k] <= high]
        fourth_hits = [m for m in steps if low <= fourth[m] <= high]
        # Walk every feasible combination, however many there are, rather
        # than a fixed count of them.
        for j in second_hits:
            for k in third_hits:
                for m in fourth_hits:
                    combo = (i, j, k, m)
                    trial = {}
                    for name, step in zip(groups, combo):
                        if (name, step) not in applied:
                            applied[(name, step)] = u.apply_threshold(
                                entries[name], step / 100)
                        trial[name] = applied[(name, step)]
                    acc = u.get_total_accuracy(trial)
                    if best_accuracy < acc:
                        best_accuracy = acc
                        best_combo = combo
                        predictive_parity_data = trial

    if best_combo is None:
        # No feasible combination scored above 0: report "nothing found"
        # instead of failing while indexing a missing result.
        return {}, {}

    thresholds = {
        name: step / 100 for name, step in zip(groups, best_combo)
    }
    return predictive_parity_data, thresholds
def enforce_predictive_parity(categorical_results, epsilon):
    """Choose per-group thresholds whose positive predictive values agree.

    Every group is thresholded at 0.00, 0.01, ..., 0.99 and scored with
    ``utils.get_positive_predictive_value``. A combination of one threshold
    per group is feasible when every pair of groups differs in value by at
    most ``epsilon``. Among feasible combinations, the first one reaching
    the highest ``utils.get_total_accuracy`` is returned.

    Thresholds are matched to groups by the mapping's own keys, so any
    number of groups and any key order is handled.

    Args:
        categorical_results: mapping of group name to that group's entries.
        epsilon: largest allowed absolute difference between two groups'
            positive predictive values.

    Returns:
        A ``(predictive_parity_data, thresholds)`` tuple of dicts keyed by
        group name. Both are empty when the input is empty or when no
        feasible combination scores above 0.
    """
    if not categorical_results:
        return {}, {}

    keys = list(categorical_results)

    # Sweep each group once, keeping the thresholded data so the selection
    # step below never has to call apply_threshold again.
    # NOTE(review): reusing the data assumes utils.apply_threshold is
    # deterministic for a given (entries, threshold) pair.
    sweeps = {key: [] for key in keys}
    for threshold in np.arange(0, 1, 0.01):
        for key in keys:
            t_data = utils.apply_threshold(categorical_results[key], threshold)
            ppv = utils.get_positive_predictive_value(t_data)
            sweeps[key].append((ppv, threshold, t_data))

    # Grow combinations one group at a time; a new entry must lie within
    # epsilon of every entry already chosen.
    combos = [(entry,) for entry in sweeps[keys[0]]]
    for key in keys[1:]:
        combos = [
            combo + (entry,)
            for combo in combos
            for entry in sweeps[key]
            if all(abs(entry[0] - chosen[0]) <= epsilon for chosen in combo)
        ]

    predictive_parity_data = {}
    thresholds = {}
    max_acc = 0
    for combo in combos:
        candidate = {key: entry[2] for key, entry in zip(keys, combo)}
        acc = utils.get_total_accuracy(candidate)
        if acc > max_acc:
            max_acc = acc
            thresholds = {key: entry[1] for key, entry in zip(keys, combo)}
            predictive_parity_data = candidate
    return predictive_parity_data, thresholds