def randomOneSideMassageData(examples, protectedIndex, protectedValue):
    """Return a copy of ``examples`` with some non-favored -1 labels flipped.

    The favored value of the protected feature is ``zeroOneSign`` applied to
    the signed statistical parity of ``examples`` (presumably 0 or 1 --
    confirm against ``zeroOneSign``).  Randomly chosen examples that are
    outside the favored group and labeled -1 have their label negated, so the
    non-favored group's positive rate moves toward the favored group's.

    Args:
        examples: sequence of ``(features, label)`` pairs; labels are compared
            against ``1`` and ``-1``.
        protectedIndex: index into ``features`` of the protected attribute.
        protectedValue: forwarded unchanged to ``signedStatisticalParity``.

    Returns:
        A new list with one entry per input example, in the same order.
        Untouched entries are the original pair objects; flipped entries are
        new ``(features, -1 * label)`` tuples.
    """
    bias = signedStatisticalParity(examples, protectedIndex, protectedValue)
    print("Initial bias:", bias)
    favored_trait = zeroOneSign(bias)

    # Single pass: tally group sizes and remember the *positions* of the
    # flip candidates.  Working with positions rather than values means that
    # duplicate examples are not all flipped when only one was selected, and
    # avoids an equality scan of the sampled list for every example.
    favored_total = 0
    favored_positive = 0
    nonfavored_total = 0
    nonfavored_negative_indices = []
    for index, (x, label) in enumerate(examples):
        if x[protectedIndex] == favored_trait:
            favored_total += 1
            if label == 1:
                favored_positive += 1
        else:
            nonfavored_total += 1
            if label == -1:
                nonfavored_negative_indices.append(index)

    # Number of extra positives the non-favored group needs so that its
    # positive rate matches favored_positive / favored_total.
    num_nonfavored_positive = nonfavored_total - len(nonfavored_negative_indices)
    if favored_total:
        num_to_flip = math.floor(
            (nonfavored_total * favored_positive
             - favored_total * num_nonfavored_positive) / favored_total
        )
    else:
        # No favored examples: there is no rate to match (and nothing to
        # divide by), so leave every label alone.
        num_to_flip = 0

    # sample() rejects a negative count or one larger than the population,
    # so keep the request inside the valid range.
    num_to_flip = max(0, min(num_to_flip, len(nonfavored_negative_indices)))
    print("Number of labels flipped:", num_to_flip)

    flip_indices = set(sample(nonfavored_negative_indices, num_to_flip))
    return [
        (pair[0], -1 * pair[1]) if index in flip_indices else pair
        for index, pair in enumerate(examples)
    ]
def thresholdRelabel(h, trainingData, protectedIndex, protectedValue, hypotheses, weights, threshold):
    """Build a classifier that negates low-margin predictions of ``h`` for one group.

    The targeted group is the protected-attribute value
    ``1 - zeroOneSign(parity)``, where ``parity`` is the signed statistical
    parity of ``h`` on ``trainingData``.  For a point in that group whose
    ``absMargin(pt, hypotheses, weights)`` is below ``threshold``, the
    returned function yields the negation of ``h(pt)``; for every other
    point it yields ``h(pt)`` unchanged.

    Args:
        h: base classifier, called as ``h(pt)``.
        trainingData: data passed to ``signedStatisticalParity``.
        protectedIndex: index of the protected attribute within a point.
        protectedValue: forwarded unchanged to ``signedStatisticalParity``.
        hypotheses, weights: forwarded unchanged to ``absMargin``.
        threshold: margins strictly below this value trigger the flip.

    Returns:
        A function mapping a point to its (possibly negated) label.
    """
    parity = signedStatisticalParity(trainingData, protectedIndex, protectedValue, h)
    targetGroup = 1 - zeroOneSign(parity)

    def relabel(pt):
        prediction = h(pt)
        # The margin is only evaluated for points in the targeted group.
        shouldFlip = (
            pt[protectedIndex] == targetGroup
            and absMargin(pt, hypotheses, weights) < threshold
        )
        return -prediction if shouldFlip else prediction

    return relabel
def randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue):
    """Wrap ``h`` so it randomly promotes -1 predictions outside the favored group.

    The favored value of the protected feature is ``zeroOneSign`` applied to
    the signed statistical parity of ``h`` on ``trainingData`` (presumably 0
    or 1 -- confirm against ``zeroOneSign``).  The returned classifier agrees
    with ``h`` except that, for a point outside the favored group that ``h``
    labels -1, it returns the negated label with probability ``p``.

    ``p`` is ``NF * abs(bias) / NFn``, where ``NF`` is the number of
    non-favored training points and ``NFn`` is how many of those ``h`` labels
    -1.  Flipping that fraction of the negatives raises the non-favored
    positive rate by ``abs(bias)`` in expectation.

    Args:
        h: base classifier, called as ``h(features)``.
        trainingData: iterable of ``(features, label)`` pairs; only the
            features are used here.
        protectedIndex: index of the protected attribute within ``features``.
        protectedValue: forwarded unchanged to ``signedStatisticalParity``.

    Returns:
        A function mapping a point to a label.
    """
    bias = signedStatisticalParity(trainingData, protectedIndex, protectedValue, h)
    favored_trait = zeroOneSign(bias)

    nonfavored_points = [
        feats for feats, _ in trainingData
        if feats[protectedIndex] != favored_trait
    ]
    NF = len(nonfavored_points)
    NFn = sum(1 for feats in nonfavored_points if h(feats) == -1)

    # With no negative predictions in the non-favored group the ratio is
    # undefined; fall back to never flipping instead of dividing by zero.
    p = NF * abs(bias) / NFn if NFn else 0.0

    def relabeledClassifier(point):
        origClass = h(point)
        # random() is only drawn for non-favored points predicted -1.
        if point[protectedIndex] != favored_trait and origClass == -1 and random() < p:
            return -origClass
        return origClass

    return relabeledClassifier