def runAdult(rounds=20):
    """Boost decision stumps on the adult dataset and print the error rates.

    Loads the train/test split via ``adult.load()``, runs ``boosting.boost``
    with ``buildDecisionStump`` as the weak learner, and prints the resulting
    hypothesis' error on both splits.

    Args:
        rounds: number of boosting rounds handed to ``boosting.boost``.
            Defaults to 20, the value that used to be hard-coded here.
    """
    # Project-local imports are kept function-scoped, as in the original.
    from data import adult
    from decisionstump import buildDecisionStump

    train, test = adult.load()
    weakLearner = buildDecisionStump

    # `boosting` and `error` are expected to be available at module level.
    h = boosting.boost(train, weakLearner, rounds)

    print("Training error: %G" % error(h, train))
    print("Test error: %G" % error(h, test))
nonfavored_data = [(feats, label) for feats, label in trainingData if not feats[protectedIndex] == favored_trait] NF, NFn = (len(nonfavored_data), len([1 for x, label in nonfavored_data if h(x) == -1])) p = NF * abs(bias) / NFn def relabeledClassifier(point): origClass = h(point) if point[protectedIndex] != favored_trait and origClass == -1: if random() < p: return -origClass else: return origClass else: return origClass return relabeledClassifier if __name__ == '__main__': from data import adult from boosting import boost trainingData, testData = adult.load() protectedIndex = adult.protectedIndex protectedValue = adult.protectedValue h = boost(trainingData, 5) rr = randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue)
# to get rated 1
def randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue):
    """Wrap ``h`` in a classifier that randomly flips some -1 outputs to +1.

    The signed statistical parity of ``h`` on ``trainingData`` decides which
    value of the protected feature counts as "favored" (via ``zeroOneSign``).
    Points whose protected feature differs from that value and which ``h``
    labels -1 have their label negated with probability
    ``p = NF * |bias| / NFn``, where ``NF`` is the number of non-favored
    training points and ``NFn`` the number of those that ``h`` labels -1.

    Args:
        h: classifier mapping a feature vector to a label; -1 is treated as
            the negative class.
        trainingData: iterable of ``(features, label)`` pairs.
        protectedIndex: index of the protected feature inside ``features``.
        protectedValue: forwarded to ``signedStatisticalParity``.

    Returns:
        A function ``point -> label`` implementing the randomized relabeling.

    Note:
        ``signedStatisticalParity``, ``zeroOneSign`` and ``random`` must be
        available at module level. ``random`` is presumably
        ``random.random`` -- confirm against the file's imports.
    """
    bias = signedStatisticalParity(trainingData, protectedIndex, protectedValue, h)
    favored_trait = zeroOneSign(bias)

    nonfavored_data = [(feats, label) for feats, label in trainingData
                       if not feats[protectedIndex] == favored_trait]
    NF = len(nonfavored_data)
    NFn = sum(1 for x, label in nonfavored_data if h(x) == -1)

    # If h never labels a non-favored training point -1 there is nothing to
    # flip; the unguarded division used to raise ZeroDivisionError here.
    p = NF * abs(bias) / NFn if NFn else 0.0

    def relabeledClassifier(point):
        origClass = h(point)
        # Only negative predictions on the non-favored side are candidates.
        if point[protectedIndex] != favored_trait and origClass == -1:
            if random() < p:
                return -origClass
        return origClass

    return relabeledClassifier


if __name__ == '__main__':
    from data import adult
    from boosting import boost

    trainingData, testData = adult.load()
    protectedIndex = adult.protectedIndex
    protectedValue = adult.protectedValue

    # NOTE(review): boost is called with (data, rounds) only -- confirm this
    # matches the signature of boosting.boost.
    h = boost(trainingData, 5)
    rr = randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue)
protectedProb = sum(1 for (x,l) in protectedClass if l == 1) / len(protectedClass) elseProb = sum(1 for (x,l) in elseClass if l == 1) / len(elseClass) return elseProb - protectedProb # signedBias: [[float]], int, obj, h -> float # compute the signed bias of a hypothesis on a given dataset def signedBias(data, h, protectedIndex, protectedValue): return labelBias(data, [h(x) for x in data], protectedIndex, protectedValue) if __name__ == "__main__": from data import adult train, test = adult.load(separatePointsAndLabels=True) tests = [('female', (1,0)), ('private employment', (2,1)), ('asian race', (33,1)), ('divorced', (12, 1))] for (name, (index, value)) in tests: print("anti-'%s' bias in training data: %.4f" % (name, labelBias(train[0], train[1], index, value))) indian = lambda x: x[47] == 1 print(len([x for x in train[0] if indian(x)]) / len(train[0])) print(signedBias(train[0], indian, 1, 0))
raise Exception("Nobody in the unprotected class") else: protectedProb = sum( 1 for (x, l) in protectedClass if l == 1) / len(protectedClass) elseProb = sum(1 for (x, l) in elseClass if l == 1) / len(elseClass) return elseProb - protectedProb # signedBias: [[float]], int, obj, h -> float # compute the signed bias of a hypothesis on a given dataset def signedBias(data, h, protectedIndex, protectedValue): return labelBias(data, [h(x) for x in data], protectedIndex, protectedValue) if __name__ == "__main__": from data import adult train, test = adult.load(separatePointsAndLabels=True) tests = [('female', (1, 0)), ('private employment', (2, 1)), ('asian race', (33, 1)), ('divorced', (12, 1))] for (name, (index, value)) in tests: print("anti-'%s' bias in training data: %.4f" % (name, labelBias(train[0], train[1], index, value))) indian = lambda x: x[47] == 1 print(len([x for x in train[0] if indian(x)]) / len(train[0])) print(signedBias(train[0], indian, 1, 0))