def runAdult():
    """Boost decision stumps on the adult data set and report the errors.

    Loads the train/test split from ``data.adult``, runs
    ``boosting.boost`` with ``buildDecisionStump`` as the weak learner for
    20 rounds, then prints the training error followed by the test error.
    """
    from data import adult
    from decisionstump import buildDecisionStump

    trainSet, testSet = adult.load()

    # 20 boosting rounds with the decision-stump weak learner.
    hypothesis = boosting.boost(trainSet, buildDecisionStump, 20)

    trainingError = error(hypothesis, trainSet)
    print("Training error: %G" % trainingError)

    testError = error(hypothesis, testSet)
    print("Test error: %G" % testError)
def statistics(train, test, protectedIndex, protectedValue, numRounds=20):
    """Train a boosted classifier with a custom stump error and score it.

    The weak learner is ``buildDecisionStump`` driven by the error function
    returned from ``makeErrorFunction(protectedIndex, protectedValue, 0.5)``.

    Args:
        train: Training data handed to ``boosting.boost``.
        test: Data on which label error and bias are measured.
        protectedIndex: Index of the protected feature.
        protectedValue: Value of the protected feature.
        numRounds: Accepted for interface compatibility.
            NOTE(review): it is never forwarded to ``boosting.boost``, so
            the booster presumably uses its own default round count --
            confirm ``boost``'s signature before wiring it through.

    Returns:
        A tuple ``(error, bias, ubif)``: ``ef.labelError`` on ``test``,
        ``ef.signedStatisticalParity`` on ``test``, and
        ``ef.individualFairness`` on ``train`` with a flip proportion of 0.2.
    """
    weight = 0.5
    flipProportion = 0.2

    # The stump error function and the final label error get separate names.
    # Reusing one name would rebind the variable the weak learner closes over
    # (closures look names up at call time), leaving it pointing at a number.
    stumpError = makeErrorFunction(protectedIndex, protectedValue, weight)

    def weakLearner(draw):
        return buildDecisionStump(draw, errorFunction=stumpError)

    h = boosting.boost(train, weakLearner=weakLearner)

    bias = ef.signedStatisticalParity(test, protectedIndex, protectedValue, h)
    labelError = ef.labelError(test, h)
    # individualFairness is given plain boosting.boost, not the custom
    # weak learner built above.
    ubif = ef.individualFairness(train, boosting.boost, flipProportion)

    return labelError, bias, ubif
def statistics(train, test, protectedIndex, protectedValue, numRounds=20):
    """Train a boosted classifier with a custom stump error and score it.

    The weak learner is ``buildDecisionStump`` driven by the error function
    returned from ``makeErrorFunction(protectedIndex, protectedValue, 0.5)``.

    Args:
        train: Training data handed to ``boosting.boost``.
        test: Data on which label error and bias are measured.
        protectedIndex: Index of the protected feature.
        protectedValue: Value of the protected feature.
        numRounds: Accepted for interface compatibility.
            NOTE(review): it is never forwarded to ``boosting.boost``, so
            the booster presumably uses its own default round count --
            confirm ``boost``'s signature before wiring it through.

    Returns:
        A tuple ``(error, bias, ubif)``: ``ef.labelError`` on ``test``,
        ``ef.signedStatisticalParity`` on ``test``, and
        ``ef.individualFairness`` on ``train`` with a flip proportion of 0.2.
    """
    weight = 0.5
    flipProportion = 0.2

    # The stump error function and the final label error get separate names.
    # Reusing one name would rebind the variable the weak learner closes over
    # (closures look names up at call time), leaving it pointing at a number.
    stumpError = makeErrorFunction(protectedIndex, protectedValue, weight)

    def weakLearner(draw):
        return buildDecisionStump(draw, errorFunction=stumpError)

    h = boosting.boost(train, weakLearner=weakLearner)

    bias = ef.signedStatisticalParity(test, protectedIndex, protectedValue, h)
    labelError = ef.labelError(test, h)
    # individualFairness is given plain boosting.boost, not the custom
    # weak learner built above.
    ubif = ef.individualFairness(train, boosting.boost, flipProportion)

    return labelError, bias, ubif
def simpleTest():
    """Boost single-coordinate threshold learners on synthetic noisy data.

    Generates 1000 training and 1000 test points, each a list of 10 values
    from ``random.random()``, labels them with the noisy rule in ``target``,
    runs ``boosting.boost`` for 100 rounds, and prints the fraction of test
    points on which the final hypothesis disagrees with the test labels.
    """
    def target(x):
        # +1 when coordinate 2 or 3 exceeds 0.5, except that such a point is
        # labelled -1 whenever random.random() <= 0.05 (label noise).
        if x[2] > 0.5 or x[3] > 0.5:
            return 1 if random.random() > 0.05 else -1
        return -1

    examples = [[random.random() for _ in range(10)] for _ in range(1000)]
    labels = [target(x) for x in examples]
    trainingData = list(zip(examples, labels))

    testData = [[random.random() for _ in range(10)] for _ in range(1000)]
    testLabels = [target(x) for x in testData]

    def testCoordinate(samples, j):
        # Fraction of samples whose label agrees with the sign of x[j] - 0.5.
        values = [sign(x[j] - 0.5) * y for (x, y) in samples]
        return len([z for z in values if z > 0]) / len(values)

    def bestCoordinate(samples, n):
        return max(range(n), key=lambda j: testCoordinate(samples, j))

    # find the single coordinate and a threshold value that works best
    def singleCoordinateLearner(drawExample):
        samples = [drawExample() for _ in range(100)]
        n = len(samples[0][0])
        j = bestCoordinate(samples, n)
        # NOTE(review): this hypothesis returns a bool while the labels are
        # +1/-1 -- confirm that boosting.boost expects that.
        return lambda x: x[j] > 0.5

    finalH, finalDistr = boosting.boost(trainingData, singleCoordinateLearner, 100)

    # Score against the labels drawn once above. Calling target() again here
    # would re-roll the label noise and compare against different labels
    # than the ones stored in testLabels.
    mistakes = sum(1 for x, y in zip(testData, testLabels) if finalH(x) != y)
    finalError = mistakes / len(testData)
    print(finalError)
def boostingLearner(data, protectedIndex, protectedValue):
    """Boost on ``data`` and wrap the result with one-sided random relabeling.

    The hypothesis returned by ``boosting.boost(data)`` is passed, together
    with ``data`` and the protected index/value, to
    ``randomOneSideRelabelData``; that function's result is returned.
    """
    return randomOneSideRelabelData(
        boosting.boost(data), data, protectedIndex, protectedValue
    )
# NOTE(review): this span begins inside a function body. The enclosing `def`
# line and the definitions of `bias`, `favored_trait`, `h`, `trainingData`
# and `protectedIndex` lie outside the visible text.
    # Training examples whose protected feature differs from favored_trait.
    nonfavored_data = [(feats, label) for feats, label in trainingData if not feats[protectedIndex] == favored_trait]
    # NF: number of non-favored examples; NFn: how many of them h labels -1.
    NF, NFn = (len(nonfavored_data), len([1 for x, label in nonfavored_data if h(x) == -1]))
    # Probability of flipping a -1 prediction on a non-favored point.
    # Raises ZeroDivisionError when NFn is 0.
    p = NF * abs(bias) / NFn

    def relabeledClassifier(point):
        # Classify with h; a -1 on a non-favored point is flipped to +1
        # with probability p, every other prediction is returned unchanged.
        origClass = h(point)
        if point[protectedIndex] != favored_trait and origClass == -1:
            if random() < p:
                return -origClass
            else:
                return origClass
        else:
            return origClass

    return relabeledClassifier


# Script entry: boost on the adult data (second argument 5) and build the
# relabeled classifier from the resulting hypothesis.
if __name__ == '__main__':
    from data import adult
    from boosting import boost

    trainingData, testData = adult.load()
    protectedIndex = adult.protectedIndex
    protectedValue = adult.protectedValue

    h = boost(trainingData, 5)
    rr = randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue)
# to get rated 1
def randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue):
    """Wrap ``h`` so some -1 predictions on the non-favored group become +1.

    The bias of ``h`` on ``trainingData`` is measured with
    ``signedStatisticalParity`` and ``zeroOneSign(bias)`` is taken as the
    favored value of the protected feature. Points whose protected feature
    differs from it and which ``h`` labels -1 are flipped with probability
    ``p = NF * |bias| / NFn``, where ``NF`` counts the non-favored training
    examples and ``NFn`` those of them that ``h`` labels -1.

    Args:
        h: Classifier mapping a feature vector to a label (compared to -1).
        trainingData: Iterable of ``(features, label)`` pairs.
        protectedIndex: Index of the protected feature inside ``features``.
        protectedValue: Forwarded to ``signedStatisticalParity``.

    Returns:
        A classifier ``relabeledClassifier(point)`` with the randomized
        relabeling applied on top of ``h``.
    """
    bias = signedStatisticalParity(trainingData, protectedIndex, protectedValue, h)
    favored_trait = zeroOneSign(bias)

    nonfavored_data = [
        (feats, label)
        for feats, label in trainingData
        if feats[protectedIndex] != favored_trait
    ]
    NF = len(nonfavored_data)
    NFn = sum(1 for x, _ in nonfavored_data if h(x) == -1)

    # With no non-favored example labelled -1 there is nothing to relabel on
    # the training data; use p = 0 rather than dividing by zero.
    p = NF * abs(bias) / NFn if NFn else 0.0

    def relabeledClassifier(point):
        origClass = h(point)
        # random() is drawn only for non-favored points that h labels -1.
        if point[protectedIndex] != favored_trait and origClass == -1 and random() < p:
            return -origClass
        return origClass

    return relabeledClassifier


if __name__ == '__main__':
    from data import adult
    from boosting import boost

    trainingData, testData = adult.load()
    protectedIndex = adult.protectedIndex
    protectedValue = adult.protectedValue

    h = boost(trainingData, 5)
    rr = randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue)
def boostingLearner(data, protectedIndex, protectedValue):
    """Return the result of ``boost(data)``.

    ``protectedIndex`` and ``protectedValue`` are accepted but not used by
    this learner.
    """
    hypothesis = boost(data)
    return hypothesis
if label != row[-1]: numErrors = numErrors + 1 print(float(numErrors) / len(perTestMatrix)) # Boosting Example Use # get training data boostTrainingData = open('data/boosttrain.txt', 'r').readlines() boostTrainingMatrix = [] for line in boostTrainingData: boostTrainingMatrix.append(np.fromstring(line, dtype=float, sep=' ')) boostTrainingMatrix = np.array(boostTrainingMatrix) # get test data boostTestData = open('data/boosttest.txt', 'r').readlines() boostTestMatrix = [] for line in boostTestData: boostTestMatrix.append(np.fromstring(line, dtype=float, sep=' ')) boostTestMatrix = np.array(boostTestMatrix) # get an array of weak learners f = boost(boostTrainingMatrix, 4) # get test error numErrors = 0 for row in boostTestMatrix: label = strongLearner(f, row) if label != row[-1]: numErrors = numErrors + 1 print(float(numErrors) / len(boostTestMatrix))