Example #1
def runAdult():
   # relies on a module-level `import boosting` and an `error(h, data)`
   # helper from the surrounding project (neither is shown in this snippet)
   from data import adult
   from decisionstump import buildDecisionStump
   train, test = adult.load()
   weakLearner = buildDecisionStump
   rounds = 20

   h = boosting.boost(train, weakLearner, rounds)
   print("Training error: %G" % error(h, train))
   print("Test error: %G" % error(h, test))
Example #2
def runAdult():
    from data import adult
    from decisionstump import buildDecisionStump
    train, test = adult.load()
    weakLearner = buildDecisionStump
    rounds = 20

    h = boosting.boost(train, weakLearner, rounds)
    print("Training error: %G" % error(h, train))
    print("Test error: %G" % error(h, test))
Example #3
def statistics(train, test, protectedIndex, protectedValue, numRounds=20):
   # relies on makeErrorFunction, buildDecisionStump, boosting and the error
   # module `ef` from the surrounding project; note that numRounds is accepted
   # but never forwarded to boosting.boost in this snippet
   weight = 0.5
   flipProportion = 0.2

   error = makeErrorFunction(protectedIndex, protectedValue, weight)
   weakLearner = lambda draw: buildDecisionStump(draw, errorFunction=error)

   h = boosting.boost(train, weakLearner=weakLearner)

   bias = ef.signedStatisticalParity(test, protectedIndex, protectedValue, h)
   error = ef.labelError(test, h)
   ubif = ef.individualFairness(train, boosting.boost, flipProportion)

   return error, bias, ubif
Example #4
def statistics(train, test, protectedIndex, protectedValue, numRounds=20):
   weight = 0.5
   flipProportion = 0.2

   error = makeErrorFunction(protectedIndex, protectedValue, weight)
   weakLearner = lambda draw: buildDecisionStump(draw, errorFunction=error)

   h = boosting.boost(train, weakLearner = weakLearner)

   bias = ef.signedStatisticalParity(test, protectedIndex, protectedValue, h)
   error = ef.labelError(test, h)
   ubif = ef.individualFairness(train, boosting.boost, flipProportion)

   return error, bias, ubif
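The error-function module `ef` isn't shown in these snippets. Statistical parity compares how often the protected group and the rest of the population receive a positive label; a minimal sketch of `signedStatisticalParity` under that assumption (the sign convention and the real module's implementation may differ):

def signedStatisticalParity(data, protectedIndex, protectedValue, h):
    # difference in positive (+1) classification rates between the protected
    # group and everyone else
    protected = [x for (x, _) in data if x[protectedIndex] == protectedValue]
    others = [x for (x, _) in data if x[protectedIndex] != protectedValue]
    protectedRate = sum(1 for x in protected if h(x) == 1) / len(protected)
    othersRate = sum(1 for x in others if h(x) == 1) / len(others)
    return protectedRate - othersRate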
Example #5
def simpleTest():
   # relies on `random`, the project's `sign` helper, and `boosting` being
   # importable at module level (imports not shown in this snippet)

   def target(x):
      if x[2] > 0.5 or x[3] > 0.5:
         return 1 if random.random() > 0.05 else -1
      return -1


   examples = [[random.random() for _ in range(10)] for _ in range(1000)]
   labels = [target(x) for x in examples]
   trainingData = list(zip(examples, labels))

   testData = [[random.random() for _ in range(10)] for _ in range(1000)]
   testLabels = [target(x) for x in testData]


   def testCoordinate(samples, j):
       values = [sign(x[j] - 0.5) * y for (x,y) in samples]
       return len([z for z in values if z > 0]) / len(values)


   def bestCoordinate(samples, n):
       return max(range(n), key=lambda j: testCoordinate(samples, j))


   # find the single coordinate and a threshold value that works best
   def singleCoordinateLearner(drawExample):
       samples = [drawExample() for _ in range(100)]
       n = len(samples[0][0])

       j = bestCoordinate(samples, n)
       return lambda x: x[j] > 0.5


   finalH, finalDistr = boosting.boost(trainingData, singleCoordinateLearner, 100)

   # measure test error against the fixed test labels computed above, rather
   # than re-sampling the noisy target
   finalError = len([1 for (x, y) in zip(testData, testLabels) if finalH(x) != y]) / len(testData)
   print(finalError)
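Note that the weak learner above receives a `drawExample` callable rather than a dataset: presumably `boosting.boost` hands each weak learner a sampler that draws training examples according to the current boosting weights. A minimal sketch of such a sampler (the name `makeDraw` and its signature are assumptions, not part of the actual boosting module):

import random

def makeDraw(examples, weights):
    # returns a zero-argument sampler that draws one (point, label) pair with
    # probability proportional to its current boosting weight
    def draw():
        return random.choices(examples, weights=weights, k=1)[0]
    return draw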
Example #6
def boostingLearner(data, protectedIndex, protectedValue):
    h = boosting.boost(data)
    return randomOneSideRelabelData(h, data, protectedIndex, protectedValue)
Example #7
    # (this snippet begins inside randomOneSideRelabelData; the missing top of
    # the function is shown in full in Example #8 below)
    nonfavored_data = [(feats, label) for feats, label in trainingData
                       if not feats[protectedIndex] == favored_trait]
    NF, NFn = (len(nonfavored_data),
               len([1 for x, label in nonfavored_data if h(x) == -1]))

    p = NF * abs(bias) / NFn

    def relabeledClassifier(point):
        origClass = h(point)
        if point[protectedIndex] != favored_trait and origClass == -1:
            if random() < p:
                return -origClass
            else:
                return origClass
        else:
            return origClass

    return relabeledClassifier


if __name__ == '__main__':
    from data import adult
    from boosting import boost
    trainingData, testData = adult.load()
    protectedIndex = adult.protectedIndex
    protectedValue = adult.protectedValue

    h = boost(trainingData, 5)
    rr = randomOneSideRelabelData(h, trainingData, protectedIndex,
                                  protectedValue)
Example #8
# Randomly relabel some of the non-favored group's negatively classified
# points (so that they get rated 1), with flip probability chosen to cancel
# the measured bias. Relies on `from random import random` plus the project's
# signedStatisticalParity and zeroOneSign helpers.
def randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue):
   bias = signedStatisticalParity(trainingData, protectedIndex, protectedValue, h)
   favored_trait = zeroOneSign(bias)

   nonfavored_data = [(feats,label) for feats,label in trainingData if not feats[protectedIndex]==favored_trait]
   NF, NFn = len(nonfavored_data), len([1 for x,label in nonfavored_data if h(x)==-1])

   p = NF*abs(bias)/NFn
   def relabeledClassifier(point):
      origClass = h(point)
      if point[protectedIndex] != favored_trait and origClass == -1:
         if random() < p:
            return -origClass
         else:
            return origClass
      else:
         return origClass

   return relabeledClassifier

if __name__ == '__main__':
   from data import adult
   from boosting import boost
   trainingData, testData = adult.load()
   protectedIndex = adult.protectedIndex
   protectedValue = adult.protectedValue

   h = boost(trainingData, 5)
   rr = randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue)
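The flip probability p = NF * abs(bias) / NFn is chosen so that the expected number of flipped labels, p * NFn = NF * abs(bias), raises the non-favored group's positive rate by roughly abs(bias), cancelling the measured parity gap on average. A quick sanity check of that arithmetic with made-up numbers:

NF, NFn, bias = 1000, 400, -0.08   # hypothetical group sizes and measured bias
p = NF * abs(bias) / NFn           # 0.2
expected_flips = p * NFn           # 80 relabeled points
print(expected_flips / NF)         # 0.08 == abs(bias): the gap closes in expectation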
Example #9
def boostingLearner(data, protectedIndex, protectedValue):
   h = boosting.boost(data)
   return randomOneSideRelabelData(h, data, protectedIndex, protectedValue)
Example #10
def boostingLearner(data, protectedIndex, protectedValue):
    return boost(data)
Example #11
# (tail of an earlier evaluation loop: `label`, `row`, `numErrors`, and
# `perTestMatrix` come from code not shown; `np`, `boost`, and `strongLearner`
# are likewise assumed to be imported at module level)
    if label != row[-1]:
        numErrors = numErrors + 1
print(float(numErrors) / len(perTestMatrix))

# Boosting Example Use

# get training data
boostTrainingData = open('data/boosttrain.txt', 'r').readlines()
boostTrainingMatrix = []
for line in boostTrainingData:
    boostTrainingMatrix.append(np.fromstring(line, dtype=float, sep=' '))
boostTrainingMatrix = np.array(boostTrainingMatrix)

# get test data
boostTestData = open('data/boosttest.txt', 'r').readlines()
boostTestMatrix = []
for line in boostTestData:
    boostTestMatrix.append(np.fromstring(line, dtype=float, sep=' '))
boostTestMatrix = np.array(boostTestMatrix)

# get an array of weak learners
f = boost(boostTrainingMatrix, 4)

# get test error
numErrors = 0
for row in boostTestMatrix:
    label = strongLearner(f, row)
    if label != row[-1]:
        numErrors = numErrors + 1
print(float(numErrors) / len(boostTestMatrix))
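`strongLearner(f, row)` isn't shown here; in AdaBoost the final classifier is a weighted majority vote of the weak learners. A minimal sketch, assuming `f` is a list of (alpha, stump) pairs, each stump maps a feature vector to ±1, and the last column of `row` holds the label (the actual project may store these differently):

def strongLearner(f, row):
    # weighted majority vote of the boosted weak learners; the label column
    # (row[-1]) is stripped before handing the features to each stump
    vote = sum(alpha * stump(row[:-1]) for (alpha, stump) in f)
    return 1 if vote >= 0 else -1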
Example #12
def boostingLearner(data, protectedIndex, protectedValue):
    return boost(data)