Example #1
0
def runAdult():
    """Boost decision stumps on the adult dataset and print both error rates.

    The dataset and the weak learner are imported locally; ``boosting`` and
    ``error`` are expected to be available in the enclosing module.
    """
    from data import adult
    from decisionstump import buildDecisionStump

    trainingSet, testSet = adult.load()
    numRounds = 20
    hypothesis = boosting.boost(trainingSet, buildDecisionStump, numRounds)

    # Evaluate and report each split in turn (training first, then test).
    trainingError = error(hypothesis, trainingSet)
    print("Training error: %G" % trainingError)
    testError = error(hypothesis, testSet)
    print("Test error: %G" % testError)
Example #2
0
def runAdult():
   # Train a boosted classifier on the adult data using decision stumps as
   # the weak learner, then print its error on the training and test splits.
   from data import adult
   from decisionstump import buildDecisionStump

   trainSplit, testSplit = adult.load()
   boostingRounds = 20
   boosted = boosting.boost(trainSplit, buildDecisionStump, boostingRounds)

   errorOnTrain = error(boosted, trainSplit)
   print("Training error: %G" % errorOnTrain)

   errorOnTest = error(boosted, testSplit)
   print("Test error: %G" % errorOnTest)
Example #3
0
    nonfavored_data = [(feats, label) for feats, label in trainingData
                       if not feats[protectedIndex] == favored_trait]
    NF, NFn = (len(nonfavored_data),
               len([1 for x, label in nonfavored_data if h(x) == -1]))

    p = NF * abs(bias) / NFn

    def relabeledClassifier(point):
        """Classify ``point`` with ``h``, randomly flipping some -1 outputs.

        A -1 prediction is negated with probability ``p`` when the point's
        protected feature differs from ``favored_trait``; every other
        prediction is returned unchanged.
        """
        prediction = h(point)
        eligible = (point[protectedIndex] != favored_trait
                    and prediction == -1)
        # random() is only drawn for eligible points (short-circuit `and`).
        if eligible and random() < p:
            return -prediction
        return prediction

    return relabeledClassifier


# Script entry point: load the adult dataset, train a boosted classifier on
# the training split, and wrap it with randomOneSideRelabelData.
if __name__ == '__main__':
    from data import adult
    from boosting import boost
    trainingData, testData = adult.load()
    # The protected feature's index and value are supplied by the dataset
    # module itself.
    protectedIndex = adult.protectedIndex
    protectedValue = adult.protectedValue

    # NOTE(review): boost is called with only (data, 5) here; confirm that
    # this matches boosting.boost's signature and what the 5 stands for.
    h = boost(trainingData, 5)
    # NOTE(review): rr is assigned but not used in the lines shown here.
    rr = randomOneSideRelabelData(h, trainingData, protectedIndex,
                                  protectedValue)
Example #4
0
# to get rated 1
def randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue):
   """Wrap ``h`` in a classifier that randomly flips some -1 outputs to +1.

   The flip probability ``p`` is estimated from ``trainingData`` as
   ``NF * abs(bias) / NFn``, where ``bias`` is the value returned by
   ``signedStatisticalParity``, ``NF`` is the number of training points whose
   protected feature differs from ``favored_trait`` and ``NFn`` is how many
   of those points ``h`` labels -1.

   Args:
      h: classifier; called with a single point, compared against -1.
      trainingData: iterable of ``(features, label)`` pairs.
      protectedIndex: index of the protected feature within a point.
      protectedValue: value forwarded to ``signedStatisticalParity``.

   Returns:
      A function ``point -> label`` that returns ``h(point)``, except that a
      -1 prediction on a point whose protected feature differs from
      ``favored_trait`` is negated with probability ``p``.
   """
   bias = signedStatisticalParity(trainingData, protectedIndex, protectedValue, h)
   # NOTE(review): presumably zeroOneSign maps the sign of the bias onto the
   # 0/1 value of the favored group -- confirm against its definition.
   favored_trait = zeroOneSign(bias)

   nonfavored_points = [feats for feats, _ in trainingData
                        if feats[protectedIndex] != favored_trait]
   NF = len(nonfavored_points)
   NFn = sum(1 for x in nonfavored_points if h(x) == -1)

   # When h labels no non-favored training point -1 (or there are no such
   # points at all) there is nothing to estimate a flip rate from; the
   # original expression raised ZeroDivisionError here. Use p = 0 so the
   # returned classifier simply behaves like h.
   if NFn == 0:
      p = 0.0
   else:
      p = NF * abs(bias) / NFn

   def relabeledClassifier(point):
      origClass = h(point)
      eligible = point[protectedIndex] != favored_trait and origClass == -1
      # random() is only drawn for eligible points.
      if eligible and random() < p:
         return -origClass
      return origClass

   return relabeledClassifier

# Script entry point: load the adult dataset, train a boosted classifier on
# the training split, and wrap it with randomOneSideRelabelData.
if __name__ == '__main__':
   from data import adult
   from boosting import boost
   trainingData, testData = adult.load()
   # The protected feature's index and value are supplied by the dataset
   # module itself.
   protectedIndex = adult.protectedIndex
   protectedValue = adult.protectedValue

   # NOTE(review): boost is called with only (data, 5) here; confirm that
   # this matches boosting.boost's signature and what the 5 stands for.
   h = boost(trainingData, 5)
   # NOTE(review): rr is assigned but not used in the lines shown here.
   rr = randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue)
      protectedProb = sum(1 for (x,l) in protectedClass if l == 1) / len(protectedClass)
      elseProb = sum(1 for (x,l) in elseClass  if l == 1) / len(elseClass)

   return elseProb - protectedProb


# signedBias: [[float]], h, int, obj -> float
# Apply the hypothesis h to every point in data and return the labelBias of
# the resulting predictions for the given protected feature index and value.
def signedBias(data, h, protectedIndex, protectedValue):
   predictedLabels = list(map(h, data))
   return labelBias(data, predictedLabels, protectedIndex, protectedValue)



# Script entry point: print labelBias of the training labels for several
# (feature index, feature value) pairs, then inspect feature 47.
if __name__ == "__main__":
   from data import adult
   # train[0] and train[1] are passed to labelBias below as its data and
   # labels arguments (presumably points and labels -- confirm with
   # adult.load).
   train, test = adult.load(separatePointsAndLabels=True)

   # Each entry is (display name, (feature index, feature value)); the index
   # and value are forwarded to labelBias as its last two arguments.
   tests = [('female', (1,0)),
            ('private employment', (2,1)),
            ('asian race', (33,1)),
            ('divorced', (12, 1))]

   for (name, (index, value)) in tests:
      print("anti-'%s' bias in training data: %.4f" %
         (name, labelBias(train[0], train[1], index, value)))


   # Predicate that is true when feature 47 equals 1; it is used below as the
   # hypothesis for signedBias, so its boolean outputs act as the labels.
   # NOTE(review): the meaning of column 47 is inferred from the name only.
   indian = lambda x: x[47] == 1
   # Fraction of training points satisfying the predicate.
   # NOTE(review): int / int -- relies on true division (Python 3).
   print(len([x for x in train[0] if indian(x)]) / len(train[0]))
   print(signedBias(train[0], indian, 1, 0))
Example #6
0
        raise Exception("Nobody in the unprotected class")
    else:
        protectedProb = sum(
            1 for (x, l) in protectedClass if l == 1) / len(protectedClass)
        elseProb = sum(1 for (x, l) in elseClass if l == 1) / len(elseClass)

    return elseProb - protectedProb


# signedBias: [[float]], h, int, obj -> float
# Label every point in data with the hypothesis h, then measure the labelBias
# of those predictions for the given protected feature index and value.
def signedBias(data, h, protectedIndex, protectedValue):
    predictions = list(map(h, data))
    return labelBias(data, predictions, protectedIndex, protectedValue)


# Script entry point: print labelBias of the training labels for several
# (feature index, feature value) pairs, then inspect feature 47.
if __name__ == "__main__":
    from data import adult
    # train[0] and train[1] are passed to labelBias below as its data and
    # labels arguments (presumably points and labels -- confirm with
    # adult.load).
    train, test = adult.load(separatePointsAndLabels=True)

    # Each entry is (display name, (feature index, feature value)); the index
    # and value are forwarded to labelBias as its last two arguments.
    tests = [('female', (1, 0)), ('private employment', (2, 1)),
             ('asian race', (33, 1)), ('divorced', (12, 1))]

    for (name, (index, value)) in tests:
        print("anti-'%s' bias in training data: %.4f" %
              (name, labelBias(train[0], train[1], index, value)))

    # Predicate that is true when feature 47 equals 1; it is used below as
    # the hypothesis for signedBias, so its boolean outputs act as the labels.
    # NOTE(review): the meaning of column 47 is inferred from the name only.
    indian = lambda x: x[47] == 1
    # Fraction of training points satisfying the predicate.
    # NOTE(review): int / int -- relies on true division (Python 3).
    print(len([x for x in train[0] if indian(x)]) / len(train[0]))
    print(signedBias(train[0], indian, 1, 0))