Example #1
0
def randomOneSideMassageData(examples, protectedIndex, protectedValue):
   """Reduce statistical-parity bias by flipping randomly chosen negative
   labels to positive on the non-favored side of the protected attribute.

   examples: list of (features, label) pairs with labels in {-1, +1}
   protectedIndex: index of the protected attribute in each feature vector
   protectedValue: the protected attribute value of the protected group

   Returns a new list of examples with some labels flipped; the input list
   is not modified.
   """
   bias = signedStatisticalParity(examples, protectedIndex, protectedValue)
   print("Initial bias:", bias)
   # assumes the protected attribute is binary 0/1 — TODO confirm
   favored_trait = 1-protectedValue

   #break up data by label and by the value of the protected trait
   favored_data = [(x,label) for x,label in examples if x[protectedIndex]==favored_trait]
   nonfavored_data = [(x,label) for x,label in examples if x[protectedIndex]!=favored_trait]
   favored_data_positive = [pt for pt in favored_data if pt[1]==1]
   nonfavored_data_negative = [pt for pt in nonfavored_data if pt[1]==-1]

   # counts are integers: use %d, not %.3f (which printed e.g. "10.000")
   print("len(favored_data): %d" % len(favored_data))
   print("len(nonfavored_data): %d" % len(nonfavored_data))
   print("len(favored_data_positive): %d" % len(favored_data_positive))
   print("len(nonfavored_data_negative): %d" % len(nonfavored_data_negative))

   #calculate number of labels to flip from -1 to +1 on the nonfavored side
   #so the two groups' positive rates match (floored to an integer)
   num_nonfavored_positive = len(nonfavored_data)-len(nonfavored_data_negative)
   print("num_nonfavored_positive: %d" % num_nonfavored_positive)
   num_to_flip = math.floor((len(nonfavored_data)*len(favored_data_positive) - len(favored_data)*num_nonfavored_positive)/len(favored_data))
   # guard: the formula can go negative when the nonfavored group is already
   # at (or past) parity, and cannot exceed the pool of negatives; sample()
   # raises ValueError in either case
   num_to_flip = max(0, min(num_to_flip, len(nonfavored_data_negative)))
   print("Number of labels flipped:", num_to_flip)

   to_flip_to_pos = sample(nonfavored_data_negative, num_to_flip)

   flipped_examples = []
   for data in examples:
      # linear membership test; examples may hold unhashable feature lists,
      # so a set is not safe here
      if data in to_flip_to_pos:
         flipped_examples.append((data[0],-1*data[1]))
      else:
         flipped_examples.append(data)

   return flipped_examples
Example #2
0
def statistics(train, test, protectedIndex, protectedValue, learner):
   """Fit a classifier with the given learner and report its test label
   error, signed statistical parity bias, and unweighted bias of
   individual fairness (UBIF)."""
   hypothesis = learner(train, protectedIndex, protectedValue)

   print("Computing error")
   testError = labelError(test, hypothesis)

   print("Computing bias")
   parityBias = signedStatisticalParity(test, protectedIndex, protectedValue, hypothesis)

   print("Computing UBIF")
   unfairness = individualFairness(train, learner, flipProportion=0.2, passProtected=True)

   return testError, parityBias, unfairness
Example #3
0
def statistics(massager, trainingData, testData, protectedIndex, protectedValue,
               learner, flipProportion=0.2):
   """Massage the training data with `massager`, train a classifier on the
   result, and report (label error, signed bias, UBIF) for it."""
   classifier = learner(massager(trainingData, protectedIndex, protectedValue))

   testError = labelError(testData, classifier)
   parity = signedStatisticalParity(testData, protectedIndex, protectedValue, classifier)
   fairness = individualFairness(trainingData, learner, flipProportion)

   return testError, parity, fairness
Example #4
0
def thresholdRelabel(h, trainingData, protectedIndex, protectedValue,
                     hypotheses, weights, threshold):
   """Wrap classifier h so that points in the biased-against class whose
   ensemble margin falls below `threshold` get their predicted label
   flipped; all other points keep h's prediction."""
   bias = signedStatisticalParity(trainingData, protectedIndex, protectedValue, h)
   biasedClass = 1 - zeroOneSign(bias)

   def relabel(pt):
      label = h(pt)
      inBiasedClass = pt[protectedIndex] == biasedClass
      lowConfidence = absMargin(pt, hypotheses, weights) < threshold
      return -label if (inBiasedClass and lowConfidence) else label

   return relabel
def statistics(train, test, protectedIndex, protectedValue, numRounds=20):
   """Boost fairness-aware decision stumps and report the boosted
   classifier's test label error, signed statistical parity bias, and UBIF.

   NOTE(review): numRounds is not passed to boosting.boost below — kept
   only for interface compatibility; confirm whether it should be used.
   """
   weight = 0.5
   flipProportion = 0.2

   # Use a distinct name for the error *function*: the weakLearner lambda
   # captures it by reference (late binding), so rebinding the name `error`
   # later — as the original code did for the test label error — would
   # silently change the error function seen by any later weakLearner call.
   errorFunction = makeErrorFunction(protectedIndex, protectedValue, weight)
   weakLearner = lambda draw: buildDecisionStump(draw, errorFunction=errorFunction)

   h = boosting.boost(train, weakLearner = weakLearner)

   bias = ef.signedStatisticalParity(test, protectedIndex, protectedValue, h)
   error = ef.labelError(test, h)
   ubif = ef.individualFairness(train, boosting.boost, flipProportion)

   return error, bias, ubif
Example #6
0
def randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue):
   """Wrap classifier h so that points in the non-favored group that h
   labels -1 are flipped to +1 with probability p, where p is chosen so
   the expected statistical-parity bias vanishes.

   h: trained classifier mapping a feature vector to a label in {-1, +1}
   Returns a new classifier function; h itself is unchanged.
   """
   bias = signedStatisticalParity(trainingData, protectedIndex, protectedValue, h)
   favored_trait = zeroOneSign(bias)

   nonfavored_data = [(feats,label) for feats,label in trainingData if feats[protectedIndex] != favored_trait]
   NF = len(nonfavored_data)
   NFn = len([1 for x,label in nonfavored_data if h(x)==-1])

   # If h never outputs -1 on the nonfavored group there is nothing to
   # flip; the original unconditional division raised ZeroDivisionError.
   p = NF*abs(bias)/NFn if NFn > 0 else 0.0

   def relabeledClassifier(point):
      origClass = h(point)
      # only nonfavored points predicted negative are eligible for a flip
      if point[protectedIndex] != favored_trait and origClass == -1:
         if random() < p:
            return -origClass
      return origClass

   return relabeledClassifier
Example #7
0
   def optimalShift(self, goal=None, condition=None, rounds=20):
      """Search [minShift, maxShift] for the decision-threshold shift that
      drives `goal` to zero on the validation data.

      goal: function (data, classifier) -> signed value to zero out;
            defaults to signed statistical parity w.r.t. the protected class.
      condition: predicate selecting which points get shifted; defaults to
            membership in the protected class.
      rounds: bisection iterations (or grid points for the fallback scan).

      Uses bisection when goal has opposite signs at the endpoints;
      otherwise falls back to a linear scan for the smallest |goal|.
      """
      if goal is None:
         goal = lambda d, h: signedStatisticalParity(d, self.protectedIndex, self.protectedValue, h)
      if condition is None:
         condition = self.protected

      low = self.minShift
      high = self.maxShift
      dataToUse = self.validationData

      minGoalValue = goal(dataToUse, self.conditionalShiftClassifier(low, condition))
      maxGoalValue = goal(dataToUse, self.conditionalShiftClassifier(high, condition))
      print((low, minGoalValue))
      print((high, maxGoalValue))  # bug fix: originally printed minGoalValue twice

      if sign(minGoalValue) != sign(maxGoalValue):
         # bisection for a zero of goal; the sign at `low` is invariant
         # across iterations (low only ever moves to a midpoint with the
         # same sign), so compute it once instead of re-evaluating a
         # shifted classifier at `low` every round
         lowSign = sign(minGoalValue)
         midpoint = (low + high) / 2  # defined even if rounds == 0
         for _ in range(rounds):
            midpoint = (low + high) / 2
            if lowSign == sign(goal(dataToUse, self.conditionalShiftClassifier(midpoint, condition))):
               low = midpoint
            else:
               high = midpoint
         return midpoint
      else:
         print("Warning: bisection method not applicable")
         bestShift = None
         bestVal = float('inf')
         step = (high-low)/rounds
         for newShift in numpy.arange(low, high, step):
            newVal = goal(dataToUse, self.conditionalShiftClassifier(newShift, condition))
            print(newVal)
            newVal = abs(newVal)
            if newVal < bestVal:
               bestShift = newShift
               bestVal = newVal
         return bestShift
Example #8
0
 def optimalShiftClassifier(self, goal=None, condition=None, rounds=20):
    """Return the conditional-shift classifier built from the shift that
    optimalShift finds for the given goal, condition, and round count."""
    chosenGoal = goal
    if chosenGoal is None:
       chosenGoal = lambda d, h: signedStatisticalParity(d, self.protectedIndex, self.protectedValue, h)
    chosenCondition = self.protected if condition is None else condition
    bestShift = self.optimalShift(chosenGoal, chosenCondition, rounds)
    return self.conditionalShiftClassifier(bestShift, chosenCondition)