Example 1
import math
from random import sample

def randomOneSideMassageData(examples, protectedIndex, protectedValue):
   bias = signedStatisticalParity(examples, protectedIndex, protectedValue)
   print("Initial bias:", bias)
   favored_trait = 1-protectedValue

   #break up data by label and by the value of the protected trait
   favored_data = [(x,label) for x,label in examples if x[protectedIndex]==favored_trait]
   nonfavored_data = [(x,label) for x,label in examples if x[protectedIndex]!=favored_trait]
   favored_data_positive = [pt for pt in favored_data if pt[1]==1]
   nonfavored_data_negative = [pt for pt in nonfavored_data if pt[1]==-1]

   print("len(favored_data): %.3f" % len(favored_data))
   print("len(nonfavored_data): %.3f" % len(nonfavored_data))
   print("len(favored_data_positive): %.3f" % len(favored_data_positive))
   print("len(nonfavored_data_negative): %.3f" % len(nonfavored_data_negative))

   #calculate number of labels to flip from -1 to +1 on the nonfavored side
   num_nonfavored_positive = len(nonfavored_data)-len(nonfavored_data_negative)
   print("len(num_nonfavored_positive): %.3f" % num_nonfavored_positive)
   num_to_flip = math.floor((len(nonfavored_data)*len(favored_data_positive) - len(favored_data)*num_nonfavored_positive)/len(favored_data))
   print("Number of labels flipped:", num_to_flip)

   to_flip_to_pos = sample(nonfavored_data_negative, num_to_flip)

   flipped_examples = []
   for data in examples:
      if data in to_flip_to_pos:
         flipped_examples.append((data[0],-1*data[1]))
      else:
         flipped_examples.append(data)

   return flipped_examples
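Every example on this page calls signedStatisticalParity without showing it. A minimal sketch of one plausible implementation, assuming ±1 labels and an optional hypothesis argument (when h is omitted, the stored labels are used); this is an assumption about the codebase, not its confirmed definition:

def signedStatisticalParity(data, protectedIndex, protectedValue, h=None):
    # Outcome of a point: the classifier's prediction if h is given,
    # otherwise the stored training label.
    outcome = (lambda x, label: h(x)) if h is not None else (lambda x, label: label)

    protected = [(x, l) for (x, l) in data if x[protectedIndex] == protectedValue]
    others = [(x, l) for (x, l) in data if x[protectedIndex] != protectedValue]

    # Difference of positive-outcome rates; the sign records which side is favored.
    protectedRate = sum(1 for (x, l) in protected if outcome(x, l) == 1) / len(protected)
    othersRate = sum(1 for (x, l) in others if outcome(x, l) == 1) / len(others)
    return protectedRate - othersRate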
Example 2
 def optimalShiftClassifier(self, goal=None, condition=None, rounds=20):
     if goal is None:
         goal = lambda d, h: signedStatisticalParity(
             d, self.protectedIndex, self.protectedValue, h)
     if condition is None:
         condition = self.protected
     return self.conditionalShiftClassifier(
         self.optimalShift(goal, condition, rounds), condition)
Example 3
def statistics(train, test, protectedIndex, protectedValue, learner):
    h = learner(train, protectedIndex, protectedValue)
    print("Computing error")
    error = labelError(test, h)
    print("Computing bias")
    bias = signedStatisticalParity(test, protectedIndex, protectedValue, h)
    print("Computing UBIF")
    ubif = individualFairness(train, learner, 0.2, passProtected=True)
    return error, bias, ubif
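labelError is likewise assumed but not shown; a minimal sketch, reading it as the plain misclassification rate on labeled data:

def labelError(data, h):
    # Fraction of labeled points that h misclassifies.
    return sum(1 for (x, label) in data if h(x) != label) / len(data)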
Example 4
def statistics(train, test, protectedIndex, protectedValue, learner):
   h = learner(train, protectedIndex, protectedValue)
   print("Computing error")
   error = labelError(test, h)
   print("Computing bias")
   bias = signedStatisticalParity(test, protectedIndex, protectedValue, h)
   print("Computing UBIF")
   ubif = individualFairness(train, learner, flipProportion=0.2, passProtected=True)
   return error, bias, ubif
Example 5
def statistics(massager, trainingData, testData, protectedIndex, protectedValue,
               learner, flipProportion=0.2):
   massagedData = massager(trainingData, protectedIndex, protectedValue)
   h = learner(massagedData)

   error = labelError(testData, h)
   bias = signedStatisticalParity(testData, protectedIndex, protectedValue, h)
   ubif = individualFairness(trainingData, learner, flipProportion)

   return error, bias, ubif
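As a baseline against the massaging repair, a trivial massager (not part of the source) can be dropped into the statistics signature above; since it performs no repair, the resulting error, bias, and UBIF describe the raw learner:

def identityMassager(trainingData, protectedIndex, protectedValue):
    # Baseline: leave the training data untouched.
    return trainingData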
Example 6
def thresholdRelabel(h, trainingData, protectedIndex, protectedValue,
                     hypotheses, weights, threshold):
   bias = signedStatisticalParity(trainingData, protectedIndex, protectedValue, h)
   biasedClass = 1 - zeroOneSign(bias)

   def relabel(pt):
      proposedLabel = h(pt)
      if (pt[protectedIndex] == biasedClass and
          absMargin(pt, hypotheses, weights) < threshold):
         return -proposedLabel
      else:
         return proposedLabel

   return relabel
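thresholdRelabel also depends on two helpers the page never shows. Minimal sketches under plain-reading assumptions: zeroOneSign maps the sign of the bias to a {0, 1} trait value, and absMargin is the standard boosting confidence, the absolute weighted vote of the ensemble. Both are guesses at this codebase's conventions, not confirmed definitions.

def zeroOneSign(x):
    # Map a signed quantity to a {0, 1} trait value (assumed convention).
    return 1 if x >= 0 else 0

def absMargin(pt, hypotheses, weights):
    # Absolute weighted vote of the ensemble on pt; small values mean the
    # boosted classifier is unsure about pt, so relabeling it is cheap.
    return abs(sum(w * h(pt) for h, w in zip(hypotheses, weights)))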
Example 7
def statistics(train, test, protectedIndex, protectedValue, numRounds=20):
   weight = 0.5
   flipProportion = 0.2

   errorFunction = makeErrorFunction(protectedIndex, protectedValue, weight)
   weakLearner = lambda draw: buildDecisionStump(draw, errorFunction=errorFunction)

   h = boosting.boost(train, weakLearner=weakLearner)

   bias = ef.signedStatisticalParity(test, protectedIndex, protectedValue, h)
   error = ef.labelError(test, h)
   ubif = ef.individualFairness(train, boosting.boost, flipProportion)

   return error, bias, ubif
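makeErrorFunction is not shown on this page. One plausible reading, given how weight is used above, is a convex combination of misclassification error and the absolute statistical-parity bias; both this form and the (data, h) signature expected by buildDecisionStump are assumptions, and the sketch reuses the labelError and signedStatisticalParity sketches from earlier in this listing.

def makeErrorFunction(protectedIndex, protectedValue, weight):
    # Hypothetical: trade accuracy against fairness, steered by weight in [0, 1].
    def errorFunction(data, h):
        return ((1 - weight) * labelError(data, h)
                + weight * abs(signedStatisticalParity(
                      data, protectedIndex, protectedValue, h)))
    return errorFunction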
Example 9
    def optimalShift(self, goal=None, condition=None, rounds=20):
        if goal is None:
            goal = lambda d, h: signedStatisticalParity(
                d, self.protectedIndex, self.protectedValue, h)
        if condition is None:
            condition = self.protected

        low = self.minShift
        high = self.maxShift
        dataToUse = self.validationData

        minGoalValue = goal(dataToUse,
                            self.conditionalShiftClassifier(low, condition))
        maxGoalValue = goal(dataToUse,
                            self.conditionalShiftClassifier(high, condition))
        #print((low, minGoalValue))
        #print((high, maxGoalValue))

        if sign(minGoalValue) != sign(maxGoalValue):
            # a binary search for zero
            for _ in range(rounds):
                midpoint = (low + high) / 2
                lowSign = sign(goal(
                    dataToUse, self.conditionalShiftClassifier(low, condition)))
                midSign = sign(goal(
                    dataToUse, self.conditionalShiftClassifier(midpoint, condition)))
                if lowSign == midSign:
                    low = midpoint
                else:
                    high = midpoint
            return midpoint
        else:
            print("Warning: bisection method not applicable")
            bestShift = None
            bestVal = float('inf')
            step = (high - low) / rounds
            for newShift in numpy.arange(low, high, step):
                newVal = goal(
                    dataToUse,
                    self.conditionalShiftClassifier(newShift, condition))
                print(newVal)
                newVal = abs(newVal)
                if newVal < bestVal:
                    bestShift = newShift
                    bestVal = newVal
            return bestShift
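The fallback branch iterates with numpy.arange, and both branches call a sign helper that the page does not define. A minimal sketch, under the assumption that zero counts as positive:

import numpy

def sign(x):
    # Sign of x, with 0 treated as positive (assumed convention).
    return 1 if x >= 0 else -1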
Example 10
def thresholdRelabel(h, trainingData, protectedIndex, protectedValue,
                     hypotheses, weights, threshold):
    bias = signedStatisticalParity(trainingData, protectedIndex,
                                   protectedValue, h)
    biasedClass = 1 - zeroOneSign(bias)

    def relabel(pt):
        proposedLabel = h(pt)
        if (pt[protectedIndex] == biasedClass
                and absMargin(pt, hypotheses, weights) < threshold):
            return -proposedLabel
        else:
            return proposedLabel

    return relabel
Example 11
from random import random

def randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue):
   bias = signedStatisticalParity(trainingData, protectedIndex, protectedValue, h)
   favored_trait = zeroOneSign(bias)

   nonfavored_data = [(feats,label) for feats,label in trainingData if feats[protectedIndex] != favored_trait]
   NF, NFn = len(nonfavored_data), len([1 for x,label in nonfavored_data if h(x)==-1])

   p = NF*abs(bias)/NFn
   def relabeledClassifier(point):
      origClass = h(point)
      if point[protectedIndex] != favored_trait and origClass == -1:
         if random() < p:
            return -origClass
         else:
            return origClass
      else:
         return origClass

   return relabeledClassifier
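A toy demonstration of the wrapper (the data and classifier below are made up, and the run assumes the helper sketches given earlier in this listing):

if __name__ == "__main__":
    data = [((0, 1), 1), ((0, 0), -1), ((0, 0), -1),
            ((1, 1), 1), ((1, 1), 1), ((1, 0), -1)]
    h = lambda x: 1 if x[1] == 1 else -1  # ignores the protected trait at index 0
    wrapped = randomOneSideRelabelData(h, data, protectedIndex=0, protectedValue=0)
    print([wrapped(x) for x, _ in data])  # predictions after randomized relabeling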
Example 12
import math
from random import sample

def randomOneSideMassageData(examples, protectedIndex, protectedValue):
    bias = signedStatisticalParity(examples, protectedIndex, protectedValue)
    print("Initial bias:", bias)
    favored_trait = 1 - protectedValue

    # break up data by label and by the value of the protected trait
    favored_data = [(x, label) for x, label in examples
                    if x[protectedIndex] == favored_trait]
    nonfavored_data = [(x, label) for x, label in examples
                       if x[protectedIndex] != favored_trait]
    favored_data_positive = [pt for pt in favored_data if pt[1] == 1]
    nonfavored_data_negative = [pt for pt in nonfavored_data if pt[1] == -1]

    print("len(favored_data): %.3f" % len(favored_data))
    print("len(nonfavored_data): %.3f" % len(nonfavored_data))
    print("len(favored_data_positive): %.3f" % len(favored_data_positive))
    print("len(nonfavored_data_negative): %.3f" %
          len(nonfavored_data_negative))

    # calculate number of labels to flip from -1 to +1 on the nonfavored side
    num_nonfavored_positive = len(nonfavored_data) - len(
        nonfavored_data_negative)
    print("len(num_nonfavored_positive): %.3f" % num_nonfavored_positive)
    num_to_flip = math.floor(
        (len(nonfavored_data) * len(favored_data_positive) -
         len(favored_data) * num_nonfavored_positive) / len(favored_data))
    print("Number of labels flipped:", num_to_flip)

    to_flip_to_pos = sample(nonfavored_data_negative, num_to_flip)

    flipped_examples = []
    for data in examples:
        if data in to_flip_to_pos:
            flipped_examples.append((data[0], -1 * data[1]))
        else:
            flipped_examples.append(data)

    return flipped_examples
Example 13
   def optimalShift(self, goal=None, condition=None, rounds=20):
      if goal is None:
         goal = lambda d, h: signedStatisticalParity(d, self.protectedIndex, self.protectedValue, h)
      if condition is None:
         condition = self.protected

      low = self.minShift
      high = self.maxShift
      dataToUse = self.validationData

      minGoalValue = goal(dataToUse, self.conditionalShiftClassifier(low, condition))
      maxGoalValue = goal(dataToUse, self.conditionalShiftClassifier(high, condition))
      print((low, minGoalValue))
      print((high, maxGoalValue))

      if sign(minGoalValue) != sign(maxGoalValue):
         # a binary search for zero
         for _ in range(rounds):
            midpoint = (low + high) / 2
            if (sign(goal(dataToUse, self.conditionalShiftClassifier(low, condition))) ==
                  sign(goal(dataToUse, self.conditionalShiftClassifier(midpoint, condition)))):
               low = midpoint
            else:
               high = midpoint
         return midpoint
      else:
         print("Warning: bisection method not applicable")
         bestShift = None
         bestVal = float('inf')
         step = (high-low)/rounds
         for newShift in numpy.arange(low, high, step):
            newVal = goal(dataToUse, self.conditionalShiftClassifier(newShift, condition))
            print(newVal)
            newVal = abs(newVal)
            if newVal < bestVal:
               bestShift = newShift
               bestVal = newVal
         return bestShift
Example 14
from random import random

def randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue):
    bias = signedStatisticalParity(trainingData, protectedIndex,
                                   protectedValue, h)
    favored_trait = zeroOneSign(bias)

    nonfavored_data = [(feats, label) for feats, label in trainingData
                       if feats[protectedIndex] != favored_trait]
    NF, NFn = (len(nonfavored_data),
               len([1 for x, label in nonfavored_data if h(x) == -1]))

    p = NF * abs(bias) / NFn

    def relabeledClassifier(point):
        origClass = h(point)
        if point[protectedIndex] != favored_trait and origClass == -1:
            if random() < p:
                return -origClass
            else:
                return origClass
        else:
            return origClass

    return relabeledClassifier
Example 15
 def optimalShiftClassifier(self, goal=None, condition=None, rounds=20):
    if goal is None:
       goal = lambda d, h: signedStatisticalParity(d, self.protectedIndex, self.protectedValue, h)
    if condition is None:
       condition = self.protected
    return self.conditionalShiftClassifier(self.optimalShift(goal, condition, rounds), condition)