Beispiel #1
0
def randomOneSideMassageData(examples, protectedIndex, protectedValue):
   """Return a copy of ``examples`` with some -1 labels flipped to +1.

   The favored value of the protected feature is ``zeroOneSign(bias)``, where
   ``bias`` is ``signedStatisticalParity(examples, protectedIndex,
   protectedValue)``. Examples whose protected feature differs from the
   favored value and whose label is -1 are candidates; a random subset of
   them is relabeled +1. The subset size is chosen so that the fraction of
   non-favored examples not labeled -1 approaches the fraction of favored
   examples labeled 1.

   Args:
      examples: re-iterable sequence of ``(x, label)`` pairs, where
         ``x[protectedIndex]`` is the protected feature and labels are
         compared against 1 and -1.
      protectedIndex: index of the protected feature inside ``x``.
      protectedValue: forwarded unchanged to ``signedStatisticalParity``.

   Returns:
      A new list in the same order as ``examples``. Untouched examples are
      the original objects; flipped ones are new ``(x, -label)`` tuples.

   Raises:
      ZeroDivisionError: if no example carries the favored value.
   """
   bias = signedStatisticalParity(examples, protectedIndex, protectedValue)
   print("Initial bias:", bias)
   favored_trait = zeroOneSign(bias)

   # Single pass: gather the group counts and remember *where* the flippable
   # examples are. Tracking positions (not values) keeps duplicate examples
   # distinct and works whether each example is a tuple or a list.
   num_favored = 0
   num_favored_positive = 0
   num_nonfavored = 0
   nonfavored_negative_indices = []
   for index, (x, label) in enumerate(examples):
      if x[protectedIndex] == favored_trait:
         num_favored += 1
         if label == 1:
            num_favored_positive += 1
      else:
         num_nonfavored += 1
         if label == -1:
            nonfavored_negative_indices.append(index)

   # calculate number of labels to flip from -1 to +1 on the nonfavored side
   num_nonfavored_positive = num_nonfavored - len(nonfavored_negative_indices)
   num_to_flip = math.floor(
      (num_nonfavored * num_favored_positive
       - num_favored * num_nonfavored_positive) / num_favored)
   print("Number of labels flipped:", num_to_flip)

   # Sampling indices flips exactly num_to_flip examples, and the set makes
   # the per-example membership test O(1).
   flip_indices = set(sample(nonfavored_negative_indices, num_to_flip))

   return [(data[0], -1 * data[1]) if index in flip_indices else data
           for index, data in enumerate(examples)]
Beispiel #2
0
def thresholdRelabel(h, trainingData, protectedIndex, protectedValue,
                     hypotheses, weights, threshold):
   """Build a classifier that negates some of ``h``'s predictions.

   The targeted group is the protected-feature value
   ``1 - zeroOneSign(bias)``, where ``bias`` is
   ``signedStatisticalParity(trainingData, protectedIndex, protectedValue, h)``.
   For a point in that group, the prediction of ``h`` is negated when
   ``absMargin(pt, hypotheses, weights)`` is strictly below ``threshold``.
   All other points keep the prediction of ``h``.

   Args:
      h: callable mapping a point to a label that supports unary minus.
      trainingData: data handed to ``signedStatisticalParity``.
      protectedIndex: index of the protected feature inside a point.
      protectedValue: forwarded unchanged to ``signedStatisticalParity``.
      hypotheses, weights: forwarded to ``absMargin``
         (presumably the members and weights of an ensemble; confirm).
      threshold: margin cutoff below which a prediction is negated.

   Returns:
      A function ``relabel(pt)`` returning the possibly negated label.
   """
   parity = signedStatisticalParity(trainingData, protectedIndex, protectedValue, h)
   targetGroup = 1 - zeroOneSign(parity)

   def relabel(pt):
      prediction = h(pt)
      # Only points of the targeted group are ever considered for a flip;
      # the margin is not evaluated for anyone else.
      if pt[protectedIndex] == targetGroup:
         if absMargin(pt, hypotheses, weights) < threshold:
            return -prediction
      return prediction

   return relabel
Beispiel #3
0
def thresholdRelabel(h, trainingData, protectedIndex, protectedValue,
                     hypotheses, weights, threshold):
    """Return a relabeling wrapper around the classifier ``h``.

    ``signedStatisticalParity`` is evaluated once on ``trainingData`` for
    ``h``. The protected-feature value ``1 - zeroOneSign(bias)`` identifies
    the group whose predictions may be negated. The returned function
    negates ``h(pt)`` when ``pt`` belongs to that group and
    ``absMargin(pt, hypotheses, weights) < threshold``. Otherwise it returns
    ``h(pt)`` unchanged.

    Args:
        h: callable mapping a point to a label that supports unary minus.
        trainingData: data handed to ``signedStatisticalParity``.
        protectedIndex: index of the protected feature inside a point.
        protectedValue: forwarded unchanged to ``signedStatisticalParity``.
        hypotheses, weights: forwarded to ``absMargin``.
        threshold: strict upper bound on the margin for a negation.

    Returns:
        A function ``relabel(pt)`` returning the possibly negated label.
    """
    measuredBias = signedStatisticalParity(
        trainingData, protectedIndex, protectedValue, h)
    flipGroup = 1 - zeroOneSign(measuredBias)

    def relabel(pt):
        label = h(pt)
        # Short-circuit: the margin is computed only for the flip group.
        shouldFlip = (pt[protectedIndex] == flipGroup
                      and absMargin(pt, hypotheses, weights) < threshold)
        return -label if shouldFlip else label

    return relabel
Beispiel #4
0
def randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue):
   """Wrap ``h`` so that some of its -1 predictions are randomly turned to +1.

   The favored value of the protected feature is ``zeroOneSign(bias)``, where
   ``bias`` is ``signedStatisticalParity(trainingData, protectedIndex,
   protectedValue, h)``. Only points whose protected feature differs from
   the favored value and that ``h`` classifies as -1 can be flipped. Each
   such prediction is negated independently with probability
   ``p = NF * abs(bias) / NFn``. ``NF`` is the number of non-favored
   training points and ``NFn`` is how many of those ``h`` classifies as -1.

   Args:
      h: callable mapping a point to a label; -1 is the flippable label.
      trainingData: iterable of ``(features, label)`` pairs; the labels are
         not used here.
      protectedIndex: index of the protected feature inside a point.
      protectedValue: forwarded unchanged to ``signedStatisticalParity``.

   Returns:
      A function ``relabeledClassifier(point)`` returning the label of
      ``h``, possibly negated. It is randomized via ``random()``.
   """
   bias = signedStatisticalParity(trainingData, protectedIndex, protectedValue, h)
   favored_trait = zeroOneSign(bias)

   nonfavored_points = [feats for feats, _ in trainingData
                        if not feats[protectedIndex] == favored_trait]
   NF = len(nonfavored_points)
   NFn = sum(1 for feats in nonfavored_points if h(feats) == -1)

   # If h predicts -1 for no non-favored training point, the ratio is
   # undefined. Fall back to never flipping instead of dividing by zero.
   p = NF * abs(bias) / NFn if NFn else 0.0

   def relabeledClassifier(point):
      origClass = h(point)
      # random() is drawn only for flippable predictions (short-circuit).
      if (point[protectedIndex] != favored_trait and origClass == -1
            and random() < p):
         return -origClass
      return origClass

   return relabeledClassifier
Beispiel #5
0
def randomOneSideRelabelData(h, trainingData, protectedIndex, protectedValue):
    """Wrap ``h`` so that some of its -1 predictions are randomly turned to +1.

    The favored value of the protected feature is ``zeroOneSign(bias)``,
    where ``bias`` is ``signedStatisticalParity(trainingData,
    protectedIndex, protectedValue, h)``. Only points whose protected feature
    differs from the favored value and that ``h`` classifies as -1 can be
    flipped. Each such prediction is negated independently with probability
    ``p = NF * abs(bias) / NFn``. ``NF`` is the number of non-favored
    training points and ``NFn`` is how many of those ``h`` classifies as -1.

    Args:
        h: callable mapping a point to a label; -1 is the flippable label.
        trainingData: iterable of ``(features, label)`` pairs; the labels
            are not used here.
        protectedIndex: index of the protected feature inside a point.
        protectedValue: forwarded unchanged to ``signedStatisticalParity``.

    Returns:
        A function ``relabeledClassifier(point)`` returning the label of
        ``h``, possibly negated. It is randomized via ``random()``.
    """
    bias = signedStatisticalParity(trainingData, protectedIndex,
                                   protectedValue, h)
    favored_trait = zeroOneSign(bias)

    nonfavored_points = [feats for feats, _ in trainingData
                         if not feats[protectedIndex] == favored_trait]
    NF = len(nonfavored_points)
    NFn = sum(1 for feats in nonfavored_points if h(feats) == -1)

    # If h predicts -1 for no non-favored training point, the ratio is
    # undefined. Fall back to never flipping instead of dividing by zero.
    p = NF * abs(bias) / NFn if NFn else 0.0

    def relabeledClassifier(point):
        origClass = h(point)
        # random() is drawn only for flippable predictions (short-circuit).
        if (point[protectedIndex] != favored_trait and origClass == -1
                and random() < p):
            return -origClass
        return origClass

    return relabeledClassifier