def _RF():
    "Test RF"
    dir = "../Data"
    one, two = explore(dir)
    # Training data
    train_DF = createTbl([one[0][0]])
    # Test data
    test_df = createTbl([one[0][1]])
    actual = Bugs(test_df)
    preds = rforest(train_DF, test_df, mss=6, msl=8, max_feat=4, n_est=5756, smoteit=False)
    print _Abcd(before=actual, after=preds, show=False)[-1]
def _logit():
    "Test LOGIT"
    root = './Data'
    first, second = explore(root)
    # Training table comes from the first listing...
    train_tbl = createTbl(first[0])
    # ...and the test table from the second.
    test_tbl = createTbl(second[0])
    truth = Bugs(test_tbl)
    guesses = logit(train_tbl, test_tbl)
    set_trace()
    _Abcd(train=truth, test=guesses, verbose=True)
def _adaboost():
    "Test AdaBoost"
    path = "./Data"
    head, tail = explore(path)
    training = createTbl(head[0])  # training split
    testing = createTbl(tail[0])   # test split
    observed = Bugs(testing)
    predicted = adaboost(training, testing)
    set_trace()
    _Abcd(train=observed, test=predicted, verbose=True)
def _logit():
  "Test LOGIT"
  # Locate the data sets, then split into train/test tables.
  folder = './Data'
  part_a, part_b = explore(folder)
  training = createTbl(part_a[0])
  testing = createTbl(part_b[0])
  observed = Bugs(testing)
  predicted = logit(training, testing)
  set_trace()
  _Abcd(train=observed, test=predicted, verbose=True)
Example #5
0
def _where2pred():
  "Test where2"
  dir = '../Data'
  one, two = explore(dir)
  # Training data
  train_DF = createTbl(one[0])
  # Test data
  test_df = createTbl(two[0])
  actual = Bugs(test_df)
  preds = where2prd(train_DF, test_df)
  for a, b in zip(actual, preds): print a, b
  set_trace()
  print _Abcd(before = actual, after = preds, show = False)
Example #6
0
def _RF():
  "Test RF"
  dir = '../Data'
  one, two = explore(dir)
  # Training data
  train_DF = createTbl([one[0][0]])
  # Test data
  test_df = createTbl([one[0][1]])
  actual = Bugs(test_df)
  preds = rforest(train_DF, test_df, mss=6, msl=8,
                  max_feat=4, n_est=5756,
                  smoteit=False)
  print _Abcd(before=actual, after=preds, show=False)[-1]
Example #7
0
def _where2pred():
    "Test where2"
    dir = '../Data'
    one, two = explore(dir)
    # Training data
    train_DF = createTbl(one[0])
    # Test data
    test_df = createTbl(two[0])
    actual = Bugs(test_df)
    preds = where2prd(train_DF, test_df)
    for a, b in zip(actual, preds):
        print a, b
    set_trace()
    print _Abcd(before=actual, after=preds, show=False)
Example #8
0
  def go(self):
    """Run self.reps rounds: predict bugs on the test table, plan changes
    with xtrees over the predicted-defective rows, re-predict on the
    planned table, and append the surviving-defect fraction to self.out.
    Prints self.out when done."""

    for _ in xrange(self.reps):
      predRows = []
      # Train/test tables for the current data-set index self._n.
      train_DF = createTbl(self.train[self._n], isBin=True)
      test_df = createTbl(self.test[self._n], isBin=True)
      actual = np.array(Bugs(test_df))
      before = self.pred(train_DF, test_df,
                         tunings=self.tunedParams,
                         smoteit=True)

      # Keep only the rows the learner flags as defective (predicted > 0).
      predRows = [row.cells for predicted
                  , row in zip(before, test_df._rows) if predicted > 0]
      predTest = clone(test_df, rows=predRows)

      newTab = xtrees(train=self.train[self._n]
                          , test_DF=predTest, bin=False).main()

      after = self.pred(train_DF, newTab,
                        tunings=self.tunedParams,
                        smoteit=True)

      self.out_pred.append(_Abcd(before=actual, after=before))
      # set_trace()
      # NOTE(review): delta is computed but never used here; other go()
      # variants in this file append delta to self.out instead of frac --
      # confirm which is intended.
      delta = cliffs(lst2=Bugs(predTest), lst1=after).delta()
      # Ratio of defective rows after planning vs before.
      # NOTE(review): under Python 2 this is integer division unless
      # __future__ division is imported at file level -- confirm.
      frac = sum([0 if a < 1 else 1 for a in after]) / \
          sum([0 if b < 1 else 1 for b in before])
      self.out.append(frac)
    print(self.out)
Example #9
0
 def depen(self, rows):
     """Objective function: g-score of a tuned random forest run."""
     predictions = rforest(self.train,
                           self.test,
                           tunings=rows,  # n_est, max_feat, mss, msl
                           smoteit=True)
     return _Abcd(before=Bugs(self.test), after=predictions, show=False)[-1]
Example #10
0
 def depen(self, row):
   """Map a candidate tuning vector `row` onto a g-score for where2."""
   # where2prd() consumes data as file-path strings,
   # e.g. '../Data/ant/ant-1.6.csv'.
   self.where = defaults().update(minSize=row[4],
                                  depthMin=int(row[5]),
                                  depthMax=int(row[6]),
                                  prune=row[7] > 0.5)
   self.tree.infoPrune = row[1]
   self.tree.m = int(row[2])
   self.tree.n = int(row[3])
   self.tree.prune = row[8] > 0.5
   truth = Bugs(createTbl([self.test], isBin=True))
   guessed = where2prd(self.train, [self.test],
                       tunings=[self.where, self.tree], thresh=row[0])
   return _Abcd(before=truth, after=guessed, show=False)[-1]
def _where2pred():
  "Test where2"
  root = '../Data'
  found, _ = explore(root)
  # All but the last file train; the last one tests.
  training = found[0][:-1]
  holdout = [found[0][-1]]
  truth = Bugs(createTbl(holdout, isBin=True))
  guessed = where2prd(training, holdout)
  return _Abcd(before=truth, after=guessed, show=False)[-1]
def _where2pred():
    "Test where2"
    data_dir = '../Data'
    listing, _ = explore(data_dir)
    # Train on every file except the newest; test on the newest.
    train_files = listing[0][:-1]
    test_files = [listing[0][-1]]
    observed = Bugs(createTbl(test_files, isBin=True))
    predicted = where2prd(train_files, test_files)
    return _Abcd(before=observed, after=predicted, show=False)[-1]
Example #13
0
 def depen(self, row):
   """Score one where2 tuning vector `row` with the _Abcd g measure."""
   # where2prd() takes data in string format, e.g.
   # '../Data/ant/ant-1.6.csv'
   self.where = defaults().update(minSize=row[4],
                                  depthMin=int(row[5]),
                                  depthMax=int(row[6]),
                                  prune=row[7] > 0.5)
   self.tree.infoPrune = row[1]
   self.tree.m = int(row[2])
   self.tree.n = int(row[3])
   self.tree.prune = row[8] > 0.5
   actual = Bugs(createTbl([self.test], isBin=True))
   preds = where2prd(self.train,
                     [self.test],
                     tunings=[self.where, self.tree],
                     thresh=row[0])
   return _Abcd(before=actual, after=preds, show=False)[-1]
Example #14
0
  def go(self):
    """For self.reps repeats: learn on the last two training files, predict
    bugs on the test file, treat the predicted-defective rows with
    treatments2, re-predict, and append the cliffs delta to self.out.
    Finally prefixes self.out / self.out_pred with a run label and prints
    self.out."""

    for _ in xrange(self.reps):
      predRows = []
      train_DF = createTbl(self.train[self._n][-2:], isBin=True)
      test_df = createTbl(self.test[self._n], isBin=True)
      actual = Bugs(test_df)
      before = self.pred(train_DF, test_df,
                         tunings=self.tunedParams,
                         smoteit=True)

      # Keep rows predicted defective, stashing the predicted value in the
      # next-to-last cell.
      for predicted, row in zip(before, test_df._rows):
        tmp = row.cells
        tmp[-2] = predicted
        if predicted > 0:
          predRows.append(tmp)

      predTest = clone(test_df, rows=predRows)

      # Plan treatments; pass test_df only when there are predicted rows.
      if predRows:
        newTab = treatments2(
            train=self.train[self._n][-2:],
            test=self.test[self._n],
            test_df=predTest,
            extent=self.extent,
            far=False,
            smote=True,
            resample=False,
            infoPrune=self.infoPrune,
            Prune=self.Prune).main()
      else:
        newTab = treatments2(
            train=self.train[
                self._n][-2:],
            test=self.test[
                self._n],
            far=False,
            smote=True,
            resample=False,
            extent=self.extent,
            infoPrune=self.infoPrune,
            Prune=self.Prune).main()

      after = self.pred(train_DF, newTab,
                        tunings=self.tunedParams,
                        smoteit=True)

      self.out_pred.append(_Abcd(before=actual, after=before))
      delta = cliffs(lst1=Bugs(predTest), lst2=after).delta()
      self.out.append(delta)
    # Build a run label from extent / feature-selection / info-pruning flags.
    if self.extent == 0:
      append = 'Base'
    else:
      if self.Prune:
        append = str(
            self.extent) + '_iP(' + str(
            int(self.infoPrune * 100)) + r'%)' if not self.fSelect else str(
            self.extent) + '_w_iP(' + str(
            int(self.infoPrune * 100)) + r'%)'
      else:
        append = str(
            self.extent) if not self.fSelect else str(
            self.extent) + '_w'

    self.out.insert(0, self.dataName + '_' + append)
    self.out_pred.insert(0, self.dataName)
    print(self.out)
Example #15
0
def main():
  """Benchmark CART before/after contrast-set planning on every data set
  under ../Data.  For each data set (10 repeats): predict bugs, plan with
  treatments2 in 'near' and 'far' modes, re-predict on the planned tables,
  and collect cliffs-delta effect sizes; results are ranked via rdivDemo.

  BUG FIX: the result-row labels were swapped (outNear carried '_Far' and
  outFar carried '_Near'), so rdivDemo attributed each planner's results
  to the other."""
  dir = '../Data'
  from os import walk
  # One entry per data-set directory directly under dir.
  dataName = [Name for _, Name, __ in walk(dir)][0]
  numData = len(dataName)  # Number of data
  Prd = [CART]  # , rforest]  # , adaboost, logit, knn]
  _smoteit = [True]  # , False]
  _tuneit = [False]
  cd = {}
  abcd = []
  res = {}
  for n in xrange(numData):

    out11 = []
    outA1 = []
    out1 = []
    outFar = []
    outNear = []
    outa = []
    one, two = explore(dir)
    data = [one[i] + two[i] for i in xrange(len(one))]
    print('##', dataName[n])
    for p in Prd:
      train = [dat[0] for dat in withinClass(data[n])]
      test = [dat[1] for dat in withinClass(data[n])]
      reps = 10
      abcd = [[], []]
      for t in _tuneit:
        tunedParams = None if not t else params
        print('### Tuning') if t else print('### No Tuning')
        for _smote in _smoteit:
          _n = -1
          # Training data
          for _ in xrange(reps):

            train_DF = createTbl(train[_n], isBin=True)
            # Testing data
            test_df = createTbl(test[_n], isBin=True)
            predRows = []
            actual = Bugs(test_df)
            before = p(train_DF, test_df,
                       tunings=tunedParams,
                       smoteit=True)
            tunedParams = None if not t else tuner(p, train[_n])
            # Keep rows predicted defective, stashing the predicted value
            # in the next-to-last cell.
            for predicted, row in zip(before, test_df._rows):
              tmp = row.cells
              tmp[-2] = predicted
              if predicted > 0:
                predRows.append(tmp)
            predTest = clone(test_df, rows=predRows)
            # Find and apply contrast sets (near and far planners).
            newTab_near = (treatments2(train=train[_n], far=False,
                                       test=test[_n],
                                       test_df=predTest).main()
                           if predRows
                           else treatments2(train=train[_n],
                                            test=test[_n]).main())
            # NOTE(review): the 'far' planner never passes far=True and so
            # relies on treatments2's default -- confirm that default.
            newTab_far = (treatments2(train=train[_n], test=test[_n],
                                      test_df=predTest).main()
                          if predRows
                          else treatments2(train=train[_n],
                                           test=test[_n]).main())

            after_far = p(train_DF, newTab_far,
                          tunings=tunedParams,
                          smoteit=True)
            after_near = p(train_DF, newTab_near,
                           tunings=tunedParams,
                           smoteit=True)
            outa.append(_Abcd(before=actual, after=before))
            cliffsFar = cliffsdelta(Bugs(predTest), after_far)
            cliffsNear = cliffsdelta(Bugs(predTest), after_near)
            outFar.append(cliffsFar)
            outNear.append(cliffsNear)
          # Label each result row with the data-set name (labels were
          # previously swapped -- see docstring).
          outNear.insert(0, dataName[n] + '_Near')
          outFar.insert(0, dataName[n] + '_Far')

          outa.insert(0, dataName[n])
        out11.extend([outNear, outFar])
        outA1.append(outa)
        try:
          print('```')
          rdivDemo(out11, isLatex=False)
          print('```')
        except IndexError:
          # rdivDemo can fail on degenerate result lists; skip the report.
          pass
Example #16
0
 def depen(self, rows):
   """Objective: g-score of rforest under the candidate tunings `rows`."""
   predicted = rforest(self.train,
                       self.test,
                       tunings=rows,  # n_est, max_feat, mss, msl
                       smoteit=True)
   return _Abcd(before=Bugs(self.test), after=predicted, show=False)[-1]
Example #17
0
def main():
  """One-rep benchmark: for each data set under ../Data, learn CART on the
  raw data, apply treatments(), predict again, and print _Abcd rows plus a
  win ratio."""
  dir = '../Data'
  from os import walk
  # One entry per data-set directory directly under dir.
  dataName = [Name for _, Name, __ in walk(dir)][0]
  numData = len(dataName)  # Number of data
  Prd = [CART]  # , rforest]  # , adaboost, logit, knn]
  _smoteit = [False]  # , False]
  _tuneit = [False]  # , False]
  cd = []
  abcd = []
  res = {}
  for n in xrange(numData):
    one, two = explore(dir)
    data = [one[i] + two[i] for i in xrange(len(one))];
    print('##', dataName[n])
    for p in Prd:
#       print(p.__doc__)
      # params = tuner(p, data[0])
#       print(params)
      train = [dat[0] for dat in withinClass(data[n])]
      test = [dat[1] for dat in withinClass(data[n])]
      reps = 1
      abcd = [[], []];
      for t in _tuneit:
#         print('### Tuning') if t else print('### No Tuning')
        for _smote in _smoteit:
#           print('### SMOTE-ing') if _smote else print('### No SMOTE-ing')
    #       print('```')
#          for _n in xrange(0):
#          set_trace()
          _n = -1
          # Training data
          for _ in xrange(reps):
            train_DF = createTbl(train[_n])
#            set_trace()
            # Testing data
            test_df = createTbl(test[_n])
            # Tune?
            tunedParams = None
#             tunedParams = None if not t else params
            # Find and apply contrast sets
            # NOTE(review): other variants in this file call .main() on
            # treatments(...); here the bare object is handed to p() below
            # -- confirm that is intended.
            newTab = treatments(train = train[_n],
                                test = test[_n], verbose = False, smoteit = True)


            # Actual bugs
            actual = Bugs(test_df)
            actual1 = [0 if a == 0 else 1 for a in actual]
            # Use the classifier to predict the number of bugs in the raw data.
            before = p(train_DF, test_df,
                       tunings = tunedParams,
                       smoteit = _smote)
            before1 = [0 if b == 0 else 1 for b in before]
            # Use the classifier to predict the number of bugs in the new data.
            after = p(train_DF, newTab,
                      tunings = tunedParams,
                      smoteit = _smote)
            after1 = [0 if a == 0 else 1 for a in after]
#           %  set_trace()

#             write('.')
#             write('Training: '); [write(l + ', ') for l in train[_n]]; print('\n')
#             cd.append(showoff(dataName[n], actual1, after1))
#             print(showoff(dataName[n], actual1, after1))
#             write('Test: '); [write(l) for l in test[_n]],
            out = _Abcd(before = actual1, after = before1)
            # NOTE(review): integer division under Python 2 unless
            # __future__ division is imported -- confirm.
            print('Win Ratio : %0.2d' % (sum(after1) / sum(actual1)))
#             %print('Prediction accuracy (g)  %.2d' % out[-1])
#             print (out[-1])
            if _smote:
              out.insert(0, p.__doc__ + '(s, Tuned)  ') if t \
              else out.insert(0, p.__doc__ + '(s, Naive)  ')
              abcd[0].append(out)
            else:
              out.insert(0, p.__doc__ + '(raw, Tuned)') if t \
              else out.insert(0, p.__doc__ + '(raw, Naive)')
              abcd[1].append(out)
      print()
#       cd.update({p.__doc__:sorted(cd)})
#       res.update({p.__doc__:(abcd[0][0:reps],
#                            abcd[0][reps:] ,
#                            abcd[1][0:reps],
#                            abcd[1][reps:] ,
#                            )})
    print('```')
#     print(cd)
#     printsk(res)
    print('```')
Example #18
0
    def go(self):
        """For self.reps repeats: learn on the last two training files,
        predict bugs on the test file, treat the predicted-defective rows
        with treatments2, re-predict, and append the cliffs delta to
        self.out.  Finally prefixes self.out / self.out_pred with a run
        label and prints self.out."""

        for _ in xrange(self.reps):
            predRows = []
            train_DF = createTbl(self.train[self._n][-2:], isBin=True)
            test_df = createTbl(self.test[self._n], isBin=True)
            actual = Bugs(test_df)
            before = self.pred(train_DF,
                               test_df,
                               tunings=self.tunedParams,
                               smoteit=True)

            # Keep rows predicted defective, stashing the predicted value
            # in the next-to-last cell.
            for predicted, row in zip(before, test_df._rows):
                tmp = row.cells
                tmp[-2] = predicted
                if predicted > 0:
                    predRows.append(tmp)

            predTest = clone(test_df, rows=predRows)

            # Plan treatments; pass test_df only when rows were predicted.
            if predRows:
                newTab = treatments2(train=self.train[self._n][-2:],
                                     test=self.test[self._n],
                                     test_df=predTest,
                                     extent=self.extent,
                                     far=False,
                                     smote=True,
                                     resample=False,
                                     infoPrune=self.infoPrune,
                                     Prune=self.Prune).main()
            else:
                newTab = treatments2(train=self.train[self._n][-2:],
                                     test=self.test[self._n],
                                     far=False,
                                     smote=True,
                                     resample=False,
                                     extent=self.extent,
                                     infoPrune=self.infoPrune,
                                     Prune=self.Prune).main()

            after = self.pred(train_DF,
                              newTab,
                              tunings=self.tunedParams,
                              smoteit=True)

            self.out_pred.append(_Abcd(before=actual, after=before))
            delta = cliffs(lst1=Bugs(predTest), lst2=after).delta()
            self.out.append(delta)
        # Build a run label from extent / feature-selection / info-pruning
        # flags.
        if self.extent == 0:
            append = 'Base'
        else:
            if self.Prune:
                append = str(self.extent) + '_iP(' + str(
                    int(self.infoPrune *
                        100)) + r'%)' if not self.fSelect else str(
                            self.extent) + '_w_iP(' + str(
                                int(self.infoPrune * 100)) + r'%)'
            else:
                append = str(self.extent) if not self.fSelect else str(
                    self.extent) + '_w'

        self.out.insert(0, self.dataName + '_' + append)
        self.out_pred.insert(0, self.dataName)
        print(self.out)
Example #19
0
 def depen(self, rows):
   """Objective: g-score of CART (no SMOTE) under the tunings `rows`."""
   predictions = CART(self.train, self.test, tunings=rows, smoteit=False)
   return _Abcd(before=Bugs(self.test), after=predictions, show=False)[-1]
Example #20
0
 def depen(self, rows):
     """Objective: g-score of CART (with SMOTE) under the tunings `rows`."""
     guessed = CART(self.train, self.test, tunings=rows, smoteit=True)
     return _Abcd(before=Bugs(self.test), after=guessed, show=False)[-1]
Example #21
0
def main():
    """One-rep benchmark (4-space-indent variant): for each data set under
    ../Data, learn CART on the raw data, apply treatments(), predict again,
    and print _Abcd rows plus a win ratio."""
    dir = '../Data'
    from os import walk
    # One entry per data-set directory directly under dir.
    dataName = [Name for _, Name, __ in walk(dir)][0]
    numData = len(dataName)  # Number of data
    Prd = [CART]  # , rforest]  # , adaboost, logit, knn]
    _smoteit = [False]  # , False]
    _tuneit = [False]  # , False]
    cd = []
    abcd = []
    res = {}
    for n in xrange(numData):
        one, two = explore(dir)
        data = [one[i] + two[i] for i in xrange(len(one))]
        print('##', dataName[n])
        for p in Prd:
            #       print(p.__doc__)
            # params = tuner(p, data[0])
            #       print(params)
            train = [dat[0] for dat in withinClass(data[n])]
            test = [dat[1] for dat in withinClass(data[n])]
            reps = 1
            abcd = [[], []]
            for t in _tuneit:
                #         print('### Tuning') if t else print('### No Tuning')
                for _smote in _smoteit:
                    #           print('### SMOTE-ing') if _smote else print('### No SMOTE-ing')
                    #       print('```')
                    #          for _n in xrange(0):
                    #          set_trace()
                    _n = -1
                    # Training data
                    for _ in xrange(reps):
                        train_DF = createTbl(train[_n])
                        #            set_trace()
                        # Testing data
                        test_df = createTbl(test[_n])
                        # Tune?
                        tunedParams = None
                        #             tunedParams = None if not t else params
                        # Find and apply contrast sets
                        # NOTE(review): other variants call .main() on
                        # treatments(...); here the bare object is handed
                        # to p() below -- confirm that is intended.
                        newTab = treatments(train=train[_n],
                                            test=test[_n],
                                            verbose=False,
                                            smoteit=True)

                        # Actual bugs
                        actual = Bugs(test_df)
                        actual1 = [0 if a == 0 else 1 for a in actual]
                        # Use the classifier to predict the number of bugs in the raw data.
                        before = p(train_DF,
                                   test_df,
                                   tunings=tunedParams,
                                   smoteit=_smote)
                        before1 = [0 if b == 0 else 1 for b in before]
                        # Use the classifier to predict the number of bugs in the new data.
                        after = p(train_DF,
                                  newTab,
                                  tunings=tunedParams,
                                  smoteit=_smote)
                        after1 = [0 if a == 0 else 1 for a in after]
                        #           %  set_trace()

                        #             write('.')
                        #             write('Training: '); [write(l + ', ') for l in train[_n]]; print('\n')
                        #             cd.append(showoff(dataName[n], actual1, after1))
                        #             print(showoff(dataName[n], actual1, after1))
                        #             write('Test: '); [write(l) for l in test[_n]],
                        out = _Abcd(before=actual1, after=before1)
                        # NOTE(review): integer division under Python 2
                        # unless __future__ division is imported -- confirm.
                        print('Win Ratio : %0.2d' %
                              (sum(after1) / sum(actual1)))
                        #             %print('Prediction accuracy (g)  %.2d' % out[-1])
                        #             print (out[-1])
                        if _smote:
                            out.insert(0, p.__doc__ + '(s, Tuned)  ') if t \
                            else out.insert(0, p.__doc__ + '(s, Naive)  ')
                            abcd[0].append(out)
                        else:
                            out.insert(0, p.__doc__ + '(raw, Tuned)') if t \
                            else out.insert(0, p.__doc__ + '(raw, Naive)')
                            abcd[1].append(out)
            print()


#       cd.update({p.__doc__:sorted(cd)})
#       res.update({p.__doc__:(abcd[0][0:reps],
#                            abcd[0][reps:] ,
#                            abcd[1][0:reps],
#                            abcd[1][reps:] ,
#                            )})
        print('```')
        #     print(cd)
        #     printsk(res)
        print('```')