コード例 #1
0
ファイル: vapp.py プロジェクト: queenstina/Transfer-Learning
  def kFoldCrossVal(self, data, fSel, ext, _prune, _info, method, k=5):
    """Run k-fold cross validation of the planner against the baseline.

    The rows of ``data`` are shuffled in place and split into ``k`` folds.
    Every fold is used exactly once as the test set while the remaining
    folds are used for training.

    Args:
      data: table object exposing ``_rows`` (rows expose ``.cells``); it is
        passed to ``clone`` to build the per-fold train/test tables.
      fSel, ext, _prune, _info, method: forwarded unchanged to
        ``self.planner``.
      k: number of folds.

    Returns:
      Tuple ``(acc, auc, md, bef, aft)``:
        acc -- per test row, ``(1 - |before - actual| / actual) * 100``
        auc -- per fold, ``(sum(before) - sum(after)) * 100 / sum(before)``
        md  -- per fold, the same percentage computed on the medians
        bef -- baseline predictions concatenated over all folds
        aft -- post-planning predictions concatenated over all folds

    Raises:
      ValueError: if ``data`` holds fewer than ``k`` rows.
    """
    from random import shuffle

    acc, md, auc = [], [], []
    bef, aft = [], []
    rows = data._rows
    size = int(len(rows) / k)
    if size < 1:
      # A zero chunk size would otherwise surface as an opaque range() error.
      raise ValueError(
          'kFoldCrossVal: need at least k=%d rows, got %d' % (k, len(rows)))
    shuffle(rows)
    sqe = [rows[i:i + size] for i in range(0, len(rows), size)]
    if len(sqe) > k:
      # Fold every surplus chunk into the k-th fold so that exactly k folds
      # remain and no row is left out of testing.
      sqe = sqe[:k - 1] + [[row for part in sqe[k - 1:] for row in part]]
    for indx in range(k):
      # Select the fold by index without mutating the fold list.  The former
      # pop(indx)/insert(k, ...) pair rotated the list, so some folds were
      # tested several times and others never.
      testRows = sqe[indx]
      trainRows = self.flatten(
          [fold for pos, fold in enumerate(sqe) if pos != indx])
      train = clone(data, rows=[i.cells for i in trainRows])
      test = clone(data, rows=[i.cells for i in testRows])

      train_df = formatData(train)
      test_df = formatData(test)
      actual = test_df[
          test_df.columns[-2]].astype('float32').tolist()
      before = predictor(train=train_df, test=test_df).rforest()
      _, __, after = self.planner(
          train, test, fSel, ext, _prune, _info, method)
      bef.extend(before)
      aft.extend(after)
      md.append((median(before) - median(after)) * 100 / median(before))
      auc.append((sum(before) - sum(after)) * 100 / sum(before))
      acc.extend(
          [(1 - abs(b - a) / a) * 100 for b, a in zip(before, actual)])
    return acc, auc, md, bef, aft
コード例 #2
0
ファイル: vapp.py プロジェクト: queenstina/Transfer-Learning
 def mainraw(self, name='Apache', reps=10, fSel=True,
             ext=0.5, _prune=False, _info=0.25, method='best'):
   """Collect random-forest predictions before and after applying WHAT.

   ``self.explorer(name)`` is queried once.  For each of ``reps``
   repetitions, every entry ``d`` whose last '/'-separated component of
   ``d[0]`` equals ``name`` is processed: the training table is read from
   ``d[1][1]`` and the test table from ``d[1][0]``.

   Returns:
     ``(before, after)`` -- two lists with one ``rforest()`` result per
     processed entry; ``before`` is predicted on the original test table,
     ``after`` on the table returned by ``WHAT(...).main()``.
   """
   data = self.explorer(name)
   before, after = [], []
   for _ in xrange(reps):
     for d in data:
       if name != d[0].strip().split('/')[-1]:
         continue
       trainPath = d[0] + '/' + d[1][1]
       train = createTbl([trainPath], isBin=False)
       testPath = d[0] + '/' + d[1][0]
       test = createTbl([testPath], isBin=False)
       train_df = formatData(train)
       test_df = formatData(test)
       # Computed as in the baseline flow; the value is not used further here.
       klass = test_df.columns[-2]
       actual = test_df[klass].astype('float32').tolist()
       baseline = predictor(train=train_df, test=test_df).rforest()
       before.append(baseline)
       what = WHAT(
           train=[trainPath],
           test=[testPath],
           train_df=train,
           bin=True,
           test_df=test,
           extent=ext,
           fSelect=fSel,
           far=False,
           infoPrune=_info,
           method=method,
           Prune=_prune)
       newTab = what.main()
       newTab_df = formatData(newTab)
       planned = predictor(train=train_df, test=newTab_df).rforest()
       after.append(planned)
   return before, after
コード例 #3
0
ファイル: vapp.py プロジェクト: pfjob09/Transfer-Learning
    def kFoldCrossVal(self, data, fSel, ext, _prune, _info, method, k=5):
        """Run k-fold cross validation of the planner against the baseline.

        The rows of ``data`` are shuffled in place and split into ``k``
        folds.  Every fold is used exactly once as the test set while the
        remaining folds are used for training.

        Args:
            data: table object exposing ``_rows`` (rows expose ``.cells``);
                it is passed to ``clone`` to build the per-fold tables.
            fSel, ext, _prune, _info, method: forwarded unchanged to
                ``self.planner``.
            k: number of folds.

        Returns:
            Tuple ``(acc, auc, md, bef, aft)``:
                acc -- per test row,
                    ``(1 - |before - actual| / actual) * 100``
                auc -- per fold,
                    ``(sum(before) - sum(after)) * 100 / sum(before)``
                md  -- per fold, the same percentage on the medians
                bef -- baseline predictions concatenated over all folds
                aft -- post-planning predictions concatenated over all folds

        Raises:
            ValueError: if ``data`` holds fewer than ``k`` rows.
        """
        from random import shuffle

        acc, md, auc = [], [], []
        bef, aft = [], []
        rows = data._rows
        size = int(len(rows) / k)
        if size < 1:
            # A zero chunk size would otherwise surface as an opaque
            # range() error.
            raise ValueError(
                'kFoldCrossVal: need at least k=%d rows, got %d' %
                (k, len(rows)))
        shuffle(rows)
        sqe = [rows[i:i + size] for i in range(0, len(rows), size)]
        if len(sqe) > k:
            # Fold every surplus chunk into the k-th fold so that exactly k
            # folds remain and no row is left out of testing.
            sqe = sqe[:k - 1] + [
                [row for part in sqe[k - 1:] for row in part]]
        for indx in range(k):
            # Select the fold by index without mutating the fold list.  The
            # former pop(indx)/insert(k, ...) pair rotated the list, so some
            # folds were tested several times and others never.
            testRows = sqe[indx]
            trainRows = self.flatten(
                [fold for pos, fold in enumerate(sqe) if pos != indx])
            train = clone(data, rows=[i.cells for i in trainRows])
            test = clone(data, rows=[i.cells for i in testRows])

            train_df = formatData(train)
            test_df = formatData(test)
            actual = test_df[test_df.columns[-2]].astype('float32').tolist()
            before = predictor(train=train_df, test=test_df).rforest()
            _, __, after = self.planner(train, test, fSel, ext, _prune, _info,
                                        method)
            bef.extend(before)
            aft.extend(after)
            md.append((median(before) - median(after)) * 100 / median(before))
            auc.append((sum(before) - sum(after)) * 100 / sum(before))
            acc.extend([(1 - abs(b - a) / a) * 100
                        for b, a in zip(before, actual)])
        return acc, auc, md, bef, aft
コード例 #4
0
ファイル: vapp.py プロジェクト: queenstina/Transfer-Learning
  def planner(self, train, test, fSel, ext, _prune,
              _info, name, method='best', justDeltas=False):
    """Predict on ``test`` and run the WHAT planner on the same tables.

    Args:
      train, test: tables accepted by ``formatData``; also handed to WHAT
        as ``train_df`` / ``test_df``.
      fSel, ext, _prune, _info, name, method: forwarded to ``WHAT`` as
        ``fSelect``, ``extent``, ``Prune``, ``infoPrune``, ``name`` and
        ``method``.
      justDeltas: forwarded to ``WHAT(...).main``.

    Returns:
      ``(actual, before, after, newTab)`` where ``actual`` is the
      second-to-last column of the formatted test table as floats,
      ``before`` / ``after`` are ``rforest()`` results and ``newTab`` is
      whatever ``WHAT(...).main`` returned.
    """
    train_df = formatData(train)
    test_df = formatData(test)
    klass = test_df.columns[-2]
    actual = test_df[klass].astype('float32').tolist()
    before = predictor(train=train_df, test=test_df).rforest()

    what = WHAT(
        name=name,
        train=None,
        test=None,
        train_df=train,
        bin=True,
        test_df=test,
        extent=ext,
        fSelect=fSel,
        far=False,
        infoPrune=_info,
        method=method,
        Prune=_prune)
    newTab = what.main(justDeltas=justDeltas)

    # NOTE(review): ``after`` is predicted on the original test frame, not on
    # ``newTab`` -- confirm this is intended.
    after = predictor(train=train_df, test=test_df).rforest()
    return actual, before, after, newTab
コード例 #5
0
 def planner(self, train, test):
     """Run the HOW planner on ``train`` / ``test`` and return its table.

     Both inputs are loaded with ``createTbl(..., _smote=False,
     isBin=False)``.  Random-forest predictions are computed on the original
     test table and on the planned table, but only the table produced by
     ``HOW(...).main()`` is returned.
     """
     trainTbl = createTbl(train, _smote=False, isBin=False)
     train_df = formatData(trainTbl)
     testTbl = createTbl(test, _smote=False, isBin=False)
     test_df = formatData(testTbl)
     klass = test_df.columns[-2]
     # The three values below are computed but not part of the return value.
     actual = test_df[klass].astype('float32').tolist()
     before = predictor(train=train_df, test=test_df).rforest()
     how = HOW(train=train, test=test, bin=False)
     newTab = how.main()
     newTab_df = formatData(newTab)
     after = predictor(train=train_df, test=newTab_df).rforest()
     return newTab
コード例 #6
0
ファイル: config.py プロジェクト: pfjob09/Transfer-Learning
  def main(self, name='Apache', reps=20):
    """Compare several planners on the datasets matching ``name``.

    For each of ``reps`` repetitions ``self.explorer(name)`` is queried and
    every entry ``d`` whose second-to-last '/'-separated component of
    ``d[0]`` equals ``name`` is processed (train file ``d[0] + d[1][1]``,
    test file ``d[0] + d[1][0]``).

    Returns:
      Five lists, each starting with its label, in the order 'xtrees',
      'CART', 'HOW', 'Base', 'Base+FSS'.  All but the 'HOW' list receive
      ``sum(after) / sum(before)`` per processed entry; the 'HOW' list is
      extended with the result of ``HOW(name)``.
    """
    out_xtrees, out_cart = ['xtrees'], ['CART']
    out_HOW = ['HOW']
    out_basln, out_baslnFss = ['Base'], ['Base+FSS']

    for _ in xrange(reps):
      for d in self.explorer(name):
        if name != d[0].strip().split('/')[-2]:
          continue
        train = [d[0] + d[1][1]]
        test = [d[0] + d[1][0]]
        train_df = formatData(createTbl(train, _smote=False, isBin=False))
        test_df = formatData(createTbl(test, _smote=False, isBin=False))
        klass = test_df.columns[-2]
        actual = test_df[klass].astype('float32').tolist()
        before = predictor(train=train_df, test=test_df).rforest()

        # Apply the different planners.
        xTrees = xtrees(
            train=train, test=test, bin=False, majority=True).main()
        cart = xtrees(
            train=train, test=test, bin=False, majority=False).main()
        how = HOW(name)
        baseln = strawman(train=train, test=test).main(config=True)
        baselnFss = strawman(
            train=train, test=test, prune=True).main(config=True)

        def ratio(newTab):
          # Predictions on the planned table relative to the baseline.
          planned = predictor(
              train=train_df, test=formatData(newTab)).rforest()
          return sum(planned) / sum(before)

        out_xtrees.append(ratio(xTrees))
        out_cart.append(ratio(cart))
        out_HOW.extend(how)
        out_basln.append(ratio(baseln))
        out_baslnFss.append(ratio(baselnFss))

    return [out_xtrees, out_cart, out_HOW, out_basln, out_baslnFss]
コード例 #7
0
ファイル: config.py プロジェクト: pfjob09/Transfer-Learning
  def planner(self, train, test):
    """Run the HOW planner on ``train`` / ``test`` and return its table.

    Both inputs are loaded with ``createTbl(..., _smote=False,
    isBin=False)``.  Random-forest predictions are computed on the original
    test table and on the planned table, but only the table produced by
    ``HOW(...).main()`` is returned.
    """
    trainTbl = createTbl(train, _smote=False, isBin=False)
    train_df = formatData(trainTbl)
    testTbl = createTbl(test, _smote=False, isBin=False)
    test_df = formatData(testTbl)
    klass = test_df.columns[-2]
    # The three values below are computed but not part of the return value.
    actual = test_df[klass].astype('float32').tolist()
    before = predictor(train=train_df, test=test_df).rforest()
    how = HOW(train=train, test=test, bin=False)
    newTab = how.main()
    newTab_df = formatData(newTab)
    after = predictor(train=train_df, test=newTab_df).rforest()
    return newTab
コード例 #8
0
    def main(self, name='Apache', reps=20):
        """Compare several planners on the datasets matching ``name``.

        For each of ``reps`` repetitions ``self.explorer(name)`` is queried
        and every entry ``d`` whose second-to-last '/'-separated component
        of ``d[0]`` equals ``name`` is processed (train file
        ``d[0] + d[1][1]``, test file ``d[0] + d[1][0]``).

        Returns:
            Five lists, each starting with its label, in the order
            'xtrees', 'CART', 'HOW', 'Base', 'Base+FSS'.  All but the 'HOW'
            list receive ``sum(after) / sum(before)`` per processed entry;
            the 'HOW' list is extended with the result of ``HOW(name)``.
        """
        out_xtrees, out_cart = ['xtrees'], ['CART']
        out_HOW = ['HOW']
        out_basln, out_baslnFss = ['Base'], ['Base+FSS']

        for _ in xrange(reps):
            for d in self.explorer(name):
                if name != d[0].strip().split('/')[-2]:
                    continue
                train = [d[0] + d[1][1]]
                test = [d[0] + d[1][0]]
                train_df = formatData(
                    createTbl(train, _smote=False, isBin=False))
                test_df = formatData(
                    createTbl(test, _smote=False, isBin=False))
                klass = test_df.columns[-2]
                actual = test_df[klass].astype('float32').tolist()
                before = predictor(train=train_df, test=test_df).rforest()

                # Apply the different planners.
                xTrees = xtrees(
                    train=train, test=test, bin=False, majority=True).main()
                cart = xtrees(
                    train=train, test=test, bin=False, majority=False).main()
                how = HOW(name)
                baseln = strawman(train=train, test=test).main(config=True)
                baselnFss = strawman(
                    train=train, test=test, prune=True).main(config=True)

                def ratio(newTab):
                    # Predictions on the planned table relative to the
                    # baseline.
                    planned = predictor(
                        train=train_df, test=formatData(newTab)).rforest()
                    return sum(planned) / sum(before)

                out_xtrees.append(ratio(xTrees))
                out_cart.append(ratio(cart))
                out_HOW.extend(how)
                out_basln.append(ratio(baseln))
                out_baslnFss.append(ratio(baselnFss))

        return [out_xtrees, out_cart, out_HOW, out_basln, out_baslnFss]
コード例 #9
0
ファイル: vapp.py プロジェクト: pfjob09/Transfer-Learning
 def planner(self, train, test, fSel, ext, _prune, _info, method='best'):
     """Predict on ``test`` before and after applying the WHAT planner.

     Args:
         train, test: tables accepted by ``formatData``; also handed to WHAT
             as ``train_df`` / ``test_df``.
         fSel, ext, _prune, _info, method: forwarded to ``WHAT`` as
             ``fSelect``, ``extent``, ``Prune``, ``infoPrune`` and
             ``method``.

     Returns:
         ``(actual, before, after)`` where ``actual`` is the second-to-last
         column of the formatted test table as floats, ``before`` is the
         ``rforest()`` result on the original test table and ``after`` the
         ``rforest()`` result on the table returned by ``WHAT(...).main()``.
     """
     train_df = formatData(train)
     test_df = formatData(test)
     klass = test_df.columns[-2]
     actual = test_df[klass].astype('float32').tolist()
     before = predictor(train=train_df, test=test_df).rforest()

     what = WHAT(
         train=None,
         test=None,
         train_df=train,
         bin=True,
         test_df=test,
         extent=ext,
         fSelect=fSel,
         far=False,
         infoPrune=_info,
         method=method,
         Prune=_prune)
     newTab_df = formatData(what.main())
     after = predictor(train=train_df, test=newTab_df).rforest()
     return actual, before, after
コード例 #10
0
ファイル: vapp.py プロジェクト: pfjob09/Transfer-Learning
 def mainraw(self, name='Apache', reps=10, fSel=True, ext=0.5,
             _prune=False, _info=0.25, method='best'):
     """Collect random-forest predictions before and after applying WHAT.

     ``self.explorer(name)`` is queried once.  For each of ``reps``
     repetitions, every entry ``d`` whose last '/'-separated component of
     ``d[0]`` equals ``name`` is processed: the training table is read from
     ``d[1][1]`` and the test table from ``d[1][0]``.

     Returns:
         ``(before, after)`` -- two lists with one ``rforest()`` result per
         processed entry; ``before`` is predicted on the original test
         table, ``after`` on the table returned by ``WHAT(...).main()``.
     """
     data = self.explorer(name)
     before, after = [], []
     for _ in xrange(reps):
         for d in data:
             if name != d[0].strip().split('/')[-1]:
                 continue
             trainPath = d[0] + '/' + d[1][1]
             train = createTbl([trainPath], isBin=False)
             testPath = d[0] + '/' + d[1][0]
             test = createTbl([testPath], isBin=False)
             train_df = formatData(train)
             test_df = formatData(test)
             # Computed as in the baseline flow; the value is not used
             # further here.
             klass = test_df.columns[-2]
             actual = test_df[klass].astype('float32').tolist()
             baseline = predictor(train=train_df, test=test_df).rforest()
             before.append(baseline)
             what = WHAT(
                 train=[trainPath],
                 test=[testPath],
                 train_df=train,
                 bin=True,
                 test_df=test,
                 extent=ext,
                 fSelect=fSel,
                 far=False,
                 infoPrune=_info,
                 method=method,
                 Prune=_prune)
             newTab = what.main()
             newTab_df = formatData(newTab)
             planned = predictor(train=train_df, test=newTab_df).rforest()
             after.append(planned)
     return before, after
コード例 #11
0
  def main(self, name='Apache', reps=20):
    """Generator yielding one result row per planner.

    For each planner label in ``['DTREE', 'CD+FS', 'CD', 'BIC']`` a list is
    built that starts with the label and then receives, for every dataset
    entry matching ``name`` and every one of ``reps`` repetitions, the value
    ``1 - sum(after) / sum(before)``, where ``before`` is the ``rforest()``
    result on the original test table and ``after`` the ``rforest()`` result
    on the planner's table.  The list is yielded once all matching entries
    have been processed.

    Args:
      name: dataset name; matched against the second-to-last '/'-separated
        component of ``d[0]`` for each entry from ``self.explorer(name)``.
      reps: number of times each planner is applied per matching entry.

    Yields:
      ``[planner_label, score, score, ...]``.
    """
    # NOTE(review): presumably seeds the random generator -- rseed is defined
    # outside this block; confirm.
    rseed(1)
    for planner in ['DTREE', 'CD+FS', 'CD', 'BIC']:
      out = [planner]
      # Both lambdas read train_df / before from the enclosing scope at call
      # time; those names are only assigned further down, inside the dataset
      # loop, before the lambdas are first called.
      after = lambda newTab: predictor(
          train=train_df,
          test=formatData(newTab)).rforest()

      frac = lambda aft: (1 - sum(aft) / sum(before))

      data = self.explorer(name)
      for d in data:
        if name == d[0].strip().split('/')[-2]:
          #           set_trace()
          train = [d[0] + d[1][1]]
          test = [d[0] + d[1][0]]
#           set_trace()
          train_df = formatData(createTbl(train, _smote=False, isBin=False))
          test_df = formatData(createTbl(test, _smote=False, isBin=False))
          # `valid` is assigned here and in the branches below but is never
          # read within this method.
          valid = [
              isValid(
                  new.cells, name=name) for new in createTbl(
                  test,
                  _smote=False,
                  isBin=False)._rows]
          # `actual` is likewise not read again in this method.
          actual = test_df[test_df.columns[-2]].astype('float32').tolist()
          # Baseline predictions, computed once per dataset entry and shared
          # by all repetitions.
          before = predictor(train=train_df, test=test_df).rforest()
          for _ in xrange(reps):
            newTab = None  # Just so I am sure, there isn't any residue.
            "Apply Different Planners"
            # NOTE(review): 'xtrees' is not in the planner list iterated
            # above, so this branch is never taken from this loop.
            if planner == 'xtrees':
              newTab = xtrees(train=train,
                              test=test,
                              bin=False,
                              majority=True,
                              name=name).main()
            if planner == 'DTREE':
              newTab = xtrees(train=train,
                              test=test,
                              bin=False,
                              majority=False,
                              name=name).main()
              valid = [isValid(new.cells, name=name) for new in newTab._rows]
#               set_trace()
            if planner == 'BIC':
              newTab = HOW(name)
              valid = [isValid(new.cells, name=name) for new in newTab._rows]
#               set_trace()
            if planner == 'CD':
              newTab = strawman(name=name,
                                train=train,
                                test=test).main(mode="config")
              valid = [isValid(new.cells, name=name) for new in newTab._rows]
#               set_trace()
            if planner == 'CD+FS':
              newTab = strawman(name=name,
                                train=train,
                                test=test,
                                prune=True).main(mode="config")
              valid = [isValid(new.cells, name=name) for new in newTab._rows]
#               set_trace()
            # Any failure while scoring drops into set_trace() instead of
            # propagating; the bare except catches every exception type.
            try:
              out.append(frac(after(newTab)))
            except:
              set_trace()

      yield out