예제 #1
0
  def main(self, name='Apache', reps=20):
    """Compare several planners on the dataset called `name`.

    For every entry `d` produced by ``self.explorer(name)`` whose
    second-to-last '/'-separated component of ``d[0]`` equals `name`, the
    train table is built from ``d[0] + d[1][1]`` and the test table from
    ``d[0] + d[1][0]``. A baseline prediction is obtained from
    ``predictor(...).rforest()`` on the untouched test table; each planner
    then produces a new table which is predicted again, and the ratio
    ``sum(after) / sum(before)`` is recorded.

    Args:
      name: Dataset name, matched against the path held in each entry.
      reps: Number of times the whole experiment is repeated.

    Returns:
      A list of five lists, in the order xtrees, CART, HOW, Base,
      Base+FSS. Each inner list starts with its label followed by the
      collected results. The HOW list is extended with whatever
      ``HOW(name)`` returns instead of a computed ratio.
    """
    out_xtrees = ['xtrees']
    out_HOW = ['HOW']
    out_cart = ['CART']
    out_basln = ['Base']
    out_baslnFss = ['Base+FSS']

    for _ in xrange(reps):
      for d in self.explorer(name):
        # Skip entries that belong to a different dataset.
        if name != d[0].strip().split('/')[-2]:
          continue

        train = [d[0] + d[1][1]]
        test = [d[0] + d[1][0]]
        train_df = formatData(createTbl(train, _smote=False, isBin=False))
        test_df = formatData(createTbl(test, _smote=False, isBin=False))
        before = predictor(train=train_df, test=test_df).rforest()

        def ratio(new_tab):
          """Return sum(prediction on new_tab) / sum(baseline prediction)."""
          after = predictor(train=train_df,
                            test=formatData(new_tab)).rforest()
          return sum(after) / sum(before)

        # Apply the different planners. All of them run before any score
        # is computed, matching the original evaluation order.
        xTrees = xtrees(train=train,
                        test=test,
                        bin=False,
                        majority=True).main()
        cart = xtrees(train=train,
                      test=test,
                      bin=False,
                      majority=False).main()
        how = HOW(name)
        baseln = strawman(train=train, test=test).main(config=True)
        baselnFss = strawman(train=train,
                             test=test,
                             prune=True).main(config=True)

        out_xtrees.append(ratio(xTrees))
        out_cart.append(ratio(cart))
        out_HOW.extend(how)
        out_basln.append(ratio(baseln))
        out_baslnFss.append(ratio(baselnFss))

    return [out_xtrees, out_cart, out_HOW, out_basln, out_baslnFss]
예제 #2
0
    def main(self, name='Apache', reps=20):
        """Run every planner `reps` times on dataset `name` and collect scores.

        Each entry `d` from ``self.explorer(name)`` is used only when the
        second-to-last '/'-separated component of ``d[0]`` equals `name`.
        The train table comes from ``d[0] + d[1][1]`` and the test table
        from ``d[0] + d[1][0]``. The score of a planner is
        ``sum(after) / sum(before)``, where `before` is the ``rforest()``
        prediction on the original test table and `after` is the
        prediction on the table the planner returned.

        Args:
            name: Dataset name, matched against the path in each entry.
            reps: Number of repetitions of the whole experiment.

        Returns:
            ``[out_xtrees, out_cart, out_HOW, out_basln, out_baslnFss]``.
            Each inner list begins with its label ('xtrees', 'CART',
            'HOW', 'Base', 'Base+FSS'). The 'HOW' list is extended with
            the value of ``HOW(name)`` rather than with a ratio.
        """
        out_xtrees = ['xtrees']
        out_HOW = ['HOW']
        out_cart = ['CART']
        out_basln = ['Base']
        out_baslnFss = ['Base+FSS']

        for _ in xrange(reps):
            for d in self.explorer(name):
                # Guard clause: ignore entries for other datasets.
                if name != d[0].strip().split('/')[-2]:
                    continue

                train = [d[0] + d[1][1]]
                test = [d[0] + d[1][0]]
                train_df = formatData(
                    createTbl(train, _smote=False, isBin=False))
                test_df = formatData(
                    createTbl(test, _smote=False, isBin=False))
                before = predictor(train=train_df, test=test_df).rforest()

                def score(planned):
                    """Ratio of summed predictions: planned table / baseline."""
                    after = predictor(
                        train=train_df, test=formatData(planned)).rforest()
                    return sum(after) / sum(before)

                # Apply the different planners; every planner runs before
                # the first score is computed, as in the original.
                xTrees = xtrees(train=train,
                                test=test,
                                bin=False,
                                majority=True).main()
                cart = xtrees(train=train,
                              test=test,
                              bin=False,
                              majority=False).main()
                how = HOW(name)
                baseln = strawman(train=train, test=test).main(config=True)
                baselnFss = strawman(train=train, test=test,
                                     prune=True).main(config=True)

                out_xtrees.append(score(xTrees))
                out_cart.append(score(cart))
                out_HOW.extend(how)
                out_basln.append(score(baseln))
                out_baslnFss.append(score(baselnFss))

        return [out_xtrees, out_cart, out_HOW, out_basln, out_baslnFss]
예제 #3
0
  def main(self, name='Apache', reps=20):
    """Yield one list of improvement scores per planner.

    This is a generator. For each planner in
    ``['DTREE', 'CD+FS', 'CD', 'BIC']`` it yields a list whose first item
    is the planner label and whose remaining items are
    ``1 - sum(after) / sum(before)``, one per repetition and per matching
    entry of ``self.explorer(name)``. `before` is the ``rforest()``
    prediction on the original test table; `after` is the prediction on
    the table produced by the planner.

    Args:
      name: Dataset name; an entry `d` is used only when the
        second-to-last '/'-separated component of ``d[0]`` equals it.
      reps: Number of times each planner is run per matching entry.

    Yields:
      ``[planner, score_1, score_2, ...]`` for each planner in turn.
    """
    rseed(1)
    for planner in ['DTREE', 'CD+FS', 'CD', 'BIC']:
      out = [planner]

      for d in self.explorer(name):
        if name != d[0].strip().split('/')[-2]:
          continue

        train = [d[0] + d[1][1]]
        test = [d[0] + d[1][0]]
        train_df = formatData(createTbl(train, _smote=False, isBin=False))
        test_df = formatData(createTbl(test, _smote=False, isBin=False))
        # NOTE(review): `valid` is never read in this method. It is kept
        # because isValid is defined elsewhere and may have side effects;
        # confirm and remove if it is pure.
        valid = [isValid(new.cells, name=name)
                 for new in createTbl(test, _smote=False, isBin=False)._rows]
        before = predictor(train=train_df, test=test_df).rforest()

        for _ in xrange(reps):
          newTab = None  # Reset so nothing leaks from the previous run.

          # Apply the selected planner. The 'xtrees' branch cannot be
          # reached with the planner list above; it is kept so adding
          # 'xtrees' to that list keeps working.
          if planner == 'xtrees':
            newTab = xtrees(train=train,
                            test=test,
                            bin=False,
                            majority=True,
                            name=name).main()
          elif planner == 'DTREE':
            newTab = xtrees(train=train,
                            test=test,
                            bin=False,
                            majority=False,
                            name=name).main()
            valid = [isValid(new.cells, name=name) for new in newTab._rows]
          elif planner == 'BIC':
            newTab = HOW(name)
            valid = [isValid(new.cells, name=name) for new in newTab._rows]
          elif planner == 'CD':
            newTab = strawman(name=name,
                              train=train,
                              test=test).main(mode="config")
            valid = [isValid(new.cells, name=name) for new in newTab._rows]
          elif planner == 'CD+FS':
            newTab = strawman(name=name,
                              train=train,
                              test=test,
                              prune=True).main(mode="config")
            valid = [isValid(new.cells, name=name) for new in newTab._rows]

          try:
            after = predictor(train=train_df,
                              test=formatData(newTab)).rforest()
            out.append(1 - sum(after) / sum(before))
          except Exception:
            # Was a bare `except:`, which also trapped KeyboardInterrupt
            # and SystemExit. Those now propagate; ordinary errors still
            # drop into the debugger as before.
            set_trace()

      yield out
예제 #4
0
  def deltas(self, name, planner):
    """Return the averaged per-column deltas produced by `planner`.

    For the first entry `d` of ``self.explorer(name)`` whose
    second-to-last '/'-separated component of ``d[0]`` equals `name`:
    the train and test tables are built, ``self.headers`` is set to the
    train table's headers, the test rows (every cell except the last two)
    are written via ``write2file(..., fname='before_cpm')``, and the
    planner is run with ``justDeltas=True``.

    Args:
      name: Dataset name, matched against the path in each entry.
      planner: One of 'xtrees', 'DTREE', 'BIC', 'CD', 'CD+FS'.

    Returns:
      ``np.sum(deltas, axis=0)`` as a float array divided by
      ``np.size(newTab, axis=0)``, where `deltas` are the items produced
      by ``self.delta1``. Returns None when no entry matches `name` or
      `planner` is not one of the recognised labels.
    """
    for d in self.explorer(name):
      if name != d[0].strip().split('/')[-2]:
        continue

      train = [d[0] + '/' + d[1][1]]
      test = [d[0] + '/' + d[1][0]]
      train_DF = createTbl(train, isBin=False)
      test_df = createTbl(test, isBin=False)
      self.headers = train_DF.headers
      # Save the untouched test rows, minus the last two cells of each.
      write2file([row.cells[:-2] for row in test_df._rows],
                 fname='before_cpm')

      # Apply the learner. Only this step differs between planners.
      if planner == 'xtrees':
        newTab = xtrees(train=train,
                        test=test,
                        bin=False,
                        majority=True,
                        name=name).main(justDeltas=True)
      elif planner == 'DTREE':
        newTab = xtrees(train=train,
                        test=test,
                        bin=False,
                        majority=False,
                        name=name).main(justDeltas=True)
      elif planner == 'BIC':
        newTab = HOW(name, justDeltas=True)
      elif planner == 'CD':
        newTab = strawman(name=name,
                          train=train,
                          test=test).main(mode="config", justDeltas=True)
      elif planner == 'CD+FS':
        newTab = strawman(name=name,
                          train=train,
                          test=test,
                          prune=True).main(mode="config", justDeltas=True)
      else:
        # Unknown planner: as before, move on to the next entry.
        continue

      # The original passed norm=len(predRows) with predRows always empty,
      # so norm is 0 here.
      # NOTE(review): confirm that 0 is the intended normaliser.
      changes = list(self.delta1(newTab, train_DF.headers, norm=0))
      return np.array(
          np.sum(changes, axis=0), dtype='float') / np.size(newTab, axis=0)