def _RF(): "Test RF" dir = "../Data" one, two = explore(dir) # Training data train_DF = createTbl([one[0][0]]) # Test data test_df = createTbl([one[0][1]]) actual = Bugs(test_df) preds = rforest(train_DF, test_df, mss=6, msl=8, max_feat=4, n_est=5756, smoteit=False) print _Abcd(before=actual, after=preds, show=False)[-1]
def _logit():
  """Test LOGIT: train a logistic-regression learner, predict bugs on the
  test table, then stop in the debugger before scoring with _Abcd.
  """
  data_dir = './Data'  # renamed from 'dir', which shadows the builtin
  # NOTE(review): sibling testers use '../Data' -- confirm this path is right
  one, two = explore(data_dir)
  # Training data
  train_DF = createTbl(one[0])
  # Test data
  test_df = createTbl(two[0])
  actual = Bugs(test_df)
  preds = logit(train_DF, test_df)
  set_trace()  # intentional breakpoint for interactive inspection
  # NOTE(review): keyword names differ from the before=/after= style used
  # by the other testers -- confirm _Abcd accepts train=/test=
  _Abcd(train=actual, test=preds, verbose=True)
def _adaboost():
  """Test AdaBoost: train an AdaBoost learner, predict bugs on the test
  table, then stop in the debugger before scoring with _Abcd.
  """
  data_dir = "./Data"  # renamed from 'dir', which shadows the builtin
  one, two = explore(data_dir)
  # Training data
  train_DF = createTbl(one[0])
  # Test data
  test_df = createTbl(two[0])
  actual = Bugs(test_df)
  preds = adaboost(train_DF, test_df)
  set_trace()  # intentional breakpoint for interactive inspection
  # NOTE(review): keyword names differ from the before=/after= style used
  # by the other testers -- confirm _Abcd accepts train=/test=
  _Abcd(train=actual, test=preds, verbose=True)
def _where2pred(): "Test where2" dir = '../Data' one, two = explore(dir) # Training data train_DF = createTbl(one[0]) # Test data test_df = createTbl(two[0]) actual = Bugs(test_df) preds = where2prd(train_DF, test_df) for a, b in zip(actual, preds): print a, b set_trace() print _Abcd(before = actual, after = preds, show = False)
def _RF(): "Test RF" dir = '../Data' one, two = explore(dir) # Training data train_DF = createTbl([one[0][0]]) # Test data test_df = createTbl([one[0][1]]) actual = Bugs(test_df) preds = rforest(train_DF, test_df, mss=6, msl=8, max_feat=4, n_est=5756, smoteit=False) print _Abcd(before=actual, after=preds, show=False)[-1]
def _where2pred(): "Test where2" dir = '../Data' one, two = explore(dir) # Training data train_DF = createTbl(one[0]) # Test data test_df = createTbl(two[0]) actual = Bugs(test_df) preds = where2prd(train_DF, test_df) for a, b in zip(actual, preds): print a, b set_trace() print _Abcd(before=actual, after=preds, show=False)
def go(self):
  # Repeat the predict -> plan (xtrees) -> re-predict experiment self.reps
  # times; record _Abcd scores in self.out_pred and, in self.out, the
  # fraction of predicted-defective modules remaining after treatment.
  for _ in xrange(self.reps):
    predRows = []
    train_DF = createTbl(self.train[self._n], isBin=True)
    test_df = createTbl(self.test[self._n], isBin=True)
    actual = np.array(Bugs(test_df))
    before = self.pred(train_DF, test_df, tunings=self.tunedParams,
                       smoteit=True)
    # Keep only the test rows the model flagged as defective (> 0)
    predRows = [row.cells for predicted, row in zip(before, test_df._rows)
                if predicted > 0]
    predTest = clone(test_df, rows=predRows)
    # Plan: run xtrees over the predicted-defective rows
    newTab = xtrees(train=self.train[self._n], test_DF=predTest,
                    bin=False).main()
    after = self.pred(train_DF, newTab, tunings=self.tunedParams,
                      smoteit=True)
    self.out_pred.append(_Abcd(before=actual, after=before))
    # set_trace()
    # NOTE(review): 'delta' is computed but never used; only 'frac' is kept
    delta = cliffs(lst2=Bugs(predTest), lst1=after).delta()
    # NOTE(review): integer division under Python 2, and ZeroDivisionError
    # if 'before' predicts no defects -- confirm this is intended
    frac = sum([0 if a < 1 else 1 for a in after]) / \
        sum([0 if b < 1 else 1 for b in before])
    self.out.append(frac)
  print(self.out)
def depen(self, rows):
  """Objective: score a random forest, tuned with 'rows', on this
  frame's train/test data via the last _Abcd measure."""
  # 'rows' supplies the tunings in order: n_est, max_feat, mss, msl
  predictions = rforest(self.train, self.test, tunings=rows, smoteit=True)
  return _Abcd(before=Bugs(self.test), after=predictions, show=False)[-1]
def depen(self, row):
  # Decode one candidate tuning vector 'row' into where2 settings, run
  # the learner, and return the last _Abcd score as the objective value.
  # My where2pred() takes data in string format. Ex: '../Data/ant/ant-1.6.csv'
  # NOTE(review): if defaults() is dict-like, .update() returns None and
  # self.where becomes None -- confirm defaults().update() returns self
  self.where = defaults().update(minSize=row[4],
                                 depthMin=int(row[5]),
                                 depthMax=int(row[6]),
                                 prune=row[7] > 0.5)
  self.tree.infoPrune = row[1]
  self.tree.m = int(row[2])
  self.tree.n = int(row[3])
  self.tree.prune = row[8] > 0.5
  actual = Bugs(createTbl([self.test], isBin=True))
  preds = where2prd(self.train, [self.test],
                    tunings=[self.where, self.tree],
                    thresh=row[0])
  return _Abcd(before=actual, after=preds, show=False)[-1]
def _where2pred():
  """Test where2: hold out the last file of the first group as the test
  set, train on the rest, and return the last _Abcd measure.
  """
  data_dir = '../Data'  # renamed from 'dir', which shadows the builtin
  one, two = explore(data_dir)
  # set_trace()
  # Training data: all but the last file of the first group
  train = one[0][:-1]
  # Test data: the held-out last file
  test = [one[0][-1]]
  actual = Bugs(createTbl(test, isBin=True))
  preds = where2prd(train, test)
  # for a, b in zip(actual, preds): print a, b
  # set_trace()
  return _Abcd(before=actual, after=preds, show=False)[-1]
def depen(self, row):
  # Decode one candidate tuning vector 'row' into where2 settings, run
  # the learner, and return the last _Abcd score as the objective value.
  # My where2pred() takes data in string format. Ex:
  # '../Data/ant/ant-1.6.csv'
  # NOTE(review): if defaults() is dict-like, .update() returns None and
  # self.where becomes None -- confirm defaults().update() returns self
  self.where = defaults().update(
      minSize=row[4],
      depthMin=int(row[5]),
      depthMax=int(row[6]),
      prune=row[7] > 0.5)
  self.tree.infoPrune = row[1]
  self.tree.m = int(row[2])
  self.tree.n = int(row[3])
  self.tree.prune = row[8] > 0.5
  actual = Bugs(createTbl([self.test], isBin=True))
  preds = where2prd(
      self.train,
      [self.test],
      tunings=[self.where, self.tree],
      thresh=row[0])
  return _Abcd(before=actual, after=preds, show=False)[-1]
def go(self):
  # For self.reps repetitions: predict defects, build a treated table with
  # treatments2, re-predict, and record the cliffs delta between the
  # predicted-defective rows' bugs and the after-treatment predictions.
  for _ in xrange(self.reps):
    predRows = []
    train_DF = createTbl(self.train[self._n][-2:], isBin=True)
    test_df = createTbl(self.test[self._n], isBin=True)
    actual = Bugs(test_df)
    before = self.pred(train_DF, test_df, tunings=self.tunedParams,
                       smoteit=True)
    # Copy rows flagged defective, writing the prediction into the
    # second-to-last cell of each copied row
    for predicted, row in zip(before, test_df._rows):
      tmp = row.cells
      tmp[-2] = predicted
      if predicted > 0:
        predRows.append(tmp)
    predTest = clone(test_df, rows=predRows)
    if predRows:
      newTab = treatments2(train=self.train[self._n][-2:],
                           test=self.test[self._n],
                           test_df=predTest,
                           extent=self.extent,
                           far=False,
                           smote=True,
                           resample=False,
                           infoPrune=self.infoPrune,
                           Prune=self.Prune).main()
    else:
      # No predicted defects: run treatments2 without a test_df
      newTab = treatments2(train=self.train[self._n][-2:],
                           test=self.test[self._n],
                           far=False,
                           smote=True,
                           resample=False,
                           extent=self.extent,
                           infoPrune=self.infoPrune,
                           Prune=self.Prune).main()
    after = self.pred(train_DF, newTab, tunings=self.tunedParams,
                      smoteit=True)
    self.out_pred.append(_Abcd(before=actual, after=before))
    delta = cliffs(lst1=Bugs(predTest), lst2=after).delta()
    self.out.append(delta)
  # Build a label for this run from its configuration flags
  if self.extent == 0:
    append = 'Base'
  else:
    if self.Prune:
      append = str(self.extent) + '_iP(' + str(
          int(self.infoPrune * 100)) + r'%)' if not self.fSelect else str(
          self.extent) + '_w_iP(' + str(int(self.infoPrune * 100)) + r'%)'
    else:
      append = str(self.extent) if not self.fSelect else str(
          self.extent) + '_w'
  self.out.insert(0, self.dataName + '_' + append)
  self.out_pred.insert(0, self.dataName)
  print(self.out)
def main():
  # Run the predict/treat experiment over every dataset under ../Data,
  # comparing 'near' vs 'far' treatments2 plans, then summarise with
  # rdivDemo.
  dir = '../Data'
  from os import walk
  dataName = [Name for _, Name, __ in walk(dir)][0]
  numData = len(dataName)  # Number of data
  Prd = [CART]  # , rforest] # , adaboost, logit, knn]
  _smoteit = [True]  # , False]
  _tuneit = [False]
  cd = {}
  abcd = []
  res = {}
  for n in xrange(numData):
    out11 = []
    outA1 = []
    out1 = []
    outFar = []
    outNear = []
    outa = []
    one, two = explore(dir)
    data = [one[i] + two[i] for i in xrange(len(one))]
    print('##', dataName[n])
    for p in Prd:
      train = [dat[0] for dat in withinClass(data[n])]
      test = [dat[1] for dat in withinClass(data[n])]
      reps = 10
      abcd = [[], []]
      for t in _tuneit:
        tunedParams = None if not t else params
        print('### Tuning') if t else print('### No Tuning')
        for _smote in _smoteit:
          # for _n in xrange(0):
          _n = -1
          # Training data
          for _ in xrange(reps):
            train_DF = createTbl(train[_n], isBin=True)
            # set_trace()
            # Testing data
            test_df = createTbl(test[_n], isBin=True)
            predRows = []
            # Tune?
            actual = Bugs(test_df)
            before = p(train_DF, test_df, tunings=tunedParams,
                       smoteit=True)
            tunedParams = None if not t else tuner(p, train[_n])
            # Keep rows flagged defective, prediction in second-to-last cell
            for predicted, row in zip(before, test_df._rows):
              tmp = row.cells
              tmp[-2] = predicted
              if predicted > 0:
                predRows.append(tmp)
            predTest = clone(test_df, rows=predRows)
            # Find and apply contrast sets
            # newTab = treatments(train = train[_n],
            #                     test = test[_n],
            #                     verbose = False,
            #                     smoteit = False).main()
            newTab_near = treatments2(train=train[_n], far=False,
                                      test=test[_n]
                                      # ).main()
                                      , test_df=predTest).main() \
                if predRows \
                else treatments2(train=train[_n], test=test[_n]).main()
            newTab_far = treatments2(train=train[_n], test=test[_n]
                                     # ).main()
                                     , test_df=predTest).main() \
                if predRows \
                else treatments2(train=train[_n], test=test[_n]).main()
            after_far = p(train_DF, newTab_far, tunings=tunedParams,
                          smoteit=True)
            after_near = p(train_DF, newTab_near, tunings=tunedParams,
                           smoteit=True)
            # print(showoff(dataName[n], before, after))
            outa.append(_Abcd(before=actual, after=before))
            # set_trace()
            cliffsFar = cliffsdelta(Bugs(predTest), after_far)
            cliffsNear = cliffsdelta(Bugs(predTest), after_near)
            # print(cliffsDelta(Bugs(predTest), after))
            # print('Gain = %1.2f' % float(\
            # (sum(Bugs(predTest)) - sum(after)) / sum(Bugs(predTest)) * 100), r'%')
            outFar.append(cliffsFar)
            outNear.append(cliffsNear)
            # out1.append(float((sum(before) - sum(after)) / sum(before) * 100))
            # out1 = [o for o in out1 if np.isfinite(o)]
          # NOTE(review): labels look swapped -- outNear is tagged '_Far'
          # and outFar is tagged '_Near'; confirm which is intended
          outNear.insert(0, dataName[n] + '_Far')
          outFar.insert(0, dataName[n] + '_Near')
          outa.insert(0, dataName[n])
          out11.extend([outNear, outFar])
          outA1.append(outa)
  try:
    print('```')
    rdivDemo(out11, isLatex=False)
    # rdivDemo(outA1, isLatex = False)
    print('```')
  except IndexError:
    pass
def depen(self, rows):
  """Objective function: how well does a random forest, tuned by 'rows',
  predict the bugs in the held-out test data?"""
  # tunings arrive in the order: n_est, max_feat, mss, msl
  scored = rforest(self.train, self.test,
                   tunings=rows,
                   smoteit=True)
  g = _Abcd(before=Bugs(self.test), after=scored, show=False)[-1]
  return g
def main():
  # Run the baseline predict/treat experiment over every dataset under
  # ../Data, printing _Abcd-based results per learner/configuration.
  dir = '../Data'
  from os import walk
  dataName = [Name for _, Name, __ in walk(dir)][0]
  numData = len(dataName)  # Number of data
  Prd = [CART]  # , rforest] # , adaboost, logit, knn]
  _smoteit = [False]  # , False]
  _tuneit = [False]  # , False]
  cd = []
  abcd = []
  res = {}
  for n in xrange(numData):
    one, two = explore(dir)
    data = [one[i] + two[i] for i in xrange(len(one))];
    print('##', dataName[n])
    for p in Prd:
      # print(p.__doc__)
      # params = tuner(p, data[0])
      # print(params)
      train = [dat[0] for dat in withinClass(data[n])]
      test = [dat[1] for dat in withinClass(data[n])]
      reps = 1
      abcd = [[], []];
      for t in _tuneit:
        # print('### Tuning') if t else print('### No Tuning')
        for _smote in _smoteit:
          # print('### SMOTE-ing') if _smote else print('### No SMOTE-ing')
          # print('```')
          # for _n in xrange(0):
          # set_trace()
          _n = -1
          # Training data
          for _ in xrange(reps):
            train_DF = createTbl(train[_n])
            # set_trace()
            # Testing data
            test_df = createTbl(test[_n])
            # Tune?
            tunedParams = None
            # tunedParams = None if not t else params
            # Find and apply contrast sets
            newTab = treatments(train = train[_n],
                                test = test[_n],
                                verbose = False,
                                smoteit = True)
            # Actual bugs
            actual = Bugs(test_df)
            actual1 = [0 if a == 0 else 1 for a in actual]
            # Use the classifier to predict the number of bugs in the raw data.
            before = p(train_DF, test_df, tunings = tunedParams,
                       smoteit = _smote)
            before1 = [0 if b == 0 else 1 for b in before]
            # Use the classifier to predict the number of bugs in the new data.
            after = p(train_DF, newTab, tunings = tunedParams,
                      smoteit = _smote)
            after1 = [0 if a == 0 else 1 for a in after]
            # % set_trace()
            # write('.')
            # write('Training: '); [write(l + ', ') for l in train[_n]]; print('\n')
            # cd.append(showoff(dataName[n], actual1, after1))
            # print(showoff(dataName[n], actual1, after1))
            # write('Test: '); [write(l) for l in test[_n]],
            out = _Abcd(before = actual1, after = before1)
            # NOTE(review): Python 2 integer division here -- confirm intended
            print('Win Ratio : %0.2d' % (sum(after1) / sum(actual1)))
            # %print('Prediction accuracy (g) %.2d' % out[-1])
            # print (out[-1])
            # Label the score row by learner / smote / tuning configuration
            if _smote:
              out.insert(0, p.__doc__ + '(s, Tuned) ') if t \
                  else out.insert(0, p.__doc__ + '(s, Naive) ')
              abcd[0].append(out)
            else:
              out.insert(0, p.__doc__ + '(raw, Tuned)') if t \
                  else out.insert(0, p.__doc__ + '(raw, Naive)')
              abcd[1].append(out)
          print()
  # cd.update({p.__doc__:sorted(cd)})
  # res.update({p.__doc__:(abcd[0][0:reps],
  #                        abcd[0][reps:] ,
  #                        abcd[1][0:reps],
  #                        abcd[1][reps:] ,
  #                        )})
  print('```')
  # print(cd)
  # printsk(res)
  print('```')
def go(self):
  # For self.reps repetitions: predict defects, build a treated table with
  # treatments2, re-predict, and record the cliffs delta between the
  # predicted-defective rows' bugs and the after-treatment predictions.
  for _ in xrange(self.reps):
    predRows = []
    train_DF = createTbl(self.train[self._n][-2:], isBin=True)
    test_df = createTbl(self.test[self._n], isBin=True)
    actual = Bugs(test_df)
    before = self.pred(train_DF, test_df, tunings=self.tunedParams,
                       smoteit=True)
    # Copy rows flagged defective, writing the prediction into the
    # second-to-last cell of each copied row
    for predicted, row in zip(before, test_df._rows):
      tmp = row.cells
      tmp[-2] = predicted
      if predicted > 0:
        predRows.append(tmp)
    predTest = clone(test_df, rows=predRows)
    if predRows:
      newTab = treatments2(train=self.train[self._n][-2:],
                           test=self.test[self._n],
                           test_df=predTest,
                           extent=self.extent,
                           far=False,
                           smote=True,
                           resample=False,
                           infoPrune=self.infoPrune,
                           Prune=self.Prune).main()
    else:
      # No predicted defects: run treatments2 without a test_df
      newTab = treatments2(train=self.train[self._n][-2:],
                           test=self.test[self._n],
                           far=False,
                           smote=True,
                           resample=False,
                           extent=self.extent,
                           infoPrune=self.infoPrune,
                           Prune=self.Prune).main()
    after = self.pred(train_DF, newTab, tunings=self.tunedParams,
                      smoteit=True)
    self.out_pred.append(_Abcd(before=actual, after=before))
    delta = cliffs(lst1=Bugs(predTest), lst2=after).delta()
    self.out.append(delta)
  # Build a label for this run from its configuration flags
  if self.extent == 0:
    append = 'Base'
  else:
    if self.Prune:
      append = str(self.extent) + '_iP(' + str(
          int(self.infoPrune * 100)) + r'%)' if not self.fSelect else str(
          self.extent) + '_w_iP(' + str(int(self.infoPrune * 100)) + r'%)'
    else:
      append = str(self.extent) if not self.fSelect else str(
          self.extent) + '_w'
  self.out.insert(0, self.dataName + '_' + append)
  self.out_pred.insert(0, self.dataName)
  print(self.out)
def depen(self, rows):
  """Objective: score a CART learner (no SMOTE), tuned with 'rows', via
  the last _Abcd measure on the held-out test data."""
  predicted = CART(self.train, self.test, tunings=rows, smoteit=False)
  return _Abcd(before=Bugs(self.test), after=predicted, show=False)[-1]
def depen(self, rows):
  """Objective: score a CART learner (with SMOTE), tuned with 'rows',
  via the last _Abcd measure on the held-out test data."""
  scores = _Abcd(before=Bugs(self.test),
                 after=CART(self.train, self.test,
                            tunings=rows, smoteit=True),
                 show=False)
  g = scores[-1]
  return g
def main():
  # Run the baseline predict/treat experiment over every dataset under
  # ../Data, printing _Abcd-based results per learner/configuration.
  dir = '../Data'
  from os import walk
  dataName = [Name for _, Name, __ in walk(dir)][0]
  numData = len(dataName)  # Number of data
  Prd = [CART]  # , rforest] # , adaboost, logit, knn]
  _smoteit = [False]  # , False]
  _tuneit = [False]  # , False]
  cd = []
  abcd = []
  res = {}
  for n in xrange(numData):
    one, two = explore(dir)
    data = [one[i] + two[i] for i in xrange(len(one))]
    print('##', dataName[n])
    for p in Prd:
      # print(p.__doc__)
      # params = tuner(p, data[0])
      # print(params)
      train = [dat[0] for dat in withinClass(data[n])]
      test = [dat[1] for dat in withinClass(data[n])]
      reps = 1
      abcd = [[], []]
      for t in _tuneit:
        # print('### Tuning') if t else print('### No Tuning')
        for _smote in _smoteit:
          # print('### SMOTE-ing') if _smote else print('### No SMOTE-ing')
          # print('```')
          # for _n in xrange(0):
          # set_trace()
          _n = -1
          # Training data
          for _ in xrange(reps):
            train_DF = createTbl(train[_n])
            # set_trace()
            # Testing data
            test_df = createTbl(test[_n])
            # Tune?
            tunedParams = None
            # tunedParams = None if not t else params
            # Find and apply contrast sets
            newTab = treatments(train=train[_n],
                                test=test[_n],
                                verbose=False,
                                smoteit=True)
            # Actual bugs
            actual = Bugs(test_df)
            actual1 = [0 if a == 0 else 1 for a in actual]
            # Use the classifier to predict the number of bugs in the raw data.
            before = p(train_DF, test_df, tunings=tunedParams,
                       smoteit=_smote)
            before1 = [0 if b == 0 else 1 for b in before]
            # Use the classifier to predict the number of bugs in the new data.
            after = p(train_DF, newTab, tunings=tunedParams,
                      smoteit=_smote)
            after1 = [0 if a == 0 else 1 for a in after]
            # % set_trace()
            # write('.')
            # write('Training: '); [write(l + ', ') for l in train[_n]]; print('\n')
            # cd.append(showoff(dataName[n], actual1, after1))
            # print(showoff(dataName[n], actual1, after1))
            # write('Test: '); [write(l) for l in test[_n]],
            out = _Abcd(before=actual1, after=before1)
            # NOTE(review): Python 2 integer division here -- confirm intended
            print('Win Ratio : %0.2d' % (sum(after1) / sum(actual1)))
            # %print('Prediction accuracy (g) %.2d' % out[-1])
            # print (out[-1])
            # Label the score row by learner / smote / tuning configuration
            if _smote:
              out.insert(0, p.__doc__ + '(s, Tuned) ') if t \
                  else out.insert(0, p.__doc__ + '(s, Naive) ')
              abcd[0].append(out)
            else:
              out.insert(0, p.__doc__ + '(raw, Tuned)') if t \
                  else out.insert(0, p.__doc__ + '(raw, Naive)')
              abcd[1].append(out)
          print()
  # cd.update({p.__doc__:sorted(cd)})
  # res.update({p.__doc__:(abcd[0][0:reps],
  #                        abcd[0][reps:] ,
  #                        abcd[1][0:reps],
  #                        abcd[1][reps:] ,
  #                        )})
  print('```')
  # print(cd)
  # printsk(res)
  print('```')