def __init__(self, data):
    """Store *data* and build the training and test tables from its tail.

    The training table is built from the slice ``data[-2:-1]`` and the test
    table from a one-element list holding ``data[-1]``. Both are built with
    ``isBin=True`` and ``bugThres=1``; the training table additionally gets
    ``_smote=False`` and ``duplicate=True``.

    Args:
        data: indexable collection of data sources handed to ``createTbl``.
    """
    self.data = data

    # Options common to both tables.
    shared = dict(isBin=True, bugThres=1)

    second_to_last = data[-2:-1]  # a slice, so at most one element
    self.train = createTbl(second_to_last, _smote=False, duplicate=True,
                           **shared)

    last_only = [data[-1]]
    self.test = createTbl(last_only, **shared)
def main(self):
    """Cluster the latest 'ant' training data and write the patch deltas.

    Steps, in order:
      1. ``run(dataName='ant').categorize()`` supplies ``train`` and ``test``.
      2. Tables are built from the last entry of each with ``isBin=True``.
      3. ``rforest`` is run on those tables (its result is not used here).
      4. The training rows are passed to ``self.nodes`` to get the clusters,
         and ``patches(...).deltasCSVWriter(name=self.name)`` is invoked.

    Returns:
        None.
    """
    train, test = run(dataName='ant').categorize()
    latest_train = train[-1]
    latest_test = test[-1]

    train_DF = createTbl(latest_train, isBin=True)
    test_DF = createTbl(latest_test, isBin=True)

    # The prediction result is not consumed below; the call is kept as-is.
    rforest(train=train_DF, test=test_DF)

    # Single pass, kept as a loop to mirror the original structure.
    for _ in xrange(1):
        clstr = list(self.nodes(train_DF._rows))
        planner = patches(train=latest_train, test=latest_test,
                          clusters=clstr)
        planner.deltasCSVWriter(name=self.name)
def __init__(self, train, test, clusters, prune=False, B=0.33,
             verbose=False):
    """Build the train/test tables, predict on them, and store the settings.

    Args:
        train: training data source passed to ``createTbl`` (``isBin=True``).
        test: test data source passed to ``createTbl`` (``isBin=True``).
        clusters: stored unchanged on ``self.clusters``.
        prune: stored on ``self.Prune``.
        B: stored on ``self.B``.
        verbose: stored on ``self.write``.
    """
    train_tbl = createTbl(train, isBin=True)
    self.train = train_tbl
    test_tbl = createTbl(test, isBin=True)
    self.test = test_tbl

    # Predictions from the forest, requested with smoteit and duplicate on.
    self.pred = rforest(train_tbl, test_tbl, smoteit=True, duplicate=True)

    self.clusters, self.Prune, self.B = clusters, prune, B

    # fWeight() runs only after the attributes above exist, and before
    # self.write is set -- same ordering as the original.
    self.mask = self.fWeight()
    self.write = verbose
def newTable(self, justDeltas=False):
    """Patch the selected test rows and return them as a table, or the deltas.

    Rows come from ``self.test._rows``. Unless ``self.bin`` is truthy, only
    rows whose second-to-last cell is non-zero are kept. Every kept row is
    passed through ``self.patchIt`` and the results are written to
    'tmp0.csv' under the test headers (last header dropped), with a literal
    0 appended to each row.

    Args:
        justDeltas: when equal to ``False`` (the default) the CSV is loaded
            back with ``createTbl``, removed with ``rm``, and the new table
            is returned. Otherwise ``self.change`` is returned and the CSV
            is left on disk.

    Returns:
        The reloaded table, or ``self.change``, or ``None`` if reloading
        raised and the debugger session started by ``set_trace`` was
        continued.
    """
    if self.bin:
        oldRows = self.test._rows
    else:
        oldRows = [r for r in self.test._rows if abs(r.cells[-2]) > 0]

    newRows = [self.patchIt(t) for t in oldRows]
    if self.write:
        self.deltasCSVWriter()

    header = [h.name for h in self.test.headers[:-1]]

    # NOTE(review): the original bound this draw to an unused local. The
    # draw itself is retained on the assumption that `randint` advances a
    # shared RNG and seeded runs should stay reproducible -- remove if not.
    # The bound is passed as an int because newer random.randint versions
    # reject non-int arguments.
    randint(0, int(1e6))

    with open('tmp0.csv', 'w') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow(header)
        for el in newRows:
            writer.writerow(el + [0])

    # Deliberately `== False` rather than `not justDeltas`, so non-bool
    # values such as None keep selecting the deltas branch as before.
    if justDeltas == False:
        try:
            new = createTbl(['tmp0.csv'])
            rm('tmp0.csv')
            return new
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit propagate instead of opening the debugger.
            # NOTE(review): set_trace() is a debugging hook kept from the
            # original; consider logging and re-raising instead.
            set_trace()
            return None
    return self.change
def __init__(self, train, test, clusters, prune=False, B=0.25,
             verbose=False, config=False, models=False, pred=None,
             name=None):
    """Build the train/test tables and store the patching settings.

    Args:
        train: training data source passed to ``createTbl``.
        test: test data source passed to ``createTbl``.
        clusters: stored unchanged on ``self.clusters``.
        prune: stored on ``self.Prune``.
        B: stored on ``self.B``.
        verbose: stored on ``self.write``.
        config: stored on ``self.bin``; when truthy the tables are built
            with ``isBin=False``.
        models: when truthy the tables are built with ``isBin=False``.
        pred: predictions stored on ``self.pred``. ``None`` (the default)
            gives each instance its own empty list; the previous ``[]``
            default was one list object shared by every instance.
        name: stored on ``self.name``.
    """
    # Binary tables are used only when neither config nor models is set.
    is_binary = not (config or models)
    self.train = createTbl(train, isBin=is_binary)
    self.test = createTbl(test, isBin=is_binary)

    self.name = name
    self.clusters = clusters
    self.Prune = prune
    self.B = B
    # fWeight() is called at the same point as before: after the tables
    # and B exist, before write/bin/pred/change are assigned.
    self.mask = self.fWeight()
    self.write = verbose
    self.bin = config
    self.pred = [] if pred is None else pred
    self.change = []
def main(self, config=False):
    """Cluster the training rows and return the patched table.

    Both tables are built with ``isBin=False``. When *config* is falsy the
    ``rforest`` predictor is run and ``patches`` is created without a
    ``bin`` argument; when truthy ``rforest2`` is run and ``patches`` gets
    ``bin=True``. The predictor's result is not used in this method.

    Args:
        config: selects the predictor and the ``bin`` flag as described.

    Returns:
        Whatever ``patches(...).newTable()`` returns.
    """
    train_DF = createTbl(self.train, isBin=False)
    test_DF = createTbl(self.test, isBin=False)

    extra = {}
    if config:
        rforest2(train=train_DF, test=test_DF)
        extra['bin'] = True
    else:
        rforest(train=train_DF, test=test_DF)

    clstr = list(self.nodes(train_DF._rows))
    planner = patches(train=self.train, test=self.test, clusters=clstr,
                      prune=self.prune, **extra)
    return planner.newTable()
def main(self, mode='defect', justDeltas=False):
    """Predict, cluster the training rows, and return the patched result.

    Supported modes and what they change:
      * ``"defect"`` -- predictor ``rforest``; no extra ``patches`` options.
      * ``"models"`` -- predictor ``rforest``; ``patches(models=True)``.
      * ``"config"`` -- predictor ``rforest2``;
        ``patches(name=self.name, config=True)``.

    Any other mode does nothing and returns ``None``.

    Args:
        mode: one of the strings listed above.
        justDeltas: forwarded to ``newTable``.

    Returns:
        Whatever ``patches(...).newTable(justDeltas=justDeltas)`` returns,
        or ``None`` for an unrecognised mode.
    """
    if mode == "defect":
        predictor, extra = rforest, {}
    elif mode == "models":
        predictor, extra = rforest, {'models': True}
    elif mode == "config":
        predictor, extra = rforest2, {'name': self.name, 'config': True}
    else:
        # Unrecognised mode: no tables are built, nothing is returned.
        return None

    train_DF = createTbl(self.train, isBin=False)
    test_DF = createTbl(self.test, isBin=False)
    before = predictor(train=train_DF, test=test_DF)
    clstr = list(self.nodes(train_DF._rows))

    planner = patches(train=self.train, test=self.test, clusters=clstr,
                      prune=self.prune, pred=before, **extra)
    return planner.newTable(justDeltas=justDeltas)
def newTable(self):
    """Patch the positively-predicted test rows and reload them as a table.

    Test rows are paired with ``self.pred``; rows whose prediction is
    greater than 0 are passed through ``self.patchIt``. The patched rows
    are written to 'tmp.csv' under the test headers (last header dropped),
    each with a literal 0 appended, and that file is loaded with
    ``createTbl``.

    Returns:
        The table built by ``createTbl(['tmp.csv'])``.
    """
    # Select first, patch second -- all filtering finishes before any
    # row is patched.
    flagged = []
    for row, prediction in zip(self.test._rows, self.pred):
        if prediction > 0:
            flagged.append(row)
    patched = [self.patchIt(row) for row in flagged]

    if self.write:
        self.deltasCSVWriter()

    column_names = [h.name for h in self.test.headers[:-1]]
    with open('tmp.csv', 'w') as csvfile:
        out = csv.writer(csvfile, delimiter=',')
        out.writerow(column_names)
        out.writerows(cells + [0] for cells in patched)

    return createTbl(['tmp.csv'])
def depen(self, row):
    """Apply the tunings in *row*, predict, and return the last score entry.

    Positions read from *row*:
      0 -- ``thresh`` passed to ``where2prd``
      1 -- ``self.tree.infoPrune``
      2 -- ``self.tree.m`` (as int)
      3 -- ``self.tree.n`` (as int)
      4 -- ``minSize``
      5 -- ``depthMin`` (as int)
      6 -- ``depthMax`` (as int)
      7 -- ``prune`` for ``self.where`` (true when > 0.5)
      8 -- ``self.tree.prune`` (true when > 0.5)

    Returns:
        The last element of ``_Abcd(before=actual, after=preds,
        show=False)``.
    """
    where_options = dict(
        minSize=row[4],
        depthMin=int(row[5]),
        depthMax=int(row[6]),
        prune=row[7] > 0.5,
    )
    self.where = defaults().update(**where_options)

    tree = self.tree
    tree.infoPrune = row[1]
    tree.m = int(row[2])
    tree.n = int(row[3])
    tree.prune = row[8] > 0.5

    actual = Bugs(createTbl([self.test], isBin=True))

    # Per the original author's note, the predictor takes its data in
    # string format, e.g. '../Data/ant/ant-1.6.csv'.
    tunings = [self.where, self.tree]
    preds = where2prd(self.train, [self.test], tunings=tunings,
                      thresh=row[0])

    scores = _Abcd(before=actual, after=preds, show=False)
    return scores[-1]
def crossval(self, _s=True, k=2):
    """Run *k* rounds of k-fold cross-validation and collect the scores.

    Each round takes a project from ``self.explorer2()``, builds a table
    with ``isBin=False`` and ``_smote=False``, and calls
    ``self.kFoldCrossVal(data, k=k, smote=_s)``, which must yield six
    iterables. They are appended, in order, to the accuracy, sensitivity,
    specificity, precision, f and g lists.

    Args:
        _s: forwarded to ``kFoldCrossVal`` as ``smote``.
        k: number of rounds, also forwarded as ``k``.

    Returns:
        Tuple ``(accuracy, sensitivity, specificity, precision, f, g)`` of
        lists, each beginning with its label string.
    """
    accuracy = [' Accuracy']
    precision = [' Precision']
    sensitivity = ['Sensitivity (Recall)']
    specificity = [' Specificity']
    f_score = [' f']
    g_score = [' g']

    # Order matches both the values from kFoldCrossVal and the return.
    collectors = (accuracy, sensitivity, specificity, precision,
                  f_score, g_score)

    for _ in xrange(k):
        proj = self.explorer2()
        data = createTbl(proj, isBin=False, _smote=False)
        a, b, c, d, e, f = self.kFoldCrossVal(data, k=k, smote=_s)
        for collector, values in zip(collectors, (a, b, c, d, e, f)):
            collector.extend(values)

    return collectors
#! /Users/rkrsn/miniconda/bin/python
def __init__(self, data):
    """Store *data*; train on all entries but the last, test on the last.

    Args:
        data: indexable collection of data sources handed to ``createTbl``.
    """
    self.data = data

    all_but_last = data[:-1]
    self.train = createTbl(all_but_last)

    last_entry = data[-1]
    self.test = createTbl([last_entry])
def test_smote():
    """Build a table from './Data/log.csv' with and without ``_smote``.

    After both tables are built the function stops at a ``set_trace()``
    breakpoint, then prints the two row counts (plain first).
    """
    csv_path = './Data/log.csv'

    # A fresh list per call, as in the original.
    plain = methods1.createTbl([csv_path], _smote=False)
    smoted = methods1.createTbl([csv_path], _smote=True)

    set_trace()
    print(len(plain._rows), len(smoted._rows))