def createTbl(data, settings=None, _smote=False, isBin=False, bugThres=2,
              duplicate=False):
    """Pool the rows of several inputs, run where2 on them and return a
    table whose rows carry an extra '=klass' leaf-label cell.

    Args:
      data: iterable of items accepted by ``csv2py`` / ``table``; must
        yield at least one item.
      settings: forwarded to ``prepare``; None keeps prepare's defaults.
      _smote: True/False, SMOTE the input data (or not); forwarded to
        ``csv2py``.
      isBin: True/False, reduce bugs to defects/no defects: the last cell
        of every row becomes 0 when it is below ``bugThres``, else 1.
      bugThres: threshold for marking a row as defective, default = 2.
        Not defective => bugs < bugThres.
      duplicate: forwarded to ``csv2py``.

    Returns:
      ``newTable(table(<last item of data>), '=klass', rows)``.

    Raises:
      ValueError: if ``data`` yields nothing.
    """
    makeaModel = makeAmodel.makeAModel()
    m = None
    pooled = []
    for t in data:
        m = makeaModel.csv2py(t, _smote=_smote, duplicate=duplicate)
        pooled += m._rows
    if m is None:
        # Previously this surfaced as an UnboundLocalError on `m`.
        raise ValueError('createTbl: data must contain at least one item')

    # The model built from the last input is reused as the container for
    # the pooled rows of all inputs.
    m._rows = pooled
    prepare(m, settings=settings)  # Initialize all parameters for where2 to run
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(t)  # header template comes from the last input
    headerLabel = '=klass'

    Rows = []
    for leaf, _ in leaves(tree):
        # NOTE: the label is derived from the leaf object's id, so it is not
        # stable across runs, and the modulo can make two leaves collide.
        label = '_' + str(id(leaf) % 1000)
        for row in leaf.val:
            cells = row.cells
            if isBin:
                cells[-1] = 0 if cells[-1] < bugThres else 1
            cells.append(label)  # mutates the row's own cell list in place
            row.__dict__.update({'cells': cells})
            Rows.append(row.cells)
    return newTable(tbl, headerLabel, Rows)
def createTbl(
        data,
        settings=None,
        _smote=False,
        isBin=False,
        bugThres=2,
        duplicate=False):
    """Build a '=klass'-labelled table from the pooled rows of ``data``.

    Every item of ``data`` is loaded with ``csv2py``; the rows of all items
    are pooled, a where2 tree is built over them, and each row gets the
    label of the leaf it landed in appended to its cells.

    kwargs:
      settings  = forwarded to ``prepare`` (None -> prepare's defaults)
      _smote    = True/False : SMOTE input data (or not)
      isBin     = True/False : Reduce bugs to defects/no defects
      bugThres  = int : Threshold for marking stuff as defective,
                  default = 2. Not defective => Bugs < bugThres
      duplicate = forwarded to ``csv2py``

    Returns the value of ``newTable`` called with the table of the last
    item in ``data``, the '=klass' header and the labelled rows.

    Raises ValueError when ``data`` yields no items.
    """
    makeaModel = makeAmodel.makeAModel()
    model = None
    allRows = []
    for t in data:
        model = makeaModel.csv2py(t, _smote=_smote, duplicate=duplicate)
        allRows += model._rows
    if model is None:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError('createTbl: data must contain at least one item')

    model._rows = allRows  # last-loaded model now holds every pooled row
    # Initialize all parameters for where2 to run; honour caller settings.
    prepare(model, settings=settings)
    tree = where2(model, model._rows)  # Decision tree using where2
    tbl = table(t)
    headerLabel = '=klass'

    Rows = []
    for k, _ in leaves(tree):
        # NOTE: id-based label; differs between runs and may collide
        # between leaves because of the modulo.
        tag = '_' + str(id(k) % 1000)
        for j in k.val:
            tmp = j.cells
            if isBin:
                tmp[-1] = 0 if tmp[-1] < bugThres else 1
            tmp.append(tag)  # in-place: the row keeps the extra cell
            j.__dict__.update({'cells': tmp})
            Rows.append(j.cells)
    return newTable(tbl, headerLabel, Rows)
def tdivPrec(where=None, dtree=None, train=None, test=None): rseed(1) makeaModel = makeAModel() # pdb.set_trace() """ Training """ _r = [] for t in train: m = makeaModel.csv2py(t) _r += m._rows m._rows = _r prepare(m, settings=where) # Initialize all parameters for where2 to run tree = where2(m, m._rows) # Decision tree using where2 tbl = table(t) headerLabel = '=klass' Rows = [] for k, _ in leaves(tree): # for k, _ in leaves(tree): for j in k.val: tmp = (j.cells) tmp.append('_' + str(id(k) % 1000)) j.__dict__.update({'cells': tmp}) Rows.append(j.cells) tbl2 = newTable(tbl, headerLabel, Rows) """ Testing """ _r = [] for tt in test: mTst = makeaModel.csv2py(tt) _r += mTst._rows mTst._rows = _r prepare(mTst, settings=where) # Initialize all parameters for where2 to run tree = where2(mTst, mTst._rows) # Decision tree using where2 tbl = table(tt) headerLabel = '=klass' Rows = [] for k, _ in leaves(tree): # for k, _ in leaves(tree): for j in k.val: tmp = (j.cells) tmp.append('_' + str(id(k) % 1000)) j.__dict__.update({'cells': tmp}) Rows.append(j.cells) tbl3 = newTable(tbl, headerLabel, Rows) temp = [] def sort(lst): return [i[0] for i in sorted(enumerate(lst), key = lambda x:x[1])], \ [i[1] for i in sorted(enumerate(lst), key = lambda x:x[1])] def thresh(val1, val2): indx, sorted = sort() def isdefective(case, test=False):
def createDF(data):
    """Return a pandas DataFrame of the pooled rows of ``data``, each row
    extended with the label of the where2 leaf it belongs to.

    The columns are ``get_headers(data)`` followed by 'klass'.
    """
    factory = makeAModel()
    pooled = []
    for t in data:
        m = factory.csv2py(t)
        pooled.extend(m._rows)
    # The model from the last input carries the pooled rows of all inputs.
    m._rows = pooled
    prepare(m)
    tree = where2(m, m._rows)

    labelled = []
    for leaf, _ in leaves(tree):
        # Label depends on the leaf object's id, hence varies between runs.
        label = 'Class_' + str(id(leaf) % 1000)
        for row in leaf.val:
            cells = row.cells
            cells.append(label)  # extends the row's own list in place
            row.__dict__['cells'] = cells
            labelled.append(row.cells)
    return pd.DataFrame(labelled, columns=get_headers(data) + ['klass'])
def createDF(data):
    """Pool every input in ``data``, build a where2 tree over the pooled
    rows and return them as a DataFrame with a trailing 'klass' column
    holding each row's leaf label.
    """
    loader = makeAModel()
    everyRow = []
    for t in data:
        m = loader.csv2py(t)
        everyRow += m._rows
    m._rows = everyRow  # reuse the last model as holder of all rows
    prepare(m)

    def labelledCells(tree):
        # Yield each row's cell list after appending its leaf label to it.
        for leaf, _ in leaves(tree):
            for row in leaf.val:
                cells = row.cells
                cells.append('Class_' + str(id(leaf) % 1000))
                row.__dict__['cells'] = cells
                yield row.cells

    Rows = list(labelledCells(where2(m, m._rows)))
    columns = get_headers(data) + ['klass']
    return pd.DataFrame(Rows, columns=columns)
def createTbl(data):
    """Return a table of the pooled rows of ``data`` in which every row has
    the label of its where2 leaf appended under the '=klass' header.

    The header template is ``table`` of the last item in ``data``.
    """
    factory = makeAModel()
    pooled = []
    for t in data:
        m = factory.csv2py(t)
        pooled.extend(m._rows)
    # The last-loaded model becomes the container of all pooled rows.
    m._rows = pooled
    prepare(m, settings=None)  # Initialize all parameters for where2 to run
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(t)

    labelled = []
    for leaf, _ in leaves(tree):
        # id-based label: not reproducible from one run to the next.
        label = '_' + str(id(leaf) % 1000)
        for row in leaf.val:
            cells = row.cells
            cells.append(label)  # the row's own list grows by one cell
            row.__dict__['cells'] = cells
            labelled.append(row.cells)
    return newTable(tbl, '=klass', labelled)
def createTbl(data):
    """Load each item of ``data``, pool the rows, build a where2 tree and
    hand the leaf-labelled rows to ``newTable`` with a '=klass' header.
    """
    loader = makeAModel()
    everyRow = []
    for t in data:
        m = loader.csv2py(t)
        everyRow += m._rows
    m._rows = everyRow  # all rows now live on the last model
    prepare(m, settings=None)  # Initialize all parameters for where2 to run
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(t)
    headerLabel = '=klass'

    def labelledCells():
        # Append the leaf label to every row's cells and yield the result.
        for leaf, _ in leaves(tree):
            for row in leaf.val:
                cells = row.cells
                cells.append('_' + str(id(leaf) % 1000))
                row.__dict__['cells'] = cells
                yield row.cells

    return newTable(tbl, headerLabel, list(labelledCells()))
def _whereLabelledTable(makeaModel, files, settings):
    """Pool the rows of ``files``, build a where2 tree over them and return
    a table whose rows carry their leaf label under the '=klass' header."""
    model = None
    pooled = []
    for f in files:
        model = makeaModel.csv2py(f)
        pooled += model._rows
    if model is None:
        # Previously an empty input surfaced as an unbound-variable error.
        raise ValueError('tdivPrec: train and test must each contain '
                         'at least one item')

    model._rows = pooled  # last-loaded model holds the rows of all files
    prepare(model, settings=settings)  # Initialize all parameters for where2
    tree = where2(model, model._rows)  # Decision tree using where2
    tbl = table(f)  # header template comes from the last file

    rows = []
    for leaf, _ in leaves(tree):
        # NOTE: id-based label; not stable across runs and may collide.
        label = '_' + str(id(leaf) % 1000)
        for row in leaf.val:
            cells = row.cells
            cells.append(label)  # mutates the row's own cell list
            row.__dict__.update({'cells': cells})
            rows.append(row.cells)
    return newTable(tbl, '=klass', rows)


def tdivPrec(where=None, dtree=None, train=None, test=None):
    """Label every test row twice: from its own bug cell and from the mean
    bug cell of the tdiv leaf it is dropped into.

    Args:
      where: settings forwarded to ``prepare`` for both train and test.
      dtree: options forwarded to ``tdiv`` as ``opt``.
      train: iterable of training inputs accepted by ``csv2py``/``table``.
      test: iterable of test inputs accepted by ``csv2py``/``table``.

    Returns:
      ``[testDefective, defectivClust]``: two parallel lists of
      'Defect' / 'No Defect' strings, one entry per row of the test table.
      The first is derived from the row itself, the second from the leaf
      of the tree (grown on the training table) that the row reaches.

    Raises:
      ValueError: if ``train`` or ``test`` yields no items.
    """
    rseed(1)
    makeaModel = makeAModel()

    # Training table is built first, then the test table, so the order in
    # which the two where2 runs happen is unchanged.
    tbl2 = _whereLabelledTable(makeaModel, train, where)
    tbl3 = _whereLabelledTable(makeaModel, test, where)

    def isdefective(case, asCluster=False):
        # cells[-2] is read as the bug count; presumably it sits just before
        # the appended label -- TODO confirm against newTable's row layout.
        if not asCluster:
            return 'Defect' if case.cells[-2] > 0 else 'No Defect'
        bugs = [r.cells[-2] for r in case.rows]
        return 'Defect' if np.mean(bugs) > 1.5 else 'No Defect'

    # A list (not a lazy map object) so the argument has the same shape
    # under Python 2 and Python 3.
    t = discreteNums(tbl2, [r.cells for r in tbl2._rows])
    myTree = tdiv(t, opt=dtree)

    testDefective = []
    defectivClust = []
    for tC in tbl3._rows:
        loc = drop(tC, myTree)
        testDefective.append(isdefective(tC))
        defectivClust.append(isdefective(loc, asCluster=True))
    return [testDefective, defectivClust]