# NOTE(review): this file contains a second definition of createTbl with the
# same signature further down; if both live in the same scope, the later one
# is the definition that callers actually get.
def createTbl(
        data,
        settings=None,
        _smote=False,
        isBin=False,
        bugThres=1,
        duplicate=False):
    """Pool several data sets, cluster them with where2, and return a table.

    Each entry of ``data`` is converted with ``makeAModel.csv2py``.  The rows
    of all entries are pooled onto the last converted model, which is then
    passed through ``prepare`` and ``where2``.  Every row found under a leaf
    of the resulting tree gets a leaf tag (``'_' + str(id(leaf))``) appended
    to its cells, and the tagged rows are handed to ``newTable``.

    Args:
        data: Iterable of inputs accepted by ``csv2py`` and ``table``
            (presumably CSV file names -- confirm against callers).
        settings: Forwarded to ``prepare``.  The previous implementation
            always passed ``None`` and silently ignored this argument.
        _smote: Forwarded to ``csv2py``.
        isBin: When true, the last cell of every row is replaced by 0 if
            it is below ``bugThres`` and by 1 otherwise.
        bugThres: Threshold used when ``isBin`` is true (default 1, i.e.
            "not defective" means fewer than 1 bug).
        duplicate: Forwarded to ``csv2py``.

    Returns:
        Whatever ``newTable(tbl, '=klass', rows)`` returns, where ``tbl`` is
        ``table(...)`` of the last entry in ``data``.

    Raises:
        ValueError: If ``data`` yields no entries.
    """
    converter = makeAmodel.makeAModel()
    pooled_rows = []
    model = None
    last_source = None
    for last_source in data:
        model = converter.csv2py(
            last_source, _smote=_smote, duplicate=duplicate)
        pooled_rows += model._rows
    if model is None:
        # Previously this surfaced as an UnboundLocalError a line later.
        raise ValueError("createTbl() needs at least one data set; got none")

    # The last converted model carries the rows of every data set.
    model._rows = pooled_rows
    # Initialize all parameters for where2 to run.
    prepare(model, settings=settings)
    # Decision tree using where2.
    tree = where2(model, model._rows)

    # The last data set is re-read and handed to newTable along with the rows.
    tbl = table(last_source)
    headerLabel = '=klass'
    Rows = []
    for leaf, _ in leaves(tree):
        leaf_tag = '_' + str(id(leaf))
        for row in leaf.val:
            cells = row.cells
            if isBin:
                cells[-1] = 0 if cells[-1] < bugThres else 1
            cells.append(leaf_tag)
            # Kept from the original: store the list in the instance dict.
            # This is a no-op when ``cells`` is an ordinary instance attribute.
            row.__dict__['cells'] = cells
            Rows.append(row.cells)
    return newTable(tbl, headerLabel, Rows)
# NOTE(review): an earlier definition of createTbl with the same signature
# precedes this one in the file; if both live in the same scope, this later
# definition is the one callers actually get.
def createTbl(
        data,
        settings=None,
        _smote=False,
        isBin=False,
        bugThres=1,
        duplicate=False):
    """Pool several data sets, cluster them with where2, and return a table.

    Each entry of ``data`` is converted with ``makeAModel.csv2py``.  The rows
    of all entries are pooled onto the last converted model, which is then
    passed through ``prepare`` and ``where2``.  Every row found under a leaf
    of the resulting tree gets a leaf tag (``'_' + str(id(leaf))``) appended
    to its cells, and the tagged rows are handed to ``newTable``.

    Args:
        data: Iterable of inputs accepted by ``csv2py`` and ``table``
            (presumably CSV file names -- confirm against callers).
        settings: Forwarded to ``prepare``.  The previous implementation
            always passed ``None`` and silently ignored this argument.
        _smote: Forwarded to ``csv2py``.
        isBin: When true, the last cell of every row is replaced by 0 if
            it is below ``bugThres`` and by 1 otherwise.
        bugThres: Threshold used when ``isBin`` is true (default 1, i.e.
            "not defective" means fewer than 1 bug).
        duplicate: Forwarded to ``csv2py``.

    Returns:
        Whatever ``newTable(tbl, '=klass', rows)`` returns, where ``tbl`` is
        ``table(...)`` of the last entry in ``data``.

    Raises:
        ValueError: If ``data`` yields no entries.
    """
    converter = makeAmodel.makeAModel()
    pooled_rows = []
    model = None
    last_source = None
    for last_source in data:
        model = converter.csv2py(
            last_source, _smote=_smote, duplicate=duplicate)
        pooled_rows += model._rows
    if model is None:
        # Previously this surfaced as an UnboundLocalError a line later.
        raise ValueError("createTbl() needs at least one data set; got none")

    # The last converted model carries the rows of every data set.
    model._rows = pooled_rows
    # Initialize all parameters for where2 to run.
    prepare(model, settings=settings)
    # Decision tree using where2.
    tree = where2(model, model._rows)

    # The last data set is re-read and handed to newTable along with the rows.
    tbl = table(last_source)
    headerLabel = '=klass'
    Rows = []
    for leaf, _ in leaves(tree):
        leaf_tag = '_' + str(id(leaf))
        for row in leaf.val:
            cells = row.cells
            if isBin:
                cells[-1] = 0 if cells[-1] < bugThres else 1
            cells.append(leaf_tag)
            # Kept from the original: store the list in the instance dict.
            # This is a no-op when ``cells`` is an ordinary instance attribute.
            row.__dict__['cells'] = cells
            Rows.append(row.cells)
    return newTable(tbl, headerLabel, Rows)
def csv2py(self, filename, _smote=False, duplicate=False):
    """Convert a csv file to a model file.

    Reads ``filename`` with ``table``, runs ``self.str2num`` on the result,
    removes from ``tbl.indep`` every column whose name also appears in
    ``tbl.depen``, and builds the model with ``self.data``.

    Args:
        filename: Passed straight to ``table``.
        _smote: Accepted for interface compatibility; unused here because
            the SMOTE resampling step is disabled in this method.
        duplicate: Accepted for interface compatibility; unused for the
            same reason.

    Returns:
        ``self.data(indep=..., less=..., _rows=...)`` where ``indep`` and
        ``less`` are the independent and dependent column names and
        ``_rows`` is a list of rows whose string cells have been mapped
        through ``self.translate``.

    Raises:
        KeyError: If a string cell has no entry in ``self.translate``.
    """
    tbl = table(filename)
    # Presumably fills self.translate with the string -> number mapping
    # used below -- confirm in str2num.
    self.str2num(tbl)

    def tonum(cell):
        # Strings go through the translation map; other values pass through.
        return self.translate[cell] if isinstance(cell, str) else cell

    # table.py does not separate dependent and independent variables, so
    # drop from indep every column that is also listed in depen.  The list
    # is rebuilt in one step: popping while enumerating skipped the element
    # that followed each removal and could pop an unrelated column when
    # the inner loop kept running after a removal.
    depen_names = [col.name for col in tbl.depen]
    tbl.indep[:] = [col for col in tbl.indep if col.name not in depen_names]

    return self.data(
        indep=[col.name for col in tbl.indep],
        less=[col.name for col in tbl.depen],
        # A real list, so the rows can be traversed more than once even
        # where map() would return a one-shot iterator.
        _rows=[[tonum(cell) for cell in row.cells] for row in tbl._rows])