# NOTE(review): this variant of tdivPrec is collapsed onto one physical line AND
# truncated mid-definition (the nested `def isdefective(case, test=False):` has
# no body), so it cannot be reformatted into valid Python. A complete variant of
# the same function (same signature) appears later in this file; prefer that one.
# Left byte-identical here pending confirmation of the missing tail.
def tdivPrec(where=None, dtree=None, train=None, test=None): rseed(1) makeaModel = makeAModel() # pdb.set_trace() """ Training """ _r = [] for t in train: m = makeaModel.csv2py(t) _r += m._rows m._rows = _r prepare(m, settings=where) # Initialize all parameters for where2 to run tree = where2(m, m._rows) # Decision tree using where2 tbl = table(t) headerLabel = '=klass' Rows = [] for k, _ in leaves(tree): # for k, _ in leaves(tree): for j in k.val: tmp = (j.cells) tmp.append('_' + str(id(k) % 1000)) j.__dict__.update({'cells': tmp}) Rows.append(j.cells) tbl2 = newTable(tbl, headerLabel, Rows) """ Testing """ _r = [] for tt in test: mTst = makeaModel.csv2py(tt) _r += mTst._rows mTst._rows = _r prepare(mTst, settings=where) # Initialize all parameters for where2 to run tree = where2(mTst, mTst._rows) # Decision tree using where2 tbl = table(tt) headerLabel = '=klass' Rows = [] for k, _ in leaves(tree): # for k, _ in leaves(tree): for j in k.val: tmp = (j.cells) tmp.append('_' + str(id(k) % 1000)) j.__dict__.update({'cells': tmp}) Rows.append(j.cells) tbl3 = newTable(tbl, headerLabel, Rows) temp = [] def sort(lst): return [i[0] for i in sorted(enumerate(lst), key = lambda x:x[1])], \ [i[1] for i in sorted(enumerate(lst), key = lambda x:x[1])] def thresh(val1, val2): indx, sorted = sort() def isdefective(case, test=False):
def _tdivPrec(dir='camel/'): #============================================================================== # Recursively clustering the model. #============================================================================== train = ['camel-1.0.csv', 'camel-1.2.csv', 'camel-1.2.csv'] test = ['camel-1.6.csv'] rseed(1) makeaModel = makeAModel() _rows = [] # Concatenate training cases for t in train: file = dir + t m = makeaModel.csv2py(file) prepare(m) # Initialize all parameters for where2 to run tree = where2(m, m._rows) # Decision tree using where2 tbl = table(file) headerLabel = '=klass' Rows = [] for k, _ in leaves(tree): for j in k.val: tmp = (j.cells) tmp.append('_' + str(id(k) % 1000)) j.__dict__.update({'cells': tmp}) Rows.append(j.cells) _rows += Rows tbl2 = makeMeATable(tbl, headerLabel, _rows) # Test case! _rows = [] for tt in test: file = dir + tt m = makeaModel.csv2py(file) prepare(m) # Initialize all parameters for where2 to run tree = where2(m, m._rows) # Decision tree using where2 tbl = table(file) headerLabel = '=klass' Rows = [] for k, _ in leaves(tree): for j in k.val: tmp = (j.cells) tmp.append('_' + str(id(k) % 1000)) j.__dict__.update({'cells': tmp}) Rows.append(j.cells) _rows += Rows tbl3 = makeMeATable(tbl, headerLabel, _rows) testCase = tbl3._rows print testCase t = discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows)) myTree = tdiv(t) showTdiv(myTree) loc = leaveOneOut(testCase[randi(0, len(testCase))], myTree) contrastSet = getContrastSet(loc, myTree) print 'Contrast Set:', contrastSet
def _tdivPrec(dir='camel/'): #============================================================================== # Recursively clustering the model. #============================================================================== train=['camel-1.0.csv', 'camel-1.2.csv', 'camel-1.2.csv'] test=['camel-1.6.csv'] rseed(1) makeaModel=makeAModel() _rows=[] # Concatenate training cases for t in train: file=dir+t m=makeaModel.csv2py(file) prepare(m) # Initialize all parameters for where2 to run tree=where2(m, m._rows) # Decision tree using where2 tbl = table(file) headerLabel='=klass' Rows=[] for k,_ in leaves(tree): for j in k.val: tmp=(j.cells) tmp.append('_'+str(id(k) % 1000)) j.__dict__.update({'cells': tmp}) Rows.append(j.cells) _rows+=Rows tbl2=makeMeATable(tbl, headerLabel, _rows) # Test case! _rows=[] for tt in test: file=dir+tt m=makeaModel.csv2py(file) prepare(m) # Initialize all parameters for where2 to run tree=where2(m, m._rows) # Decision tree using where2 tbl = table(file) headerLabel='=klass' Rows=[] for k,_ in leaves(tree): for j in k.val: tmp=(j.cells) tmp.append('_'+str(id(k) % 1000)) j.__dict__.update({'cells': tmp}) Rows.append(j.cells) _rows+=Rows tbl3=makeMeATable(tbl, headerLabel, _rows) testCase=tbl3._rows print testCase t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows)) myTree=tdiv(t) showTdiv(myTree) loc = leaveOneOut(testCase[randi(0, len(testCase))], myTree) contrastSet = getContrastSet(loc, myTree) print 'Contrast Set:', contrastSet
def _tdivdemo(file='data/nasa93dem.csv'):
    """Demo: cluster `file` with WHERE2, tag each row with its leaf-cluster
    id, pop 500 random rows out as a test set, grow a decision tree on the
    remainder and drop one random test row through it."""
    #==========================================================================
    # We start by recursively clustering the model.
    #==========================================================================
    makeaModel = makeAModel()
    m = makeaModel.csv2py(file)
    rseed(1)  # deterministic clustering / sampling
    # alias = dict(zip(makeaModel.translate.values(), makeaModel.translate.keys()))
    # print alias
    # def num2str(lst):
    #     return [alias[z] for z in lst]
    prepare(m)  # Initialize all parameters for where2 to run
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(file)
    headerLabel = '=klass'
    Rows = []
    for k, _ in leaves(tree):
        for j in k.val:
            tmp = (j.cells)
            tmp.append('_' + str(id(k) % 1000))  # leaf-cluster tag
            j.__dict__.update({'cells': tmp})
            Rows.append(j.cells)
    tbl2 = makeMeATable(tbl, headerLabel, Rows)
    # print
    # Off-by-one fix (twice below): randi's upper bound is inclusive in this
    # codebase (cf. randi(0, len(Rows) - 1) in a sibling variant), so passing
    # len(...) unadjusted can raise IndexError.
    testCase = [tbl2._rows.pop(randi(0, len(tbl2._rows) - 1))
                for k in xrange(500)]
    t = discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))
    myTree = tdiv(t)
    showTdiv(myTree)
    loc = leaveOneOut(testCase[randi(0, len(testCase) - 1)], myTree)
def _tdivdemo(file='data/nasa93dem.csv'): #============================================================================== # We start by recursively clustering the model. #============================================================================== makeaModel=makeAModel() m=makeaModel.csv2py(file) prepare(m) # Initialize all parameters for where2 to run tree=where2(m, m._rows) # Decision tree using where2 tbl = table(file) headerLabel='=klass' Rows=[] for k,_ in leaves(tree): for j in k.val: tmp=j.cells tmp.append(id(k) % 1000) j.__dict__.update({'cells': tmp}) Rows.append(j.cells) tbl2=makeMeATable(tbl, headerLabel, Rows) testCase=tbl2._rows.pop(randi(0,len(Rows)-1)) t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows)) myTree=tdiv(t) loc=apex(testCase, myTree) print loc.__dict__ print 'Id: ',loc.mode, ' Level: ', loc.lvl, ' Variable: ', loc.f.name showTdiv(myTree) #============================================================================== for node, lvl in dtnodes(myTree): rows=map(lambda x:x.cells,node.rows)
def _tdivdemo(file='data/nasa93dem.csv'): #============================================================================== # We start by recursively clustering the model. #============================================================================== makeaModel=makeAModel() m=makeaModel.csv2py(file) #alias = dict (zip(makeaModel.translate.values(),makeaModel.translate.keys())) #print alias #def num2str(lst): # return [alias[z] for z in lst] prepare(m) # Initialize all parameters for where2 to run tree=where2(m, m._rows) # Decision tree using where2 tbl = table(file) headerLabel='=klass' Rows=[] for k,_ in leaves(tree): for j in k.val: tmp=(j.cells) tmp.append('_'+str(id(k) % 1000)) j.__dict__.update({'cells': tmp}) Rows.append(j.cells) tbl2=makeMeATable(tbl, headerLabel, Rows) print testCase=[tbl2._rows.pop(randi(0, len(tbl2._rows))) for k in xrange(500)] t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows)) myTree=tdiv(t) showTdiv(myTree) loc = leaveOneOut(testCase[randi(0, len(testCase))], myTree) contrastSet = getContrastSet(loc, myTree) print 'Contrast Set:', contrastSet
def main():
    """Train WHERE2 on The.data.train, grow a decision tree over the
    clustered table, route The.data.predict's rows to their leaves, and
    return the ABCD score of the leaf predictions vs. the actual labels."""
    global The
    # Silence the verbose tree dumps for this run.
    The.option.showWhere = False
    The.option.showDTree = False
    testdata, actual = buildtestdata1(The.data.predict)
    m = csv2py(The.data.train)
    Init(m)  # init WHere!!
    tree = where2(m, m._rows)  # tree generated by clustering
    tbl1, row = clustertbl(The.data.train, tree)  # new table with cluster ID
    The.option.clustering = True
    Dtree = buildtdiv(tbl1)
    testleaf = gotoleaf(testdata, Dtree)  # all the leaves the testdata should go
    score = _Abcd(testleaf, testdata, actual)
    return score
def createDF(data):
    """Concatenate the rows of every CSV in `data` into one model, cluster
    with where2, tag each row with its leaf cluster ('Class_<n>'), and return
    a pandas DataFrame whose last column 'klass' holds the cluster tag."""
    makeaModel = makeAModel()
    _r = []
    for t in data:
        m = makeaModel.csv2py(t)
        _r += m._rows
    # Push the concatenated rows into the (last) model before clustering.
    m._rows = _r
    prepare(m)
    tree = where2(m, m._rows)
    Rows = []
    for k, _ in leaves(tree):
        for j in k.val:
            tmp = (j.cells)
            tmp.append('Class_' + str(id(k) % 1000))  # leaf-cluster tag
            j.__dict__.update({'cells': tmp})
            Rows.append(j.cells)
    return pd.DataFrame(Rows, columns=get_headers(data) + ['klass'])
def createDF(data):
    """Duplicate of the createDF above: merge all CSVs in `data` into one
    model, cluster it with where2, tag rows with their leaf cluster, and
    return the tagged rows as a pandas DataFrame (last column 'klass')."""
    makeaModel = makeAModel()
    _r = []
    for t in data:
        m = makeaModel.csv2py(t)
        _r += m._rows
    # Last model carries the union of all rows into clustering.
    m._rows = _r
    prepare(m)
    tree = where2(m, m._rows)
    Rows = []
    for k, _ in leaves(tree):
        for j in k.val:
            tmp = (j.cells)
            tmp.append('Class_' + str(id(k) % 1000))
            j.__dict__.update({'cells': tmp})
            Rows.append(j.cells)
    return pd.DataFrame(Rows, columns=get_headers(data) + ['klass'])
def run(train, test):
    """Cluster `train` with where2, grow a decision tree with contrast sets,
    route the rows of `test` to their leaves, print/summarize them, and
    return the ABCD score against the actual labels."""
    testdata, actual = buildtestdata1(test)
    m, sym2num = csv2py(train)
    num2sym = dict(zip(sym2num.values(), sym2num.keys()))  # invert the map
    Init(m)  # init The class
    tree = where2(m, m._rows)  # tree generated by clustering
    tbl1, row = clustertbl(train, tree, num2sym)  # new table with cluster ID
    # savetbl(tbl1, "data/trainingData")  # write new table to a file
    The.option.clustering = True
    Dtree = buildtdiv(tbl1)
    leaves = findleaves(Dtree)
    buildcontrast1(Dtree, leaves)
    testleaf = gotoleaf(testdata, Dtree)  # all the leaves the testdata should go
    printtogo(testleaf)
    summarize(leaves, Dtree)
    score = _Abcd(testleaf, testdata, actual)
    # print "Score: ", score
    return score
def run(train, test):
    """Variant of run() above (does not set The.option.clustering): cluster
    `train`, grow a contrast-set decision tree, route `test` rows to leaves,
    summarize, and return the ABCD score."""
    # random.seed(1)
    # data = o(src = "data/nasa93train.csv")
    testdata, actual = buildtestdata1(test)
    m, sym2num = csv2py(train)
    num2sym = dict(zip(sym2num.values(), sym2num.keys()))  # invert the map
    Init(m)  # init The class
    tree = where2(m, m._rows)  # tree generated by clustering
    tbl1, row = clustertbl(train, tree, num2sym)  # new table with cluster ID
    # savetbl(tbl1, "data/trainingData")  # write new table to a file
    Dtree = buildtdiv(tbl1)
    leaves = findleaves(Dtree)
    buildcontrast1(Dtree, leaves)
    testleaf = gotoleaf(testdata, Dtree)  # all the leaves the testdata should go
    printtogo(testleaf)
    summarize(leaves, Dtree)
    score = _Abcd(testleaf, testdata, actual)
    return score
def main():
    """Cluster data/nasa93train.csv, save the clustered table to a *_copy
    file, hold out 30 rows as test data, grow a contrast-set decision tree,
    and report where the held-out rows land."""
    random.seed(1)  # deterministic test-row selection
    data = o(src="data/nasa93train.csv")
    # data = o(src = "data/ant-1.3.csv")
    m, sym2num = csv2py(data.src)
    num2sym = dict(zip(sym2num.values(), sym2num.keys()))  # invert the map
    Init(m)  # init The class
    tree = where2(m, m._rows)  # tree generated by clustering
    tbl1, row = clustertbl(data.src, tree, num2sym)  # new table with cluster ID
    fname = data.src[:-4] + '_copy' + data.src[-4:]  # foo.csv -> foo_copy.csv
    savetbl(tbl1, fname)  # write new table to a file
    # clusterscore = calScore(tree)
    testdata = buildtestdata(tbl1, 30)  # select the testdata
    Dtree = buildtdiv(tbl1)
    leaves = findleaves(Dtree)
    testleaf = gotoleaf(testdata, Dtree)  # all the leaves the testdata should go
    buildcontrast1(Dtree, leaves)
    printtogo(testleaf)
    # NOTE(review): `summerize` (sic) must match the helper's actual name
    # elsewhere in the project (other variants call `summarize`) -- confirm.
    summerize(leaves, Dtree)
def _tdivdemo(file='data/nasa93dem.csv'): #============================================================================== # We start by recursively clustering the model. #============================================================================== makeaModel=makeAModel() m=makeaModel.csv2py(file) alias = dict (zip(makeaModel.translate.values(),makeaModel.translate.keys())) def num2str(lst): return [alias[z] for z in lst] prepare(m) # Initialize all parameters for where2 to run tree=where2(m, m._rows) # Decision tree using where2 tbl = table(file) headerLabel='=klass' Rows=[] for k,_ in leaves(tree): for j in k.val: tmp=num2str(j.cells) tmp.append('_'+str(id(k) % 1000)) j.__dict__.update({'cells': tmp}) Rows.append(j.cells) tbl2=makeMeATable(tbl, headerLabel, Rows) testCase=tbl2._rows.pop(1) t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows)) myTree=tdiv(t) showTdiv(myTree) loc = leaveOneOut(testCase, myTree) print loc.__dict__ getContrastSet(loc, myTree) #============================================================================== #for node, lvl in dtnodes(myTree): #rows=map(lambda x:x.cells,node.rows) #pdb.set_trace() #print lvl, len(rows), [ k._id for k in node.rows] #============================================================================== headerLabels={} [headerLabels.update({k.name:indx}) for indx, k in enumerate(tbl2.headers)]
def main():
    """Cluster data/ant-1.4.csv, save the clustered training table, grow a
    contrast-set decision tree, route the ant-1.4 test rows to leaves,
    summarize, and score with _Abcd."""
    random.seed(1)  # deterministic run
    # data = o(src = "data/nasa93train.csv")
    # data = o(src = ["data/ant-1.3.csv","data/ant-1.7.csv", "data/ant-1.5.csv","data/ant-1.6.csv" ])
    data = o(src="data/ant-1.4.csv")
    m, sym2num = csv2py(data.src)
    num2sym = dict(zip(sym2num.values(), sym2num.keys()))  # invert the map
    Init(m)  # init The class
    tree = where2(m, m._rows)  # tree generated by clustering
    tbl1, row = clustertbl(data.src, tree, num2sym)  # new table with cluster ID
    # NOTE(review): 'traning' (sic) is the on-disk filename this script has
    # always written; kept byte-identical so downstream readers still find it.
    fname = "data/traningDataSet.csv"
    savetbl(tbl1, fname)  # write new table to a file
    # clusterscore = calScore(tree)
    # testdata = buildtestdata(tbl1, 10)  # select the testdata randomly
    testdata, actual = buildtestdata1(f="data/ant-1.4.csv")
    Dtree = buildtdiv(tbl1)
    leaves = findleaves(Dtree)
    buildcontrast1(Dtree, leaves)
    testleaf = gotoleaf(testdata, Dtree)  # all the leaves the testdata should go
    printtogo(testleaf)
    summarize(leaves, Dtree)
    _Abcd(testleaf, actual)
def _tdivdemo(file='data/nasa93dem.csv'):
    """Smallest demo variant: cluster `file` with WHERE2, tag each row with a
    numeric leaf-cluster id, build and display a decision tree over the
    tagged rows. No test row is held out."""
    #==========================================================================
    # We start by recursively clustering the model.
    #==========================================================================
    makeaModel = makeAModel()
    m = makeaModel.csv2py(file)
    prepare(m)  # Initialize all parameters for where2 to run
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(file)
    headerLabel = '=klass'
    Rows = []
    for k, _ in leaves(tree):
        for j in k.val:
            # Tag is a bare int here (other variants use '_<n>' strings).
            tmp = j.cells
            tmp.append(id(k) % 1000)
            j.__dict__.update({'cells': tmp})
            Rows.append(j.cells)
    tbl2 = makeMeATable(tbl, headerLabel, Rows)
    t = discreteNums(tbl2, Rows)
    myTree = tdiv(t)
    showTdiv(myTree)
def main():
    """Duplicate of the ant-1.4 main() above: cluster data/ant-1.4.csv, save
    the clustered table, grow a contrast-set decision tree, route the test
    rows, summarize, and score with _Abcd."""
    random.seed(1)  # deterministic run
    # data = o(src = "data/nasa93train.csv")
    # data = o(src = ["data/ant-1.3.csv","data/ant-1.7.csv", "data/ant-1.5.csv","data/ant-1.6.csv" ])
    data = o(src="data/ant-1.4.csv")
    m, sym2num = csv2py(data.src)
    num2sym = dict(zip(sym2num.values(), sym2num.keys()))  # invert the map
    Init(m)  # init The class
    tree = where2(m, m._rows)  # tree generated by clustering
    tbl1, row = clustertbl(data.src, tree, num2sym)  # new table with cluster ID
    # Filename spelling ('traning') kept byte-identical -- it is the name
    # other tooling reads back.
    fname = "data/traningDataSet.csv"
    savetbl(tbl1, fname)  # write new table to a file
    # clusterscore = calScore(tree)
    # testdata = buildtestdata(tbl1, 10)  # select the testdata randomly
    testdata, actual = buildtestdata1(f="data/ant-1.4.csv")
    Dtree = buildtdiv(tbl1)
    leaves = findleaves(Dtree)
    buildcontrast1(Dtree, leaves)
    testleaf = gotoleaf(testdata, Dtree)  # all the leaves the testdata should go
    printtogo(testleaf)
    summarize(leaves, Dtree)
    _Abcd(testleaf, actual)
def tdivPrec(where=None, dtree=None, train=None, test=None):
    """Train/test defect prediction via WHERE2 clustering + tdiv trees.

    Concatenates the `train` CSVs into one model, clusters it and tags rows
    with their leaf-cluster id ('_<n>'), builds table `tbl2`; does the same
    for `test` into `tbl3`. A tdiv tree is grown over tbl2; every test row is
    dropped through it and labelled 'Defect'/'No Defect' two ways:
      - from the row itself (bug count in cells[-2] > 0), and
      - from the mean bug count of the tree node it lands in (> 1.5).
    Returns [testDefective, defectivClust], the two parallel label lists.

    where  -- settings object passed to prepare()
    dtree  -- options passed to tdiv(opt=...)
    train  -- list of training CSV paths
    test   -- list of testing CSV paths
    """
    rseed(1)  # deterministic clustering
    makeaModel = makeAModel()
    # pdb.set_trace()
    # --- Training ---------------------------------------------------------
    _r = []
    for t in train:
        m = makeaModel.csv2py(t)
        _r += m._rows
    # The last model carries the union of all training rows.
    m._rows = _r
    prepare(m, settings=where)  # Initialize all parameters for where2 to run
    tree = where2(m, m._rows)  # Decision tree using where2
    # `t` leaks from the loop: the LAST training file's table skeleton is used.
    tbl = table(t)
    headerLabel = '=klass'
    Rows = []
    for k, _ in leaves(tree):
        for j in k.val:
            tmp = (j.cells)
            tmp.append('_' + str(id(k) % 1000))  # leaf-cluster tag
            j.__dict__.update({'cells': tmp})
            Rows.append(j.cells)
    tbl2 = newTable(tbl, headerLabel, Rows)
    # --- Testing ----------------------------------------------------------
    _r = []
    for tt in test:
        mTst = makeaModel.csv2py(tt)
        _r += mTst._rows
    mTst._rows = _r
    prepare(mTst, settings=where)  # Initialize all parameters for where2 to run
    tree = where2(mTst, mTst._rows)  # Decision tree using where2
    tbl = table(tt)
    headerLabel = '=klass'
    Rows = []
    for k, _ in leaves(tree):
        for j in k.val:
            tmp = (j.cells)
            tmp.append('_' + str(id(k) % 1000))
            j.__dict__.update({'cells': tmp})
            Rows.append(j.cells)
    tbl3 = newTable(tbl, headerLabel, Rows)
    temp = []  # collects per-node mean bug counts (see isdefective)

    def sort(lst):
        # Return (indices, values) of lst sorted by value.
        return [i[0] for i in sorted(enumerate(lst), key=lambda x: x[1])], \
               [i[1] for i in sorted(enumerate(lst), key=lambda x: x[1])]

    def thresh(val1, val2):
        # NOTE(review): dead AND broken -- never called, calls sort() with no
        # argument, and shadows builtin `sorted`. Kept only to preserve the
        # original; safe to delete once confirmed unused.
        indx, sorted = sort()

    def isdefective(case, test=False):
        """Label `case` 'Defect'/'No Defect'. With test=False, `case` is a
        row (bug count at cells[-2]); with test=True it is a tree node whose
        rows' mean bug count decides the label (> 1.5)."""
        if not test:
            return 'Defect' if case.cells[-2] > 0 else 'No Defect'
        else:
            bugs = [r.cells[-2] for r in case.rows]
            meanBugs = np.mean(bugs)
            medianBugs = np.median(bugs)  # NOTE(review): unused
            rangeBugs = (sorted(bugs)[0] + sorted(bugs)[-1]) / 2  # NOTE(review): unused
            temp.append(meanBugs)
            return 'Defect' if meanBugs > 1.5 else 'No Defect'

    testCase = tbl3._rows
    # print testCase
    testDefective = []
    defectivClust = []
    t = discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))
    myTree = tdiv(t, opt=dtree)
    # showTdiv(myTree)
    testCase = tbl3._rows
    # print testCase
    for tC in testCase:
        loc = drop(tC, myTree)  # node of myTree this test row falls into
        # if len(loc.kids)==0:
        testDefective.append(isdefective(tC))
        defectivClust.append(isdefective(loc, test=True))
    # saveImg(temp, 10)
    # contrastSet = getContrastSet(loc, myTree)
    # print 'Contrast Set:', contrastSet
    return [testDefective, defectivClust]