def clustertbl(f, tree, num2sym):
  # Rebuild a table in which every clustered row gains two trailing cells:
  # the cluster id (a string counter) and a copy of the FIRST objective.
  # Returns (cloned table, list of augmented cell lists).
  row = []
  if isinstance(f, list):
    tbl1 = tbl = table(f[0])  # list input: headers come from the first file only
  else:
    tbl1 = tbl = table(f)
  newheader = Num()
  newheader.col = len(tbl.headers)
  newheader.name = "=klass"
  tbl1.headers += [newheader]  # tbl1 : the new table with cluster ID
  count = 0
  for k, _ in leaves(tree):
    for j in k.val:
      # map numeric codes back to their symbols for Sym columns
      for i, cell in enumerate(j.cells):
        if isinstance(tbl.headers[i], Sym):
          j.cells[i] = num2sym.get(cell, cell)
      tmp = j.cells  # alias: the appends below mutate the row's own cell list
      tmp.append(str(count))  # cluster id
      tmp.append(j.cells[tbl1.depen[0].col])  # add the FIRST objective into the last cell of the row
      j.update(cells=tmp)
      row.append(j.cells)
    count += 1  # next leaf -> next cluster id
  tbl1 = clone(tbl1, row)
  return tbl1, row
def csv2py(f):
    """Convert a csv file (or a list of csv files, merged into one table)
    into a `data` record, mapping symbolic independent cells to integers.

    Returns (data record, sym2num) where sym2num maps symbol -> int code.
    """
    sym2num = {}  # symbol -> assigned integer code

    def str2num(t, p=0):
        # Merge a list of tables into the first one (row concatenation).
        if isinstance(t, list):
            merged = t[0]
            for extra in t[1:]:
                merged._rows += extra._rows
            t = merged
        for r, row in enumerate(t._rows):
            for c, cell in enumerate(row.cells):
                if isinstance(cell, str) and c < t.depen[0].col and isinstance(t.headers[c], Sym):
                    # BUG FIX: the original tested `sym2num.get(cell, 0) == 0`,
                    # which cannot distinguish "unseen" from "assigned code 0",
                    # so the first symbol was re-coded on every later occurrence.
                    if cell not in sym2num:
                        sym2num[cell] = p
                        p += 1
                    t._rows[r].cells[c] = sym2num[cell]  # update cell with num
        return t

    if isinstance(f, list):
        tbl = [table(src) for src in f]  # tbl is a list of tables
    else:
        tbl = table(f)
    tbl_num = str2num(tbl)
    x = data(
        indep=[h.name for h in tbl_num.indep],
        less=[h.name for h in tbl_num.depen],
        _rows=[row.cells for row in tbl_num._rows],
    )
    return x, sym2num
def _tdivPrec(dir='camel/'):
  #==============================================================================
  # Recursively clustering the model.
  #==============================================================================
  # NOTE(review): 'camel-1.2.csv' is listed twice -- the middle entry was
  # presumably meant to be a different version (e.g. camel-1.4.csv); confirm.
  train=['camel-1.0.csv', 'camel-1.2.csv', 'camel-1.2.csv']
  test=['camel-1.6.csv']
  rseed(1)  # fixed seed so clustering is reproducible
  makeaModel=makeAModel()
  _rows=[]
  # Concatenate training cases
  for t in train:
    file=dir+t
    m=makeaModel.csv2py(file)
    prepare(m) # Initialize all parameters for where2 to run
    tree=where2(m, m._rows) # Decision tree using where2
    tbl = table(file)
    headerLabel='=klass'
    Rows=[]
    for k,_ in leaves(tree):
      for j in k.val:
        tmp=(j.cells)  # alias of the row's own cell list
        tmp.append('_'+str(id(k) % 1000))  # cluster id tag
        j.__dict__.update({'cells': tmp})
        Rows.append(j.cells)
    _rows+=Rows
  # NOTE(review): tbl here is the table of the LAST training file -- confirm
  # that header layout is identical across all training files.
  tbl2=makeMeATable(tbl, headerLabel, _rows)
  # Test case!
  _rows=[]
  for tt in test:
    file=dir+tt
    m=makeaModel.csv2py(file)
    prepare(m) # Initialize all parameters for where2 to run
    tree=where2(m, m._rows) # Decision tree using where2
    tbl = table(file)
    headerLabel='=klass'
    Rows=[]
    for k,_ in leaves(tree):
      for j in k.val:
        tmp=(j.cells)
        tmp.append('_'+str(id(k) % 1000))
        j.__dict__.update({'cells': tmp})
        Rows.append(j.cells)
    _rows+=Rows
  tbl3=makeMeATable(tbl, headerLabel, _rows)
  testCase=tbl3._rows
  print testCase
  # Discretise, build decision tree, then look up a random test row's
  # leaf and compute its contrast set.
  t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))
  myTree=tdiv(t)
  showTdiv(myTree)
  loc = leaveOneOut(testCase[randi(0, len(testCase))], myTree)
  contrastSet = getContrastSet(loc, myTree)
  print 'Contrast Set:', contrastSet
def csv2py(f):
    """Load one csv (or merge several csvs, rows concatenated into the
    first table) and wrap it as a `data` record.

    No symbolic columns are expected in the defect data sets, so no
    symbol-to-number translation is performed. The last independent
    header is dropped, mirroring the original behaviour.
    """
    if isinstance(f, list):
        merged = [table(src) for src in f]
        combined = merged[0]
        for extra in merged[1:]:
            combined._rows += extra._rows
        tbl = combined
    else:
        tbl = table(f)
    return data(indep=[h.name for h in tbl.indep[:-1]],
                less=[h.name for h in tbl.depen],
                _rows=[r.cells for r in tbl._rows])
def _tdivdemo(file='data/nasa93dem.csv'):
  #==============================================================================
  # We start by recursively clustering the model.
  #==============================================================================
  makeaModel=makeAModel()
  m=makeaModel.csv2py(file)
  prepare(m) # Initialize all parameters for where2 to run
  tree=where2(m, m._rows) # Decision tree using where2
  tbl = table(file)
  headerLabel='=klass'
  Rows=[]
  for k,_ in leaves(tree):
    for j in k.val:
      tmp=j.cells  # alias: appending mutates the row's own cell list
      tmp.append(id(k) % 1000)  # cluster id (numeric in this variant)
      j.__dict__.update({'cells': tmp})
      Rows.append(j.cells)
  tbl2=makeMeATable(tbl, headerLabel, Rows)
  # Hold one random row out as the test case.
  testCase=tbl2._rows.pop(randi(0,len(Rows)-1))
  t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))
  myTree=tdiv(t)
  loc=apex(testCase, myTree)
  print loc.__dict__
  print 'Id: ',loc.mode, ' Level: ', loc.lvl, ' Variable: ', loc.f.name
  showTdiv(myTree)
  #==============================================================================
  # NOTE(review): this trailing loop computes `rows` and discards it --
  # looks like leftover debugging; confirm before removing.
  for node, lvl in dtnodes(myTree):
    rows=map(lambda x:x.cells,node.rows)
def createTbl(
        data,
        settings=None,
        _smote=False,
        isBin=False,
        bugThres=2,
        duplicate=False):
    """
    Cluster the rows of the csv files in `data` with where2 and return a
    new table whose final "=klass" column holds each row's cluster tag.

    kwargs:
    _smote     = True/False : SMOTE input data (or not)
    isBin      = True/False : Reduce bugs to defects/no defects
    bugThres   = int : Threshold for marking stuff as defective,
                 default = 2. Not defective => Bugs < 2
                 (the original docstring said 1, contradicting the signature)
    duplicate  = passed through to csv2py's resampling
    """
    # NOTE(review): other variants in this file call makeAModel() directly;
    # here it is reached through a `makeAmodel` module alias -- confirm.
    makeaModel = makeAmodel.makeAModel()
    _r = []
    for t in data:
        m = makeaModel.csv2py(t, _smote=_smote, duplicate=duplicate)
        _r += m._rows
    m._rows = _r  # last model carries the concatenation of all rows
    # NOTE(review): `settings` parameter is ignored; None is passed -- confirm.
    prepare(m, settings=None)  # Initialize all parameters for where2 to run
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(t)  # header template comes from the LAST file in `data`
    headerLabel = '=klass'
    Rows = []
    for k, _ in leaves(tree):
        for j in k.val:
            tmp = j.cells
            if isBin:
                # collapse raw bug count to a 0/1 defect label
                tmp[-1] = 0 if tmp[-1] < bugThres else 1
            tmp.append('_' + str(id(k) % 1000))  # cluster tag
            j.__dict__.update({'cells': tmp})
            Rows.append(j.cells)
    return newTable(tbl, headerLabel, Rows)
def _tdivdemo(file='data/nasa93dem.csv'): #============================================================================== # We start by recursively clustering the model. #============================================================================== makeaModel=makeAModel() m=makeaModel.csv2py(file) #alias = dict (zip(makeaModel.translate.values(),makeaModel.translate.keys())) #print alias #def num2str(lst): # return [alias[z] for z in lst] prepare(m) # Initialize all parameters for where2 to run tree=where2(m, m._rows) # Decision tree using where2 tbl = table(file) headerLabel='=klass' Rows=[] for k,_ in leaves(tree): for j in k.val: tmp=(j.cells) tmp.append('_'+str(id(k) % 1000)) j.__dict__.update({'cells': tmp}) Rows.append(j.cells) tbl2=makeMeATable(tbl, headerLabel, Rows) print testCase=[tbl2._rows.pop(randi(0, len(tbl2._rows))) for k in xrange(500)] t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows)) myTree=tdiv(t) showTdiv(myTree) loc = leaveOneOut(testCase[randi(0, len(testCase))], myTree) contrastSet = getContrastSet(loc, myTree) print 'Contrast Set:', contrastSet
def csv2py(self, filename, _smote=False, duplicate=False):
    """Convert a csv file to a model file.

    Optionally SMOTEs the table first. Symbolic cells are translated to
    numbers via self.translate (populated by self.str2num).
    """
    tbl = table(filename)
    if _smote:
        tbl = smote.SMOTE(
            tbl, atleast=50, atmost=101, bugIndx=1, resample=duplicate)
    self.str2num(tbl)
    tonum = lambda x: self.translate[x] if isinstance(x, str) else x
    # table.py does not separate dependent from independent variables:
    # any header also listed in tbl.depen must be removed from tbl.indep.
    # BUG FIX: the original popped from tbl.indep WHILE enumerating it,
    # which skips the element that slides into the removed slot;
    # rebuild the list instead.
    depen_names = set(l.name for l in tbl.depen)
    tbl.indep = [k for k in tbl.indep if k.name not in depen_names]
    return self.data(indep=[i.name for i in tbl.indep],
                     less=[i.name for i in tbl.depen],
                     _rows=map(lambda x: [tonum(xx) for xx in x.cells],
                               tbl._rows))
def buildtestdata1(f):
    """Build test data from table `f`.

    Returns (testdata, actual): the table rows, and for each row the
    label "Defective" when its first dependent cell is > 0, otherwise
    "Non-Defective".
    """
    tbl = table(f)
    bug_col = tbl.depen[0].col
    testdata = [r for r in tbl._rows]
    actual = ["Defective" if r.cells[bug_col] > 0 else "Non-Defective"
              for r in tbl._rows]
    return testdata, actual
def buildtestdata1(f, isdefect=False):
    """Build test data from a table (or a merged list of tables).

    Returns (testdata, actual). With isdefect=True the labels are
    "Defective"/"Non-Defective" based on the first dependent cell;
    otherwise the raw dependent value is returned as a string.
    """
    if isinstance(f, list):
        tables = [table(src) for src in f]
        tbl = tables[0]
        for extra in tables[1:]:
            tbl._rows += extra._rows  # concatenate rows into the first table
    else:
        tbl = table(f)
    col = tbl.depen[0].col
    testdata, actual = [], []
    for row in tbl._rows:
        value = row.cells[col]
        if isdefect:
            actual.append("Defective" if value > 0 else "Non-Defective")
        else:
            actual.append(str(value))
        testdata.append(row)
    return testdata, actual
def buildtestdata1(f, actual=None, testdata=None):
    """Build test data from table `f`.

    Returns (testdata, actual): the table rows and, per row,
    "Defective" when the first dependent cell is > 0 else "Non-Defective".

    BUG FIX: `actual=[]` / `testdata=[]` were mutable default arguments
    shared across calls, so results silently accumulated between
    invocations; use None sentinels instead. Callers that passed their
    own lists are unaffected.
    """
    actual = [] if actual is None else actual
    testdata = [] if testdata is None else testdata
    tbl = table(f)
    for row in tbl._rows:
        temp = row.cells[tbl.depen[0].col]
        if temp > 0:
            actual += ["Defective"]
        else:
            actual += ["Non-Defective"]
        testdata += [row]
    return testdata, actual
def csv2py(self, filename):
    """Convert a csv file to a model file.

    Symbolic cells are translated to numbers via self.translate
    (populated by self.str2num).
    """
    tbl = table(filename)
    self.str2num(tbl)
    tonum = lambda x: self.translate[x] if isinstance(x, str) else x
    # FIX: the original translated every row TWICE -- once into a local
    # that was then discarded, and once again inline in the return -- and
    # reached header names via i.__dict__['name']. Translate once and use
    # plain attribute access.
    _rows = map(lambda x: [tonum(xx) for xx in x.cells], tbl._rows)
    return self.data(indep=[i.name for i in tbl.indep],
                     less=[i.name for i in tbl.depen],
                     _rows=_rows)
def SMOTE(path="./data", k=5):
    """Apply _SMOTE1 (k nearest neighbours) to every csv file found one
    directory level below `path`, saving each result under ./Smote<path>.

    FIX: replaced the `for i in range(len(data))` index loop with direct
    iteration, and dropped the pointless `folders[:]` copy (the list is
    never mutated during iteration).
    """
    folders = [f for f in listdir(path) if not isfile(join(path, f))]
    for folder in folders:
        nextpath = join(path, folder)
        data = [join(nextpath, f) for f in listdir(nextpath)
                if isfile(join(nextpath, f))]
        for src in data:
            tbl = table(src)
            # mirror the input path under ./Smote (drops the leading '.')
            newfilename = "./Smote" + src[1:]
            out = _SMOTE1(tbl, k)
            savetbl(tbl, out, newfilename)
def __init__(self, line):
    """Parse a createTable statement and build the backing table.

    Expects `line` to begin with self.statement_title, followed by the
    table name and a parenthesised, comma-separated list of
    "column type" pairs. Raises Exception on a non-createTable line.
    """
    title = self.statement_title
    if line[:len(title)] != title:
        raise Exception('not createTable command: ' + line)
    table_name = line[len(title):].split()[0]
    spec = line[line.index('(') + 1:line.index(')')]
    columns = []
    for pair in spec.split(','):
        col_name, col_type = pair.split()
        columns.append(column(col_name, datatype.build(col_type)))
    self.table = table(table_name, columns)
def clustertbl(f, tree, num2sym=None):
    """Append a cluster id and a copy of the FIRST objective to every
    clustered row; return (cloned table, list of augmented cell lists).

    BUG FIX: `num2sym={}` was a mutable default argument (shared across
    calls); replaced with a None sentinel.
    NOTE(review): num2sym is not used by this variant -- the parameter is
    kept only for signature compatibility with the symbolic version.
    """
    num2sym = {} if num2sym is None else num2sym
    row = []
    if isinstance(f, list):
        tbl1 = tbl = table(f[0])  # list input: headers from the first file
    else:
        tbl1 = tbl = table(f)
    newheader = Num()
    newheader.col = len(tbl.headers)
    newheader.name = "=klass"
    tbl1.headers += [newheader]  # tbl1 : the new table with cluster ID
    count = 0
    for k, _ in leaves(tree):
        for j in k.val:
            tmp = j.cells  # alias: appends mutate the row's own cell list
            tmp.append(str(count))  # cluster id
            tmp.append(j.cells[tbl1.depen[0].col])  # copy FIRST objective
            j.update(cells=tmp)
            row.append(j.cells)
        count += 1
    tbl1 = clone(tbl1, row)
    return tbl1, row
def ideaed(f='data/nasa93.csv'):
  # Cluster `f` with idea(), relabel rows by leaf cluster, learn a
  # decision tree over the clusters, then walk its leaves reporting
  # snake/ladder contrasts and median/iqr summaries per header.
  def change(x):
    # Mark headers containing '<' or '>' with '?'/'/' fringes.
    prefix=suffix=""
    for ch in x:
      if ch == ">": prefix="?"; suffix="/"
      if ch == "<": prefix="?"; suffix="/"
    return prefix + x + suffix
  dists={}
  tbl=table(f)
  opt= distings( klass = lambda x,tbl,o: fromHell(tbl,x,o),
                 how = nearest1,
                 two = mostDistant )
  tree1 = idea(tbl,opt=opt)
  klass = Sym("=klass")
  names = [change(h.name) for h in tbl.headers] + ["=KLASS"]
  tbl2=head(names,table0("clusters of "+ f))
  # Copy every row into tbl2, tagged with its leaf cluster id.
  for x in leaves(tree1):
    it = "_" + str(x._id)
    for row in x.rows:
      body(row.cells + [it],tbl2,True)
  tbl3= discreteNums(tbl2,[row.cells for row in tbl2._rows])
  tree2 = tdiv(tbl3)
  showTdiv(tree2)
  snakesAndLadders(tree2,tbl3, lambda node: fromHells(tbl3, node.rows, opt))
  ss0,ss1={},{}
  for node in dtleaves(tree2):
    says(node._id,':n',len(node.rows),' :score',g3(fromHells(tbl3,node.rows,opt)))
    if node.ladder: says(" :want",node.ladder._id," :plan",node.better)
    if node.snake: says(" :hate",node.snake._id," :watch",node.worse)
    nl()
    for row in node.rows:
      asIs,toBe= jumpUp(row,tree2)
      for h0,h1 in zip(asIs.tbl.less + asIs.tbl.more, toBe.tbl.less + toBe.tbl.more):
        s0 = ss0.get(h0.name,Num())
        s1 = ss1.get(h1.name,Num())
        s0 + h0.median()  # Num's __add__ accumulates the sample
        s1 + h1.median()
        ss0[h0.name] = s0
        # NOTE(review): storing s1 under h0.name looks like a typo for
        # ss1[h1.name] -- harmless only if zip pairs identical names; confirm.
        ss1[h0.name] = s1
  print ""
  for key in ss0:
    saysln(key, ss0[key].median(), ss1[key].median(), \
           ss0[key].iqr(),ss1[key].iqr())
def loosed(f='data/nasa93.csv'): dists={} t=table(f) opt= distings( klass = lambda x,t,o: x.cells[t.less[0].col], how = nearest1, tests = 5, #tiny = lambda x: 8, two = twoDistant #rprint(t.klass[0]); exit() ) nums = loos(t,opt) print "", int(100*nums.median()), int(100*nums.iqr())#sorted(nums.all())
def loosed(f='Data/nasa93.csv'): dists={} t=table(f) opt= distings( klass = lambda x,t,o: x.cells[t.less[0].col], how = nearest1, tests = 5, #tiny = lambda x: 8, two = twoDistant #rprint(t.klass[0]); exit() ) nums = loos(t,opt) print "", int(100*nums.median()), int(100*nums.iqr())#sorted(nums.all())
def csv2py(self, filename):
    """Convert a csv file to a model file.

    Symbolic cells are translated to numbers via self.translate
    (populated by self.str2num).
    """
    tbl = table(filename)
    self.str2num(tbl)
    # BUG FIX: the isinstance guard had been commented out, so tonum
    # raised KeyError on any non-string cell; restored to match the
    # other csv2py variants in this file.
    tonum = lambda x: self.translate[x] if isinstance(x, str) else x
    # table.py leaves dependent headers inside tbl.indep as well.
    # BUG FIX: popping from tbl.indep while enumerating it skips the
    # element that slides into the removed slot; filter instead.
    depen_names = set(l.name for l in tbl.depen)
    tbl.indep = [k for k in tbl.indep if k.name not in depen_names]
    return self.data(indep=[i.name for i in tbl.indep],
                     less=[i.name for i in tbl.depen],
                     _rows=map(lambda x: [tonum(xx) for xx in x.cells],
                               tbl._rows))
def genic(src='data/diabetes.csv',opt=None):
  # Incremental centroid clustering: stream rows from `src`, keep at most
  # opt.k centroids, fuse each new row into its nearest centroid once the
  # budget is full, and cull centroids at each "era" boundary.
  # Returns (working state, centroids sorted descending).
  w = o(num=[], sym=[], dep=[], indep=[], centroids=[],
        min={}, max={}, name={}, index={}, opt=opt or genic0())
  for n, row in table(src,w):
    data(w,row)  # update per-column stats with this row
    if len(w.centroids) < w.opt.k:
      more(w,n,row)  # room left: this row seeds a new centroid
    else:
      fuse(w,row,nearest(w,row))  # merge into the nearest centroid
    # Era boundary: prune weak centroids.
    # NOTE(review): placement inferred from the collapsed source -- confirm
    # whether this belongs inside the else branch instead.
    if not (n % w.opt.era):
      less(w,n)
  return w,sorted(w.centroids,reverse=True)
def moea(f='data.dat/coc81dem.csv'):
    """Seeded multi-objective search: load `f` and run loosMoea with a
    fixed configuration (seed(1) makes the run reproducible)."""
    seed(1)
    dists = {}  # unused; kept from the original
    tbl = table(f)
    settings = distings(
        klass=lambda row, t, o: fromHell(t, row, o),
        how=nearest1,
        tiny=lambda x: 4,
        some=10000,
        retry=1,
        repeats=1,
        two=mostDistant,
    )
    loosMoea(tbl, settings)
def moea(f='data/coc81dem.csv'):
    """Run loosMoea over `f` under a fixed random seed."""
    seed(1)
    dists = {}  # unused; kept from the original
    t = table(f)
    opt = distings(klass=lambda x, tb, o: fromHell(tb, x, o),
                   how=nearest1, tiny=lambda _: 4, some=10000,
                   retry=1, repeats=1, two=mostDistant)
    loosMoea(t, opt)
def csv2py(self, filename):
    """Convert a csv file to a model file.

    Symbolic cells are translated to numbers via self.translate
    (populated by self.str2num).
    """
    tbl = table(filename)
    self.str2num(tbl)
    tonum = lambda x: self.translate[x] if isinstance(x, str) else x
    # table.py leaves dependent headers inside tbl.indep as well.
    # BUG FIX: the original popped from tbl.indep WHILE enumerating it,
    # which skips the element after each removal; rebuild the list instead.
    depen_names = set(l.name for l in tbl.depen)
    tbl.indep = [k for k in tbl.indep if k.name not in depen_names]
    return self.data(indep=[i.name for i in tbl.indep],
                     less=[i.name for i in tbl.depen],
                     _rows=map(lambda x: [tonum(xx) for xx in x.cells],
                               tbl._rows))
def csv2py(self, filename):
    """Convert a csv file to a model file.

    There's a bug in table.py that doesn't separate dependent and
    independent variables; the filter below corrects for it.
    """
    tbl = table(filename)
    self.str2num(tbl)
    tonum = lambda x: self.translate[x] if isinstance(x, str) else x
    # BUG FIX: the original popped from tbl.indep while enumerating it,
    # which skips the element that slides into the vacated slot;
    # rebuild the list instead.
    depen_names = set(l.name for l in tbl.depen)
    tbl.indep = [k for k in tbl.indep if k.name not in depen_names]
    return self.data(indep=[i.name for i in tbl.indep],
                     less=[i.name for i in tbl.depen],
                     _rows=map(lambda x: [tonum(xx) for xx in x.cells],
                               tbl._rows))
def clustertbl(f, tree, num2sym, row=None):
    """Append a cluster tag and a copy of the FIRST objective to every
    clustered row; return (cloned table, list of augmented cell lists).

    BUG FIX: `row=[]` was a mutable default argument shared across calls,
    so rows silently accumulated between invocations; use a None sentinel.
    Callers that passed their own list are unaffected.
    """
    row = [] if row is None else row
    tbl1 = tbl = table(f)  # open the first table
    newheader = Num()
    newheader.col = len(tbl.headers)
    newheader.name = "=klass"
    tbl1.headers += [newheader]  # tbl1 : the new table with cluster ID
    for k, _ in leaves(tree):
        for j in k.val:
            # map numeric codes back to symbols for Sym columns
            for i, cell in enumerate(j.cells):
                if isinstance(tbl.headers[i], Sym):
                    j.cells[i] = num2sym.get(cell, cell)
            tmp = j.cells  # alias: appends mutate the row's own cell list
            tmp.append(id(k) % 1000)  # cluster tag
            tmp.append(j.cells[tbl1.depen[0].col])  # add the FIRST objective into the last cell of the row
            j.update(cells=tmp)
            row.append(j.cells)
    tbl1 = clone(tbl1, row)
    return tbl1, row
def createTbl(data):
    """Cluster the rows of the csv files in `data` with where2 and return
    a table whose final "=klass" column holds each row's cluster tag.

    The header template is taken from the last file in `data`; all rows
    are concatenated into one model before clustering.
    """
    maker = makeAModel()
    merged = []
    for src in data:
        m = maker.csv2py(src)
        merged += m._rows
    m._rows = merged
    prepare(m, settings=None)  # initialise where2's parameters
    tree = where2(m, m._rows)  # cluster with where2
    tbl = table(src)
    Rows = []
    for leaf, _ in leaves(tree):
        tag = '_' + str(id(leaf) % 1000)
        for r in leaf.val:
            cells = r.cells  # alias: append mutates the row in place
            cells.append(tag)
            r.__dict__.update({'cells': cells})
            Rows.append(r.cells)
    return newTable(tbl, '=klass', Rows)
def createTbl(data):
    """Build a clustered table from the csv files in `data`.

    All rows are pooled into one model, clustered via where2, and each
    row gains a trailing '=klass' cell naming its cluster.
    """
    model_maker = makeAModel()
    pooled = []
    for path in data:
        model = model_maker.csv2py(path)
        pooled.extend(model._rows)
    model._rows = pooled
    prepare(model, settings=None)  # initialise where2's parameters
    cluster_tree = where2(model, model._rows)
    base = table(path)  # header template from the last file
    labelled = []
    for node, _ in leaves(cluster_tree):
        for row in node.val:
            cells = row.cells
            cells.append('_' + str(id(node) % 1000))
            row.__dict__.update({'cells': cells})
            labelled.append(row.cells)
    return newTable(base, '=klass', labelled)
def add_widgets(self):
    # Build the GUI: a spreadsheet-like table seeded with one empty 'W'
    # column, plus a menu bar (Spectrum / Plot / Help) wired to the
    # wave/encounter spectrum handlers on self.
    self.table = table(self, data=pd.DataFrame({'W': [""]}))
    self.menubar = Menu(self.master)
    # Spectrum menu: generate the two spectra.
    self.filemenu = Menu(self.menubar, tearoff=0)
    self.filemenu.add_command(label="Generate wave spectrum", command=self.Generate_wavespectrum)
    self.filemenu.add_command(label="Generate Encounter spectrum", command=self.Generate_encounterspectrum)
    # Plot menu: draw the two spectra.
    self.plot = Menu(self.menubar, tearoff=0)
    self.plot.add_command(label="plot Wave spectrum", command=self.plot_wavespectrum)
    self.plot.add_command(label="plot Encounter spectrum", command=self.plot_Encounterspectrum)
    # Help menu.
    self.help = Menu(self.menubar, tearoff=0)
    self.help.add_command(label="about", command=self.about)
    self.filemenu.add_separator()
    # Attach the cascades and install the bar on the root window.
    self.menubar.add_cascade(label="Spectrum", menu=self.filemenu)
    self.menubar.add_cascade(label="Plot", menu=self.plot)
    self.menubar.add_cascade(label="Help", menu=self.help)
    self.master.config(menu=self.menubar)
def clustertbl(f, tree, num2sym, row=None):
    """Append a cluster tag and a copy of the FIRST objective to every
    clustered row; return (cloned table, list of augmented cell lists).

    BUG FIX: `row=[]` was a mutable default argument shared across calls,
    so results accumulated between invocations; use a None sentinel.
    Callers that supplied their own list are unaffected.
    """
    row = [] if row is None else row
    tbl1 = tbl = table(f)
    newheader = Num()
    newheader.col = len(tbl.headers)
    newheader.name = "=klass"
    tbl1.headers += [newheader]  # tbl1 : the new table with cluster ID
    for k, _ in leaves(tree):
        for j in k.val:
            # map numeric codes back to symbols for Sym columns
            for i, cell in enumerate(j.cells):
                if isinstance(tbl.headers[i], Sym):
                    j.cells[i] = num2sym.get(cell, cell)
            tmp = j.cells  # alias: appends mutate the row in place
            tmp.append(id(k) % 1000)  # cluster tag
            tmp.append(
                j.cells[tbl1.depen[0].col]
            )  # add the FIRST objective into the last cell of the row
            j.update(cells=tmp)
            row.append(j.cells)
    tbl1 = clone(tbl1, row)
    return tbl1, row
def csv2py(f):
    """Read csv `f` into a `data` record, mapping symbolic independent
    cells to integer codes.

    Returns (data record, sym2num) where sym2num maps symbol -> int code.
    """
    sym2num = {}  # symbol -> assigned integer code

    def str2num(t, p=0):
        for r, row in enumerate(t._rows):
            for c, cell in enumerate(row.cells):
                if isinstance(cell, str) and c < t.depen[0].col and isinstance(
                        t.headers[c], Sym):
                    # BUG FIX: the original tested `sym2num.get(cell, 0) == 0`,
                    # which cannot distinguish "unseen" from "assigned code 0",
                    # so the symbol coded 0 was re-coded on every occurrence.
                    if cell not in sym2num:
                        sym2num[cell] = p
                        p += 1
                    t._rows[r].cells[c] = sym2num[cell]  # update cell with num
        return t

    tbl_num = str2num(table(f))
    x = data(indep=[h.name for h in tbl_num.indep],
             less=[h.name for h in tbl_num.depen],
             _rows=[row.cells for row in tbl_num._rows])
    return x, sym2num
def _sdiv(): t = table(cols(FILE('data/nasa93.csv'))) # cook the klasses w1,klasses = sdiv1(t.rows, x= lambda z:z.raw[-1]) for klass in sorted(klasses,key=lambda x:x.n): for row in klass.has: row.cooked[-1] = klass.n sayl([":klass",klass.n,":lo",klass.x.lo,":hi",klass.x.hi]) todos = {} bestRange( [ereport(t.rows,id=t.header[n], sym1 =lambda z:z.raw[n], sym2 =lambda z:z.cooked[-1]) for n in t.inSyms], todos) bestRange( [ediv(t.rows, id = t.header[n], num =lambda z:z.raw[n], sym =lambda z:z.cooked[-1]) for n in t.inNums ], todos) for k in todos: todos[k] = sorted(todos[k],key=lambda z:(z.w,-1*z.y.n)) print("\n------") for one in todos[k]: sayl(showOne(one))
def _tdivdemo(file='data/nasa93dem.csv'):
  #==============================================================================
  # We start by recursively clustering the model.
  #==============================================================================
  makeaModel=makeAModel()
  m=makeaModel.csv2py(file)
  prepare(m) # Initialize all parameters for where2 to run
  tree=where2(m, m._rows) # Decision tree using where2
  tbl = table(file)
  headerLabel='=klass'
  Rows=[]
  for k,_ in leaves(tree):
    for j in k.val:
      tmp=j.cells  # alias: appending mutates the row's own cell list
      tmp.append(id(k) % 1000)  # cluster id (numeric in this variant)
      j.__dict__.update({'cells': tmp})
      Rows.append(j.cells)
  tbl2=makeMeATable(tbl, headerLabel, Rows)
  # Discretise the clustered rows and learn + display a decision tree.
  t=discreteNums(tbl2, Rows)
  myTree=tdiv(t)
  showTdiv(myTree)
def _tdivdemo(file='data/nasa93dem.csv'):
  #==============================================================================
  # We start by recursively clustering the model.
  #==============================================================================
  makeaModel=makeAModel()
  m=makeaModel.csv2py(file)
  # invert the str->num translation so rows can be rendered symbolically
  alias = dict (zip(makeaModel.translate.values(),makeaModel.translate.keys()))
  def num2str(lst):
    return [alias[z] for z in lst]
  prepare(m) # Initialize all parameters for where2 to run
  tree=where2(m, m._rows) # Decision tree using where2
  tbl = table(file)
  headerLabel='=klass'
  Rows=[]
  for k,_ in leaves(tree):
    for j in k.val:
      tmp=num2str(j.cells)  # symbolic copy of the row's cells
      tmp.append('_'+str(id(k) % 1000))  # cluster tag
      j.__dict__.update({'cells': tmp})
      Rows.append(j.cells)
  tbl2=makeMeATable(tbl, headerLabel, Rows)
  # Hold row 1 out as the test case, learn a tree on the rest.
  testCase=tbl2._rows.pop(1)
  t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))
  myTree=tdiv(t)
  showTdiv(myTree)
  loc = leaveOneOut(testCase, myTree)
  print loc.__dict__
  getContrastSet(loc, myTree)
  #==============================================================================
  #for node, lvl in dtnodes(myTree):
  #rows=map(lambda x:x.cells,node.rows)
  #pdb.set_trace()
  #print lvl, len(rows), [ k._id for k in node.rows]
  #==============================================================================
  # Index headers by name.
  # NOTE(review): built for side effect only -- the comprehension result
  # and headerLabels itself are never used in the visible span.
  headerLabels={}
  [headerLabels.update({k.name:indx}) for indx, k in enumerate(tbl2.headers)]
def createTbl(data,
              settings=None,
              _smote=False,
              isBin=False,
              bugThres=1,
              duplicate=False):
    """
    Cluster the rows of the csv files in `data` with where2 and return a
    new table whose final "=klass" column holds each row's cluster tag.

    kwargs:
    _smote = True/False : SMOTE input data (or not)
    _isBin = True/False : Reduce bugs to defects/no defects
    _bugThres = int : Threshold for marking stuff as defective,
                      default = 1. Not defective => Bugs < 1
    """
    makeaModel = makeAModel()
    _r = []
    for t in data:
        m = makeaModel.csv2py(t, _smote=_smote, duplicate=duplicate)
        _r += m._rows
    m._rows = _r  # last model carries the concatenation of all rows
    # Initialize all parameters for where2 to run
    # NOTE(review): the `settings` parameter is ignored here (None passed).
    prepare(m, settings=None)
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(t)  # header template from the LAST file in `data`
    headerLabel = '=klass'
    Rows = []
    for k, _ in leaves(tree):
        for j in k.val:
            tmp = j.cells  # alias: appends mutate the row in place
            if isBin:
                # collapse raw bug count to a 0/1 defect label
                tmp[-1] = 0 if tmp[-1] < bugThres else 1
            tmp.append('_' + str(id(k) % 1000))  # cluster tag
            j.__dict__.update({'cells': tmp})
            Rows.append(j.cells)
    return newTable(tbl, headerLabel, Rows)
lo[indx]=i; if(hi[indx]==None or hi[indx]<i): hi[indx]=i; indx+=1 return hi, lo def normalizit(val,high,low): indx=0; for i in xrange(len(val)): if(not isa(val[i],str)): val[i]=(val[i]-low[indx])/(high[indx]-low[indx]); indx+=1 return val """ t0 = table(source) rows = map(lambda x: x.cells, t0._rows) #______________________________________________________________________________ for x in t0._rows: # Obtain the dictionary values of each row y = x.__dict__ # Obtain the keys and values of a dictionary values = y['cells'] #Max, Min= minmax(values) #print values #print Max, Min #______________________________________________________________________________ t1 = clone(t0)
file.close() #close the file after reading lines #first line read is table_name table_name = input_string[0] #second line and third read is list of attributes names and type attr_names = input_string[1] attr_types = input_string[2] #strip and split attribute names and types attr_names = attr_names.strip().split(',') attr_types = attr_types.strip().split(',') #create student table student_tbl = table('Student') student_tbl.create(attr_names, attr_types) #get attribute names from table attributes = student_tbl.getAttribNames() #insert values into database for obj in input_string[3:len(input_string)]: student_tbl.addRow(obj) #perform age group by print('Average GPA grouped by Age:') curr_avg = student_tbl.performAgeGroupBY('GPA', 'Age') for obj in curr_avg: print(obj)
#Kavilan Naidoo
#06-01-2015
#2D lists
from table import *

# Scoreboard: header row followed by one row per player.
player = [
    ["Name", "Kills", "Death"],
    ["K1llmAchine", 51, 49],
    ["bob2247", 5, 99],
    ["hAxOr12", 70, 30]
]

table(player)

# FIX: the original repeated the same three-column print four times with
# hard-coded row indices; loop over the rows instead (same output, and it
# keeps working if players are added or removed).
for row in player:
    print(row[0], row[1], row[2])
def sidesed(f='Data/diabetes.csv'):
    """Load `f` into a table (loader smoke test; the result is discarded)."""
    table(f)
5: '#EA528E', # pink 6: '#009DDF', # blue 7: '#76B82A', # vert 8: '#EF7D00', # orange foncé 9: '#5488C7', # violet 10: '#E7344C', #rouge 0: 'white', # valeur par defaut 'spacer': "grey", 'premier': "#C0C0C0" # pour nombre premier : gris clair } ma_table = table( \ couleurs = couleurs, longueur_tasseau = 400, # en mm largeur_tasseau = 22, # en mm largueur_espace = 8.5, # Correspond à l'interval en mm entre deux tasseaux dans le gabarit longueur_unit = 4, # Correspond à la hauteur en mm de 1 longueur_spacer = 3, #Correspond à l'épaisseur de la lame de scie outfolder = "SVG" ) ma_table.add_tasseau(1, tasseau(blocs=[bloc(100, [10, 10, 2, 50])])) ma_table.add_tasseau( 2, tasseau(blocs=[bloc(90, [10, 9, 2, 45]), bloc(9, [3, 3, 1, 9])])) ma_table.add_tasseau( 3, tasseau(blocs=[ bloc(80, [10, 8, 2, 40]), bloc(10, [10, 1, 2, 5], color_face4='white'), # ilfaut que la face 4 soit blanche!!!
# import math from math import sqrt from table import * print(2 + 3) 5 * 87 print('Marie Bouczo mange des fruits') marie = 'Marie Bouczo mange des fruits' print(marie) print(marie * 2) bouczo = [marie] print(len(bouczo)) antoine = 'Antoine Bouczo est sur son tracteur' bouczo.append(antoine) print(len(bouczo)) for person in bouczo: print(person) table(4, 100) print(sqrt(16))
def sidesed(f='data/diabetes.csv'):
    """Read the csv at `f` into a table; currently just exercises the
    loader (the table is discarded)."""
    _ = table(f)
def sidesed(f='data.dat/diabetes.csv'):
    """Load the diabetes data set from `f` (loader smoke test; the
    loaded table is unused)."""
    loaded = table(f)
def add_table(self, table_name, colomns):
    """Register a new table under `table_name` unless one already exists.

    BUG FIX: the original constructed `table(name, bd, colomns)`; `name`
    is not defined in this scope -- the parameter is `table_name`.
    NOTE(review): `bd` is also unresolved here; it presumably names the
    owning database object at module/class level -- confirm and, if it is
    this object, replace with `self`.
    """
    if self.tables.get(table_name) is None:
        self.tables[table_name] = table(table_name, bd, colomns)
def convert(md_text):
    """
    Convert markdown string to html format
    :param md_text: str, the markdown file
    :return: str, the html content
    """
    # separate by line
    md_text = md_text.split('\n')
    # save the html content for return
    html_text = ''
    # begin looping from the first line
    index = -1
    last_line_unordered = False
    while index < len(md_text) - 1:
        index += 1
        line = md_text[index]
        # code segment: a ``` fence opens a block that runs to the next fence
        if len(line) >= 3 and line[:3] == '```':
            html_line = ""
            language = line[3:].replace(' ', '')
            if len(language) == 0:
                language = False  # fence had no language tag
            order_index = index + 1
            find_end = False
            while order_index < len(md_text):
                if md_text[order_index][:3] == '```':
                    find_end = True
                    break
                else:
                    temp_line = md_text[order_index]
                    temp_line = code_replace(temp_line)  # escape HTML specials
                    html_line += temp_line + '<br />'
                    order_index += 1
            if find_end:
                if language is False:
                    html_text += ('<pre><code>' + html_line + '</code></pre>')
                else:
                    html_text += ('<pre><code class="' + str(language) + '">' + html_line + '</code></pre>')
                # print(language)
                index = order_index  # skip past the closing fence
                continue
            # unterminated fence: fall through and treat the line normally
        # table(): rewrites the tail of md_text in place when a markdown
        # table starts at `index`
        pre_text = md_text[:index]
        md_text = pre_text + table(md_text[index:])
        line = md_text[index]
        # header
        is_header, html_line = check_header(line)
        if is_header:
            html_text = html_text + html_line
            continue
        # horizontal rule
        is_horizontal_rule, html_line = check_horizontal_rule(line)
        if is_horizontal_rule:
            html_text = html_text + html_line
            continue
        # block quote
        line = check_blockquote(line)
        # checkbox
        line = line.replace(
            "- [ ] ", "<input type=\"checkbox\" onclick=\"return false;\"/> ")
        line = line.replace(
            "- [x] ", "<input type=\"checkbox\" checked onclick=\"return false;\"/> ")
        # deal with ordered list: accept either sequential numbering
        # (1. 2. 3.) or the all-ones shorthand (1. 1. 1.)
        if len(line.split('.')) != 0 and '1.' == line[:2]:
            html_line = '<ol>'
            order_index = index
            while order_index < len(md_text)\
                    and len(md_text[order_index].split('.')) != 0\
                    and (str(order_index - index + 1) == md_text[order_index].split('.')[0]
                         or '1' == md_text[order_index].split('.')[0]):
                to_replace = [str(order_index - index + 1) + '.', '1.']
                for replace_content in to_replace:
                    md_text[order_index] = md_text[order_index].replace(
                        replace_content, '')
                html_line = html_line + '<li>' + md_text[order_index] + '</li>'
                order_index += 1
            index = order_index - 1  # resume after the list
            html_line = html_line + '</ol>'
            line = html_line
        # deal with unordered list: open <ul> on the first item, keep the
        # flag while items continue, close </ul> when they stop
        is_unordered_list, html_line = check_unordered_list(line)
        if is_unordered_list and (not last_line_unordered):
            line = '<ul>' + html_line
            last_line_unordered = True
        elif is_unordered_list and last_line_unordered:
            line = html_line
            last_line_unordered = True
        elif (not is_unordered_list) and last_line_unordered:
            line = '</ul>' + line
        # inline code: consume backticks in pairs, escaping the code span
        # and markdown-converting the text between spans
        rest = line
        line = ''
        while rest.count('`') > 1:
            first_sign = rest.index('`')
            line = line + convert_not_inline(rest[:first_sign])
            rest = rest[first_sign + 1:]
            second_sign = rest.index('`')
            line = line + '<pre><code>' + code_replace(
                rest[:second_sign]) + '</code></pre>'
            rest = rest[second_sign + 1:]
        line = line + convert_not_inline(rest)
        html_text = html_text + line
        if not is_unordered_list:
            html_text = html_text + '<br>'
            last_line_unordered = False
    # final pass: convert any remaining table markup in the whole document
    html_text = table(html_text)
    return html_text
def tdivPrec(where = None , dtree = None, train = None, test = None):
  # Train a cluster-labelled decision tree on `train`, drop each `test`
  # row into it, and return parallel actual/predicted defect labels.
  rseed(1)  # reproducible clustering
  makeaModel = makeAModel()
  # pdb.set_trace()
  """ Training """
  _r = []
  for t in train:
    m = makeaModel.csv2py(t)
    _r += m._rows
  m._rows = _r  # last model carries all training rows
  prepare(m, settings = where) # Initialize all parameters for where2 to run
  tree = where2(m, m._rows) # Decision tree using where2
  tbl = table(t)  # header template from the LAST training file
  headerLabel = '=klass'
  Rows = []
  for k, _ in leaves(tree):
    for j in k.val:
      tmp = (j.cells)  # alias of the row's own cell list
      tmp.append('_' + str(id(k) % 1000))  # cluster tag
      j.__dict__.update({'cells': tmp})
      Rows.append(j.cells)
  tbl2 = newTable(tbl, headerLabel, Rows)
  """ Testing """
  _r = []
  for tt in test:
    mTst = makeaModel.csv2py(tt)
    _r += mTst._rows
  mTst._rows = _r
  prepare(mTst, settings = where) # Initialize all parameters for where2 to run
  tree = where2(mTst, mTst._rows) # Decision tree using where2
  tbl = table(tt)
  headerLabel = '=klass'
  Rows = []
  for k, _ in leaves(tree):
    for j in k.val:
      tmp = (j.cells)
      tmp.append('_' + str(id(k) % 1000))
      j.__dict__.update({'cells': tmp})
      Rows.append(j.cells)
  tbl3 = newTable(tbl, headerLabel, Rows)
  temp = []  # collects mean bug counts per visited cluster
  def sort(lst):
    # argsort + sorted values of lst
    return [i[0] for i in sorted(enumerate(lst), key = lambda x:x[1])], \
           [i[1] for i in sorted(enumerate(lst), key = lambda x:x[1])]
  def thresh(val1, val2):
    # NOTE(review): dead/broken helper -- never called; `sort()` is missing
    # its argument and the assignment rebinds the name `sorted` locally.
    indx, sorted = sort()
  def isdefective(case, test = False):
    # test=False: label a single row by its bug cell (second-from-last).
    # test=True : label a tree node by the mean bug count of its rows.
    if not test:
      return 'Defect' if case.cells[-2] > 0 else 'No Defect'
    else:
      bugs = [r.cells[-2] for r in case.rows];
      meanBugs = np.mean(bugs);
      medianBugs = np.median(bugs);   # NOTE(review): unused
      rangeBugs = (sorted(bugs)[0] + sorted(bugs)[-1]) / 2;  # NOTE(review): unused
      temp.append(meanBugs);
      return 'Defect' if meanBugs > 1.5 else 'No Defect'
  testCase = tbl3._rows
  # print testCase
  testDefective = []   # actual labels, per test row
  defectivClust = []   # predicted labels, from the landing cluster
  t = discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))
  myTree = tdiv(t, opt = dtree)
  # showTdiv(myTree)
  testCase = tbl3._rows
  # # print testCase
  for tC in testCase:
    loc = drop(tC, myTree)  # walk the row down the decision tree
    # if len(loc.kids)==0:
    testDefective.append(isdefective(tC))
    defectivClust.append(isdefective(loc, test = True))
  # saveImg(temp, 10)
  # contrastSet = getContrastSet(loc, myTree)
  # print 'Contrast Set:', contrastSet
  return [testDefective, defectivClust]
lo[indx]=i; if(hi[indx]==None or hi[indx]<i): hi[indx]=i; indx+=1 return hi, lo def normalizit(val,high,low): indx=0; for i in xrange(len(val)): if(not isa(val[i],str)): val[i]=(val[i]-low[indx])/(high[indx]-low[indx]); indx+=1 return val """ t0=table(source) rows = map(lambda x :x.cells, t0._rows) depenCol=[] for h in t0.klass: print h print t0.depen[0].__dict__ for p in t0.depen: depenCol=p.__dict__['col'] print depenCol #______________________________________________________________________________ for x in t0._rows: # Obtain the dictionary values of each row y=x.__dict__ # Obtain the keys and values of a dictionary values=y['cells']
def readBug(filen, path="./data"):
  '''
  read the last column of data file to compute defetive vs non-defective
  '''
  def makeTex(train, tune, test):
    # Emit a LaTeX figure summarising defect ratios per data set.
    # NOTE(review): never called in the visible span -- confirm the caller.
    def mulcol(num, style, name):
      # return "\multicolumn{"+num+"}{"+style+"}{" +name+"}"
      return name
    def datasetName():
      return ("&").join([mulcol("1","c", i[:-2]) if "0" in i else mulcol("1","c", i) for i in extractFeatures.dataset])
    def lstStat(num):
      # pdb.set_trace()
      return (" &").join(num)
    def divide(lst):
      # split into two halves (first half gets the extra element)
      return [lst[:(len(lst)/2+1)], lst[(len(lst)/2+1):]]
    extractFeatures = features("",filen)
    extractFeatures.keep()
    f = open(extractFeatures.filename+'DefNonDeflatex', 'w')
    space = " "
    datasets = divide(datasetName())
    train = divide(train)
    tune = divide(tune)
    test = divide(test)
    texCommand =""
    for i in range(2):
      texCommand += "\\begin{figure*}[!ht]\n"\
      "\\scriptsize\n"\
      "\\centering\n"\
      +space+"\\begin{tabular}{"+"c "*10+"}\n"\
      +space+"\\hline\\hline\n"\
      +space+"Dataset &"+datasets[i]+"\n\\\\\\hline\n"\
      +space+"training &"+lstStat(train[i])+"\n\\\\"\
      +space+"tunning &"+lstStat(tune[i])+"\n\\\\"\
      +space+"testing &"+lstStat(test[i])+"\n\\\\"\
      +space+"\\end{tabular}\n"\
      "\\end{figure*}\n"
    f.write(texCommand+'\n')
    f.close()
  folders = [f for f in listdir(path) if not isfile(join(path, f))]
  # stat = collections.OrderedDict
  stats = {}
  train = []
  tune = []
  test = []
  for one in folders:
    nextpath = join(path, one)
    filename = [f for f in listdir(nextpath) if isfile(join(nextpath, f))]
    filepath = [join(nextpath, f) for f in listdir(nextpath) if isfile(join(nextpath, f))]
    # Count defective (last cell >= 1) vs total rows per data file.
    for dataname in filename:
      filepath = join(nextpath,dataname)  # rebinds the list above
      defNum = 0
      nondefNum = 0
      tbl = table(filepath)
      for row in tbl._rows:
        if row.cells[-1]>=1:
          defNum+=1
        else:
          nondefNum+=1
      stats[dataname] = str(defNum)+"/"+ str(nondefNum +defNum)
      # stats.append([dataname,str(defNum)+"/"+ str(nondefNum)])
    # Slide a 3-file window: versions i/i+1/i+2 become train/tune/test.
    for i in range(len(filename)):
      dataname = one +"V"+str(i)
      try:
        # pdb.set_trace()
        test += [stats[filename[i+2]]]
        tune += [stats[filename[i+1]]]
        train += [stats[filename[i]]]
      except IndexError, e:
        print one+" done!"
        break
def readBug(filen, path="./data"):
  '''
  read the last column of data file to compute defetive vs non-defective
  '''
  def makeTex(train, tune, test):
    # Emit a LaTeX figure summarising defect ratios per data set.
    # NOTE(review): never called in the visible span -- confirm the caller.
    def mulcol(num, style, name):
      # return "\multicolumn{"+num+"}{"+style+"}{" +name+"}"
      return name
    def datasetName():
      return ("&").join([
          mulcol("1", "c", i[:-2]) if "0" in i else mulcol("1", "c", i)
          for i in extractFeatures.dataset
      ])
    def lstStat(num):
      # pdb.set_trace()
      return (" &").join(num)
    def divide(lst):
      # split into two halves (first half gets the extra element)
      return [lst[:(len(lst) / 2 + 1)], lst[(len(lst) / 2 + 1):]]
    extractFeatures = features("", filen)
    extractFeatures.keep()
    f = open(extractFeatures.filename + 'DefNonDeflatex', 'w')
    space = " "
    datasets = divide(datasetName())
    train = divide(train)
    tune = divide(tune)
    test = divide(test)
    texCommand = ""
    for i in range(2):
      texCommand += "\\begin{figure*}[!ht]\n"\
      "\\scriptsize\n"\
      "\\centering\n"\
      +space+"\\begin{tabular}{"+"c "*10+"}\n"\
      +space+"\\hline\\hline\n"\
      +space+"Dataset &"+datasets[i]+"\n\\\\\\hline\n"\
      +space+"training &"+lstStat(train[i])+"\n\\\\"\
      +space+"tunning &"+lstStat(tune[i])+"\n\\\\"\
      +space+"testing &"+lstStat(test[i])+"\n\\\\"\
      +space+"\\end{tabular}\n"\
      "\\end{figure*}\n"
    f.write(texCommand + '\n')
    f.close()
  folders = [f for f in listdir(path) if not isfile(join(path, f))]
  # stat = collections.OrderedDict
  stats = {}
  train = []
  tune = []
  test = []
  for one in folders:
    nextpath = join(path, one)
    filename = [f for f in listdir(nextpath) if isfile(join(nextpath, f))]
    filepath = [
        join(nextpath, f) for f in listdir(nextpath)
        if isfile(join(nextpath, f))
    ]
    # Count defective (last cell >= 1) vs total rows per data file.
    for dataname in filename:
      filepath = join(nextpath, dataname)  # rebinds the list above
      defNum = 0
      nondefNum = 0
      tbl = table(filepath)
      for row in tbl._rows:
        if row.cells[-1] >= 1:
          defNum += 1
        else:
          nondefNum += 1
      stats[dataname] = str(defNum) + "/" + str(nondefNum + defNum)
      # stats.append([dataname,str(defNum)+"/"+ str(nondefNum)])
    # Slide a 3-file window: versions i/i+1/i+2 become train/tune/test.
    for i in range(len(filename)):
      dataname = one + "V" + str(i)
      try:
        # pdb.set_trace()
        test += [stats[filename[i + 2]]]
        tune += [stats[filename[i + 1]]]
        train += [stats[filename[i]]]
      except IndexError, e:
        print one + " done!"
        break