Ejemplo n.º 1
0
def clustertbl(f, tree, num2sym):
    """Tag every row under the leaves of `tree` with a cluster id.

    f       : a table source, or a list of sources (only the first is opened).
    tree    : cluster tree; `leaves(tree)` yields `(leaf, _)` pairs.
    num2sym : mapping used to translate cells of Sym columns back to
              symbols; cells missing from the mapping are kept as they are.

    Returns `(table, rows)`: a clone of the opened table (with an extra
    "=klass" header) built from the tagged rows, and the list of those rows.
    Each row gains two trailing cells: the leaf counter as a string and a
    copy of the first dependent column's value.
    """
    source = f[0] if isinstance(f, list) else f
    base = table(source)

    # Extra header describing the cluster-id column.
    klass_header = Num()
    klass_header.col = len(base.headers)
    klass_header.name = "=klass"
    base.headers += [klass_header]

    tagged = []
    for cluster_id, (leaf, _) in enumerate(leaves(tree)):
        label = str(cluster_id)
        for member in leaf.val:
            cells = member.cells
            for pos, value in enumerate(cells):
                if isinstance(base.headers[pos], Sym):
                    cells[pos] = num2sym.get(value, value)
            cells.append(label)
            # add the FIRST objective into the last cell of the row
            cells.append(cells[base.depen[0].col])
            member.update(cells=cells)
            tagged.append(member.cells)
    return clone(base, tagged), tagged
Ejemplo n.º 2
0
def csv2py(f):
    """Load one table source (or a list of them) and encode its symbols.

    f : a single source accepted by `table`, or a list of such sources.
        For a list, the rows of every later table are appended to the
        first table, which is then used as the combined table.

    Returns `(model, sym2num)` where `model` is built by `data(...)` from
    the independent names, dependent names and row cells, and `sym2num`
    maps each symbol that was replaced to its integer code.
    """
    # sym2num holds every symbol seen so far together with its assigned number
    sym2num = {}

    def merged(tbl):
        """Return one table; for a list, fold later rows into the first."""
        if not isinstance(tbl, list):
            return tbl
        first = tbl[0]
        for other in tbl[1:]:
            first._rows += other._rows
        return first

    def str2num(t, p=0):
        """Replace string cells of Sym columns left of the first dependent
        column by integer codes, numbering new symbols from `p`."""
        t = merged(t)
        for row in t._rows:
            for c, cell in enumerate(row.cells):
                if (isinstance(cell, str) and c < t.depen[0].col
                        and isinstance(t.headers[c], Sym)):
                    # Membership test, not `get(cell, 0) == 0`: the first
                    # symbol legitimately has code 0 and must not be
                    # re-numbered every time it shows up again.
                    if cell not in sym2num:
                        sym2num[cell] = p
                        p += 1
                    row.cells[c] = sym2num[cell]  # update cell with num
        return t

    if isinstance(f, list):
        tbl = [table(src) for src in f]  # tbl is a list of tables
    else:
        tbl = table(f)
    tbl_num = str2num(tbl)
    model = data(
        indep=[h.name for h in tbl_num.indep],
        less=[h.name for h in tbl_num.depen],
        _rows=[row.cells for row in tbl_num._rows],
    )
    return model, sym2num
Ejemplo n.º 3
0
def _tdivPrec(dir='camel/'): 
 #==============================================================================
 # Recursively clustering the model.
 #==============================================================================
 train=['camel-1.0.csv', 'camel-1.2.csv', 'camel-1.2.csv']
 test=['camel-1.6.csv']
 rseed(1)
 makeaModel=makeAModel()
 _rows=[]
 
 # Concatenate training cases
 for t in train:
  file=dir+t
  m=makeaModel.csv2py(file)
  prepare(m) # Initialize all parameters for where2 to run
  tree=where2(m, m._rows) # Decision tree using where2
  tbl = table(file)  
  headerLabel='=klass'
  Rows=[]
  for k,_ in leaves(tree):
   for j in k.val:
     tmp=(j.cells)
     tmp.append('_'+str(id(k) % 1000)) 
     j.__dict__.update({'cells': tmp})
     Rows.append(j.cells)
  _rows+=Rows
  tbl2=makeMeATable(tbl, headerLabel, _rows)
 
 # Test case!
 _rows=[]
 for tt in test:
  file=dir+tt
  m=makeaModel.csv2py(file)
  prepare(m) # Initialize all parameters for where2 to run
  tree=where2(m, m._rows) # Decision tree using where2
  tbl = table(file)  
  headerLabel='=klass'
  Rows=[]
  for k,_ in leaves(tree):
   for j in k.val:
     tmp=(j.cells)
     tmp.append('_'+str(id(k) % 1000)) 
     j.__dict__.update({'cells': tmp})
     Rows.append(j.cells)
  _rows+=Rows
  tbl3=makeMeATable(tbl, headerLabel, _rows)
  
 testCase=tbl3._rows
 print testCase 
 t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))
 myTree=tdiv(t) 
 showTdiv(myTree)
 loc = leaveOneOut(testCase[randi(0, len(testCase))], myTree)
 contrastSet = getContrastSet(loc, myTree)
 print 'Contrast Set:', contrastSet
Ejemplo n.º 4
0
Archivo: main.py Proyecto: ai-se/HPO
def csv2py(f):
  """Load one source, or merge a list of sources, into a `data` model.

  For a list, the rows of every later table are appended to the first
  table. The model's `indep` names leave out the last independent header.
  """
  if isinstance(f, list):
    tables = [table(src) for src in f]  # one table per source
    merged = tables[0]
    for extra in tables[1:]:
      merged._rows += extra._rows
  else:
    merged = table(f)
  # no symbol data col in defect data sets, so cells are used unchanged
  indep_names = [h.name for h in merged.indep[:-1]]
  dep_names = [h.name for h in merged.depen]
  cell_rows = [r.cells for r in merged._rows]
  return data(indep=indep_names, less=dep_names, _rows=cell_rows)
Ejemplo n.º 5
0
def _tdivdemo(file='data/nasa93dem.csv'): 
 #==============================================================================
 # We start by recursively clustering the model.
 #==============================================================================
 makeaModel=makeAModel()
 m=makeaModel.csv2py(file) 
 prepare(m) # Initialize all parameters for where2 to run
 tree=where2(m, m._rows) # Decision tree using where2
 tbl = table(file)  
 headerLabel='=klass'
 Rows=[]
 for k,_ in leaves(tree):
  for j in k.val:
    tmp=j.cells
    tmp.append(id(k) % 1000) 
    j.__dict__.update({'cells': tmp})
    Rows.append(j.cells)
 tbl2=makeMeATable(tbl, headerLabel, Rows)
 
 testCase=tbl2._rows.pop(randi(0,len(Rows)-1))
 t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))  
 myTree=tdiv(t) 
 loc=apex(testCase, myTree)
 print loc.__dict__
 print 'Id: ',loc.mode, ' Level: ', loc.lvl, ' Variable: ', loc.f.name
 showTdiv(myTree)
 #==============================================================================
 for node, lvl in dtnodes(myTree):
   rows=map(lambda x:x.cells,node.rows)
Ejemplo n.º 6
0
def createTbl(
        data, settings=None, _smote=False, isBin=False, bugThres=2, duplicate=False):
  """
  Pool the rows of several data sets, cluster them, and return a table
  whose rows carry a cluster label.

  data      : iterable of sources handed one by one to csv2py
  settings  : accepted but not forwarded (prepare is called with None)
  _smote    : True/False, forwarded to csv2py
  isBin     : True/False, reduce the last cell to 0/1 (no defect/defect)
  bugThres  : int, last cell < bugThres => 0, otherwise 1 (default 2)
  duplicate : forwarded to csv2py
  """
  makeaModel = makeAmodel.makeAModel()
  pooled = []
  for t in data:
    m = makeaModel.csv2py(t, _smote=_smote, duplicate=duplicate)
    pooled += m._rows
  # The model of the last source carries the pooled rows from here on.
  m._rows = pooled
  # NOTE(review): the `settings` argument is ignored here - confirm intent.
  prepare(m, settings=None)  # Initialize all parameters for where2 to run
  tree = where2(m, m._rows)  # Decision tree using where2
  tbl = table(t)  # header template comes from the last source

  Rows = []
  for leaf, _ in leaves(tree):
    tag = '_' + str(id(leaf) % 1000)
    for member in leaf.val:
      cells = member.cells
      if isBin:
        if cells[-1] < bugThres:
          cells[-1] = 0
        else:
          cells[-1] = 1
      cells.append(tag)
      member.__dict__.update({'cells': cells})
      Rows.append(member.cells)

  return newTable(tbl, '=klass', Rows)
Ejemplo n.º 7
0
def _tdivdemo(file='data/nasa93dem.csv'): 
 #==============================================================================
 # We start by recursively clustering the model.
 #==============================================================================
 makeaModel=makeAModel()
 m=makeaModel.csv2py(file)
 
 #alias =  dict (zip(makeaModel.translate.values(),makeaModel.translate.keys()))
 #print alias
 #def num2str(lst):
 # return [alias[z] for z in lst]
 
 prepare(m) # Initialize all parameters for where2 to run
 tree=where2(m, m._rows) # Decision tree using where2
 tbl = table(file)  
 headerLabel='=klass'
 Rows=[]
 for k,_ in leaves(tree):
  for j in k.val:
    tmp=(j.cells)
    tmp.append('_'+str(id(k) % 1000)) 
    j.__dict__.update({'cells': tmp})
    Rows.append(j.cells)
 tbl2=makeMeATable(tbl, headerLabel, Rows)
 print 
 testCase=[tbl2._rows.pop(randi(0, len(tbl2._rows))) for k in xrange(500)]
 t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))
 myTree=tdiv(t) 
 showTdiv(myTree)
 loc = leaveOneOut(testCase[randi(0, len(testCase))], myTree)
 contrastSet = getContrastSet(loc, myTree)
 print 'Contrast Set:', contrastSet
Ejemplo n.º 8
0
  def csv2py(self, filename, _smote=False, duplicate=False):
    """Convert a csv file to a model file.

    filename  : source handed to `table`.
    _smote    : when true, the table is first passed through `smote.SMOTE`.
    duplicate : forwarded to `smote.SMOTE` as `resample`.

    Returns `self.data(...)` built from the independent names, the
    dependent names, and the rows with string cells translated through
    `self.translate`.
    """
    tbl = table(filename)
    if _smote:
      tbl = smote.SMOTE(
          tbl,
          atleast=50,
          atmost=101,
          bugIndx=1,
          resample=duplicate)
    self.str2num(tbl)
    tonum = lambda x: self.translate[x] if isinstance(x, str) else x

    # table.py does not separate dependent and independent variables, so
    # drop every independent header whose name is also a dependent one.
    # The list is rebuilt in place instead of popping while enumerating,
    # which skipped the element following each removal.
    depen_names = [l.name for l in tbl.depen]
    tbl.indep[:] = [k for k in tbl.indep if k.name not in depen_names]

    return self.data(indep=[i.name for i in tbl.indep],
                     less=[i.name for i in tbl.depen],
                     _rows=[[tonum(xx) for xx in x.cells]
                            for x in tbl._rows])
Ejemplo n.º 9
0
def buildtestdata1(f):  # build testdata from table
    """Return `(rows, labels)` for the table loaded from `f`.

    A row is labelled "Defective" when its first dependent cell is > 0,
    otherwise "Non-Defective".
    """
    tbl = table(f)
    testdata, actual = [], []
    for row in tbl._rows:
        if row.cells[tbl.depen[0].col] > 0:
            label = "Defective"
        else:
            label = "Non-Defective"
        actual.append(label)
        testdata.append(row)
    return testdata, actual
Ejemplo n.º 10
0
Archivo: main.py Proyecto: ai-se/HPO
def buildtestdata1(f, isdefect = False):  # build testdata from table
  """Return `(rows, labels)` for one source or a merged list of sources.

  With `isdefect` true the label is "Defective"/"Non-Defective" depending
  on whether the first dependent cell is > 0; otherwise the label is that
  cell converted with `str`.
  """
  if isinstance(f, list):
    tables = [table(src) for src in f]  # one table per source
    tbl = tables[0]
    for extra in tables[1:]:
      tbl._rows += extra._rows
  else:
    tbl = table(f)

  testdata, actual = [], []
  for row in tbl._rows:
    outcome = row.cells[tbl.depen[0].col]
    if isdefect:
      actual.append("Defective" if outcome > 0 else "Non-Defective")
    else:
      actual.append(str(outcome))
    testdata.append(row)
  return testdata, actual
Ejemplo n.º 11
0
def buildtestdata1(f, actual=None, testdata=None):# build testdata from table
  """Return `(testdata, actual)` for the table loaded from `f`.

  actual   : optional list extended in place with one label per row
             ("Defective" when the first dependent cell is > 0, otherwise
             "Non-Defective").
  testdata : optional list extended in place with the rows themselves.

  When a list is omitted a fresh one is created for this call. The former
  `[]` defaults were shared between calls, so results piled up across
  invocations.
  """
  if actual is None:
    actual = []
  if testdata is None:
    testdata = []
  tbl = table(f)
  for row in tbl._rows:
    if row.cells[tbl.depen[0].col] > 0:
      actual.append("Defective")
    else:
      actual.append("Non-Defective")
    testdata.append(row)
  return testdata, actual
Ejemplo n.º 12
0
 def csv2py(self, filename):
  """Convert a csv file to a model file.

  String cells are replaced by `self.translate[cell]`; other cells are
  passed through unchanged. Returns `self.data(...)` built from the
  independent names, the dependent names and the translated rows.
  """
  tbl=table(filename)
  self.str2num(tbl)
  tonum= lambda x: self.translate[x] if isinstance(x, str) else x
  # Translate the rows once; the result used to be computed twice and the
  # first copy thrown away.
  _rows=[[tonum(xx) for xx in x.cells] for x in tbl._rows]

  return self.data(indep=[i.__dict__['name'] for i in tbl.indep],
                   less=[i.__dict__['name'] for i in tbl.depen],
                   _rows=_rows)
Ejemplo n.º 13
0
Archivo: smote.py Proyecto: ai-se/HPO
def SMOTE(path="./data", k=5, ):
  """Run `_SMOTE1` on every file found one folder level below `path`.

  For each file the table is loaded, `_SMOTE1(tbl, k)` is applied, and the
  result is saved under "./Smote" followed by the file path minus its
  first character.
  """
  subdirs = [entry for entry in listdir(path) if not isfile(join(path, entry))]
  for subdir in subdirs:
    nextpath = join(path, subdir)
    sources = [join(nextpath, entry) for entry in listdir(nextpath)
               if isfile(join(nextpath, entry))]
    for src in sources:
      tbl = table(src)
      # Drop the first character of the path (the '.' of "./data/...").
      newfilename = "./Smote" + src[1:]
      out = _SMOTE1(tbl, k)
      savetbl(tbl, out, newfilename)
Ejemplo n.º 14
0
    def __init__(self, line):
        """Parse a create-table command and store the table it describes.

        `line` must start with `self.statement_title`; the first word after
        the title is the table name, and the text between the first '(' and
        the first ')' is a comma-separated list of "<column> <type>" pairs.

        Raises Exception when `line` does not start with the title.
        """
        title_len = len(self.statement_title)
        if line[:title_len] != self.statement_title:
            raise Exception('not createTable command: '+line)
        table_name = line[title_len:].split()[0]

        # Column definitions live between the parentheses.
        inside = line[line.index('(') + 1:line.index(')')]
        pairs = [kv.split() for kv in inside.split(',')]
        columns = [column(c, datatype.build(v)) for c, v in pairs]

        self.table = table(table_name, columns)
Ejemplo n.º 15
0
Archivo: main.py Proyecto: ai-se/HPO
def clustertbl(f, tree, num2sym={}):
  """Tag every row under the leaves of `tree` with its leaf counter.

  f       : a table source, or a list of sources (only the first is opened).
  tree    : cluster tree; `leaves(tree)` yields `(leaf, _)` pairs.
  num2sym : accepted for interface compatibility; not used in this version.

  Returns `(table, rows)`: a clone of the opened table (with an extra
  "=klass" header) holding the tagged rows, and the list of those rows.
  Each row gains the leaf counter (as a string) and a copy of the first
  dependent column's value.
  """
  first_source = f[0] if isinstance(f, list) else f
  tbl = table(first_source)

  klass_col = Num()
  klass_col.col = len(tbl.headers)
  klass_col.name = "=klass"
  tbl.headers += [klass_col]  # the table now has a cluster-ID header

  row = []
  for count, (k, _) in enumerate(leaves(tree)):
    label = str(count)
    for j in k.val:
      cells = j.cells
      cells.append(label)
      cells.append(cells[tbl.depen[0].col])
      j.update(cells=cells)
      row.append(j.cells)
  return clone(tbl, row), row
Ejemplo n.º 16
0
Archivo: cube.py Proyecto: WeiFoo/axe
def ideaed(f='data/nasa93.csv'):
  """Cluster the table in `f`, build a `tdiv` tree over the cluster labels,
  and print per-leaf scores plus median/iqr summaries of the objectives.

  The meaning of the project helpers used here (idea, tdiv,
  snakesAndLadders, jumpUp, ...) is not visible in this block.
  """
  def change(x):
    # Wrap a header name as "?" + name + "/" when it contains ">" or "<";
    # other names are returned unchanged.
    prefix=suffix=""
    for ch in x:
      if ch == ">": prefix="?"; suffix="/"
      if ch == "<": prefix="?"; suffix="/"
    return prefix + x + suffix
  dists={}  # not used below
  tbl=table(f)
  opt= distings(
    klass = lambda x,tbl,o: fromHell(tbl,x,o), 
    how   = nearest1,
    two   = mostDistant
    )
  tree1 = idea(tbl,opt=opt)
  klass = Sym("=klass")  # not used below
  # New header: the renamed original columns plus a trailing "=KLASS".
  names = [change(h.name) for h in tbl.headers] + ["=KLASS"]
  tbl2=head(names,table0("clusters of "+ f))
  # Copy every row of every leaf into tbl2, tagged with "_<leaf id>".
  for x in leaves(tree1):
    it = "_" + str(x._id)
    for row in x.rows:
      body(row.cells + [it],tbl2,True)
  tbl3= discreteNums(tbl2,[row.cells for row in tbl2._rows])
  tree2 = tdiv(tbl3)
  showTdiv(tree2)
  snakesAndLadders(tree2,tbl3,
                   lambda node: fromHells(tbl3,
                                         node.rows,
                                         opt))
  # ss0 / ss1 collect, per objective name, the medians of the "as is" and
  # "to be" tables returned by jumpUp.
  ss0,ss1={},{}
  for node in dtleaves(tree2):
    says(node._id,':n',len(node.rows),' :score',g3(fromHells(tbl3,node.rows,opt)))
    if node.ladder:
      says(" :want",node.ladder._id," :plan",node.better)
    if node.snake:
      says(" :hate",node.snake._id," :watch",node.worse)
    nl()

    for row in node.rows:
      asIs,toBe= jumpUp(row,tree2)
      for h0,h1 in zip(asIs.tbl.less + asIs.tbl.more,
                   toBe.tbl.less + toBe.tbl.more):
        s0 = ss0.get(h0.name,Num())
        s1 = ss1.get(h1.name,Num())
        # Bare `+` expressions: presumably Num overloads `+` to add a value
        # in place - TODO confirm against the Num class.
        s0 + h0.median()
        s1 + h1.median()
        ss0[h0.name] = s0
        # NOTE(review): s1 was looked up under h1.name but is stored under
        # h0.name; harmless only if both headers share a name - confirm.
        ss1[h0.name] = s1
  print ""
  # One summary line per objective: medians first, then iqr values.
  for key in ss0:
    saysln(key, ss0[key].median(), ss1[key].median(), \
             ss0[key].iqr(),ss1[key].iqr())
Ejemplo n.º 17
0
Archivo: cube.py Proyecto: WeiFoo/axe
def loosed(f='data/nasa93.csv'):
  dists={}
  t=table(f)
  opt= distings(
    klass = lambda x,t,o: x.cells[t.less[0].col],
    how   = nearest1,
    tests = 5,
    #tiny  = lambda x: 8,
    two   =  twoDistant
  #rprint(t.klass[0]); exit()
  )
  nums = loos(t,opt)
  print "", int(100*nums.median()), int(100*nums.iqr())#sorted(nums.all())
Ejemplo n.º 18
0
Archivo: cube.py Proyecto: spati2/storm
def loosed(f='Data/nasa93.csv'):
  dists={}
  t=table(f)
  opt= distings(
    klass = lambda x,t,o: x.cells[t.less[0].col],
    how   = nearest1,
    tests = 5,
    #tiny  = lambda x: 8,
    two   =  twoDistant
  #rprint(t.klass[0]); exit()
  )
  nums = loos(t,opt)
  print "", int(100*nums.median()), int(100*nums.iqr())#sorted(nums.all())
Ejemplo n.º 19
0
 def csv2py(self, filename):
  """Convert a csv file to a model file.

  Every cell is looked up in `self.translate`. Returns `self.data(...)`
  built from the independent names, the dependent names and the
  translated rows.
  """
  tbl=table(filename)
  self.str2num(tbl)
  tonum= lambda x: self.translate[x] # if isinstance(x, str) else x

  # Drop independent headers that are also listed as dependent. The list
  # is rebuilt in place rather than popped while being enumerated, which
  # skipped the element right after each removal.
  depen_names=[l.name for l in tbl.depen]
  tbl.indep[:]=[k for k in tbl.indep if k.name not in depen_names]

  return self.data(indep=[i.name for i in tbl.indep],
                   less=[i.name for i in tbl.depen],
                   _rows=[[tonum(xx) for xx in x.cells] for x in tbl._rows])
Ejemplo n.º 20
0
def genic(src='data/diabetes.csv',opt=None):
  """Stream the rows of `src` into a working store of centroids.

  Until `w.opt.k` centroids exist each row goes to `more`; afterwards the
  row is fused with its nearest centroid and `less` is called whenever the
  row number is a multiple of `w.opt.era`.

  Returns `(w, centroids)` with the centroids sorted in descending order.
  """
  w = o(num=[], sym=[], dep=[], indep=[],
        centroids=[],
        min={}, max={}, name={},index={},
        opt=opt or genic0())
  for n, row in table(src,w):
    data(w,row)
    if len(w.centroids) < w.opt.k:
      more(w,n,row)
      continue
    fuse(w,row,nearest(w,row))
    if n % w.opt.era == 0:
      less(w,n)
  ranked = sorted(w.centroids,reverse=True)
  return w,ranked
Ejemplo n.º 21
0
def moea(f='data.dat/coc81dem.csv'):
    """Seed the random generator with 1 and run `loosMoea` on the table
    loaded from `f`, using a fixed set of `distings` options."""
    seed(1)
    tbl = table(f)
    score = lambda x, t, o: fromHell(t, x, o)
    always_four = lambda x: 4
    opt = distings(klass=score,
                   how=nearest1,
                   tiny=always_four,
                   some=10000,
                   retry=1,
                   repeats=1,
                   two=mostDistant)
    loosMoea(tbl, opt)
Ejemplo n.º 22
0
Archivo: cube.py Proyecto: WeiFoo/axe
def moea(f='data/coc81dem.csv'):
  """Run `loosMoea` over the table in `f` after seeding with 1."""
  seed(1)
  source_table = table(f)
  options = dict(
    klass=lambda x,t,o: fromHell(t,x,o),  # row score
    how=nearest1,
    tiny=lambda x: 4,                     # constant, ignores its argument
    some=10000,
    retry=1,
    repeats=1,
    two=mostDistant)
  loosMoea(source_table, distings(**options))
Ejemplo n.º 23
0
    def csv2py(self, filename):
        """Convert a csv file to a model file.

        String cells are replaced by `self.translate[cell]`; other cells
        pass through unchanged. Returns `self.data(...)` built from the
        independent names, the dependent names and the translated rows.
        """
        tbl = table(filename)
        self.str2num(tbl)
        tonum = lambda x: self.translate[x] if isinstance(x, str) else x

        # Remove independent headers that also appear as dependent ones.
        # Rebuilding the list in place avoids popping while enumerating,
        # which skipped the element following each removal.
        depen_names = [l.name for l in tbl.depen]
        tbl.indep[:] = [k for k in tbl.indep if k.name not in depen_names]

        return self.data(indep=[i.name for i in tbl.indep],
                         less=[i.name for i in tbl.depen],
                         _rows=[[tonum(xx) for xx in x.cells]
                                for x in tbl._rows])
Ejemplo n.º 24
0
    def csv2py(self, filename):
        """Convert a csv file to a model file.

        String cells are replaced by `self.translate[cell]`; other cells
        pass through unchanged. Returns `self.data(...)` built from the
        independent names, the dependent names and the translated rows.
        """
        tbl = table(filename)
        self.str2num(tbl)
        tonum = lambda x: self.translate[x] if isinstance(x, str) else x

        # table.py does not separate dependent and independent variables,
        # so filter the dependent names out of the independent list here.
        # The filter replaces a pop-while-enumerating loop that skipped the
        # element after every removal.
        depen_names = [l.name for l in tbl.depen]
        tbl.indep[:] = [k for k in tbl.indep if k.name not in depen_names]

        return self.data(indep=[i.name for i in tbl.indep],
                         less=[i.name for i in tbl.depen],
                         _rows=[[tonum(xx) for xx in x.cells]
                                for x in tbl._rows])
Ejemplo n.º 25
0
def clustertbl(f,tree, num2sym, row=None):
  """Tag every row under the leaves of `tree` with a cluster id.

  f       : source of the table to open.
  tree    : cluster tree; `leaves(tree)` yields `(leaf, _)` pairs.
  num2sym : mapping used to translate cells of Sym columns back to
            symbols; cells missing from the mapping are kept.
  row     : optional list that receives the tagged rows; a new list is
            created per call when omitted (the old `[]` default was shared
            between calls and kept growing).

  Returns `(table, row)`: a clone of the opened table (with an extra
  "=klass" header) built from `row`, and `row` itself. Each row gains
  id(leaf) % 1000 and a copy of the first dependent column's value.
  """
  if row is None:
    row = []
  tbl1 = tbl = table(f)  # open the first table
  newheader = Num()
  newheader.col = len(tbl.headers)
  newheader.name = "=klass"
  tbl1.headers += [newheader]  # tbl1 : the new table with cluster ID
  for k, _ in leaves(tree):
    for j in k.val:
      for i, cell in enumerate(j.cells):
        if isinstance(tbl.headers[i], Sym):
          j.cells[i] = num2sym.get(cell, cell)
      tmp = j.cells
      tmp.append(id(k) % 1000)
      # add the FIRST objective into the last cell of the row
      tmp.append(j.cells[tbl1.depen[0].col])
      j.update(cells=tmp)
      row.append(j.cells)
  tbl1 = clone(tbl1, row)
  return tbl1, row
Ejemplo n.º 26
0
def createTbl(data):
    """Pool the rows of every source in `data`, cluster them with `where2`,
    and return a new table whose rows carry a '_<n>' cluster label, where
    n is id(leaf) % 1000."""
    makeaModel = makeAModel()
    pooled = []
    for t in data:
        m = makeaModel.csv2py(t)
        pooled += m._rows
    # The model of the last source carries the pooled rows from here on.
    m._rows = pooled
    prepare(m, settings=None)  # Initialize all parameters for where2 to run
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(t)  # header template comes from the last source

    Rows = []
    for leaf, _ in leaves(tree):
        tag = '_' + str(id(leaf) % 1000)
        for member in leaf.val:
            cells = member.cells
            cells.append(tag)
            member.__dict__.update({'cells': cells})
            Rows.append(member.cells)
    return newTable(tbl, '=klass', Rows)
Ejemplo n.º 27
0
def createTbl(data):
 """Cluster the pooled rows of all sources in `data` and return a table
 whose rows end with a '_<id(leaf) % 1000>' cluster label."""
 makeaModel = makeAModel()
 all_rows = []
 for t in data:
  m = makeaModel.csv2py(t)
  all_rows += m._rows
 m._rows = all_rows  # last model now holds the rows of every source
 prepare(m, settings = None)  # Initialize all parameters for where2 to run
 tree = where2(m, m._rows)  # Decision tree using where2
 tbl = table(t)  # table of the last source supplies the headers
 labelled = []
 for node, _ in leaves(tree):
  for item in node.val:
   cells = item.cells
   cells.append('_' + str(id(node) % 1000))
   item.__dict__.update({'cells': cells})
   labelled.append(item.cells)
 return newTable(tbl, '=klass', labelled)
Ejemplo n.º 28
0
 def add_widgets(self):
     """Create the data table and the Spectrum / Plot / Help menu bar,
     then install the menu bar on `self.master`."""
     self.table = table(self, data=pd.DataFrame({'W': [""]}))

     menubar = Menu(self.master)
     self.menubar = menubar

     # "Spectrum" menu: generators for the two spectra.
     spectrum_menu = Menu(menubar, tearoff=0)
     self.filemenu = spectrum_menu
     spectrum_menu.add_command(label="Generate wave spectrum",
                               command=self.Generate_wavespectrum)
     spectrum_menu.add_command(label="Generate Encounter spectrum",
                               command=self.Generate_encounterspectrum)

     # "Plot" menu.
     plot_menu = Menu(menubar, tearoff=0)
     self.plot = plot_menu
     plot_menu.add_command(label="plot Wave spectrum",
                           command=self.plot_wavespectrum)
     plot_menu.add_command(label="plot Encounter spectrum",
                           command=self.plot_Encounterspectrum)

     # "Help" menu.
     help_menu = Menu(menubar, tearoff=0)
     self.help = help_menu
     help_menu.add_command(label="about", command=self.about)

     spectrum_menu.add_separator()
     menubar.add_cascade(label="Spectrum", menu=spectrum_menu)
     menubar.add_cascade(label="Plot", menu=plot_menu)
     menubar.add_cascade(label="Help", menu=help_menu)
     self.master.config(menu=menubar)
Ejemplo n.º 29
0
def clustertbl(f, tree, num2sym, row=None):
    """Tag every row under the leaves of `tree` with a cluster id.

    f       : source of the table to open.
    tree    : cluster tree; `leaves(tree)` yields `(leaf, _)` pairs.
    num2sym : mapping used to translate cells of Sym columns back to
              symbols; cells missing from the mapping are kept.
    row     : optional list that receives the tagged rows. When omitted a
              new list is created for this call; the previous `[]` default
              was shared between calls and accumulated rows.

    Returns `(table, row)`: a clone of the opened table (with an extra
    "=klass" header) built from `row`, and `row` itself. Each row gains
    id(leaf) % 1000 and a copy of the first dependent column's value.
    """
    if row is None:
        row = []
    tbl1 = tbl = table(f)
    newheader = Num()
    newheader.col = len(tbl.headers)
    newheader.name = "=klass"
    tbl1.headers += [newheader]  # tbl1 : the new table with cluster ID
    for k, _ in leaves(tree):
        for j in k.val:
            for i, cell in enumerate(j.cells):
                if isinstance(tbl.headers[i], Sym):
                    j.cells[i] = num2sym.get(cell, cell)
            tmp = j.cells
            tmp.append(id(k) % 1000)
            # add the FIRST objective into the last cell of the row
            tmp.append(j.cells[tbl1.depen[0].col])
            j.update(cells=tmp)
            row.append(j.cells)
    tbl1 = clone(tbl1, row)
    return tbl1, row
Ejemplo n.º 30
0
def csv2py(f):
    """Load the table in `f` and replace its symbols by integer codes.

    Returns `(model, sym2num)` where `model` is built by `data(...)` from
    the independent names, dependent names and row cells, and `sym2num`
    maps each replaced symbol to its integer code.
    """
    # holds every symbol seen so far together with its assigned number
    sym2num = {}

    def str2num(t, p=0):
        """Replace string cells of Sym columns left of the first dependent
        column by integer codes, numbering new symbols from `p`."""
        for row in t._rows:
            for c, cell in enumerate(row.cells):
                if (isinstance(cell, str) and c < t.depen[0].col
                        and isinstance(t.headers[c], Sym)):
                    # Test membership instead of `get(cell, 0) == 0`: the
                    # first symbol owns code 0 and would otherwise receive
                    # a new code each time it is seen again.
                    if cell not in sym2num:
                        sym2num[cell] = p
                        p += 1
                    row.cells[c] = sym2num[cell]  # update cell with num
        return t

    tbl = table(f)
    tbl_num = str2num(tbl)
    model = data(indep=[h.name for h in tbl_num.indep],
                 less=[h.name for h in tbl_num.depen],
                 _rows=[row.cells for row in tbl_num._rows])
    return model, sym2num
Ejemplo n.º 31
0
Archivo: divs.py Proyecto: timm/16
def _sdiv():
  """Demo: discretize the last column of data/nasa93.csv into klasses,
  score every symbolic and numeric input column against them, and print
  the ranges collected in `todos`.

  The semantics of sdiv1, ereport, ediv and bestRange are not visible in
  this block.
  """
  t  = table(cols(FILE('data/nasa93.csv')))
  # cook the klasses
  w1,klasses = sdiv1(t.rows,  x= lambda z:z.raw[-1]) 
  # Visit klasses in ascending order of `n`; every row of a klass gets the
  # klass number written into its last cooked cell.
  for klass in sorted(klasses,key=lambda x:x.n): 
     for row in klass.has:
       row.cooked[-1] =  klass.n 
     sayl([":klass",klass.n,":lo",klass.x.lo,":hi",klass.x.hi]) 
  todos = {}
  # NOTE(review): the lambdas below read `n` when they are called, not when
  # they are created; this is only safe if ereport/ediv use them before
  # the comprehension advances - confirm.
  bestRange( [ereport(t.rows,id=t.header[n], 
                        sym1 =lambda z:z.raw[n],
                        sym2 =lambda z:z.cooked[-1])
                  for n in t.inSyms], todos)
  bestRange( [ediv(t.rows, id = t.header[n],
                        num =lambda z:z.raw[n],
                        sym =lambda z:z.cooked[-1]) 
                   for n in t.inNums ], todos) 
  # Sort each entry by (w, -y.n) and print it.
  for k in todos:
    todos[k] = sorted(todos[k],key=lambda z:(z.w,-1*z.y.n))
    print("\n------")
    for one in todos[k]:
      sayl(showOne(one))
Ejemplo n.º 32
0
def _tdivdemo(file='data/nasa93dem.csv'):
    """Cluster `file` with `where2`, tag each row with id(leaf) % 1000,
    and show the `tdiv` tree built over the tagged rows."""
    # We start by recursively clustering the model.
    makeaModel = makeAModel()
    m = makeaModel.csv2py(file)
    prepare(m)  # Initialize all parameters for where2 to run
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(file)

    Rows = []
    for leaf, _ in leaves(tree):
        tag = id(leaf) % 1000
        for member in leaf.val:
            cells = member.cells
            cells.append(tag)
            member.__dict__.update({'cells': cells})
            Rows.append(member.cells)

    tbl2 = makeMeATable(tbl, '=klass', Rows)
    showTdiv(tdiv(discreteNums(tbl2, Rows)))
Ejemplo n.º 33
0
def _tdivdemo(file='data/nasa93dem.csv'): 
 #==============================================================================
 # We start by recursively clustering the model.
 #==============================================================================
 makeaModel=makeAModel()
 m=makeaModel.csv2py(file)
 alias =  dict (zip(makeaModel.translate.values(),makeaModel.translate.keys()))
 def num2str(lst):
  return [alias[z] for z in lst]
   
 prepare(m) # Initialize all parameters for where2 to run
 tree=where2(m, m._rows) # Decision tree using where2
 tbl = table(file)  
 headerLabel='=klass'
 Rows=[]

 for k,_ in leaves(tree):
  for j in k.val:
    tmp=num2str(j.cells)
    tmp.append('_'+str(id(k) % 1000)) 
    j.__dict__.update({'cells': tmp})
    Rows.append(j.cells)
 tbl2=makeMeATable(tbl, headerLabel, Rows)
 
 testCase=tbl2._rows.pop(1)
 t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))  
 myTree=tdiv(t) 
 showTdiv(myTree)
 loc = leaveOneOut(testCase, myTree)
 print loc.__dict__
 getContrastSet(loc, myTree)
 #==============================================================================
 #for node, lvl in dtnodes(myTree):
   #rows=map(lambda x:x.cells,node.rows)
   #pdb.set_trace()
   #print lvl, len(rows), [ k._id for k in node.rows]
 #============================================================================== 
 headerLabels={}
 [headerLabels.update({k.name:indx}) for indx, k in enumerate(tbl2.headers)]
Ejemplo n.º 34
0
def createTbl(data,
              settings=None,
              _smote=False,
              isBin=False,
              bugThres=1,
              duplicate=False):
    """
    Pool the rows of several data sets, cluster them, and return a table
    whose rows carry a cluster label.

    data      : iterable of sources handed one by one to csv2py
    settings  : accepted but not forwarded (prepare is called with None)
    _smote    : True/False, forwarded to csv2py
    isBin     : True/False, reduce the last cell to 0/1 (no defect/defect)
    bugThres  : int, last cell < bugThres => 0, otherwise 1 (default 1)
    duplicate : forwarded to csv2py
    """
    makeaModel = makeAModel()
    gathered = []
    for t in data:
        m = makeaModel.csv2py(t, _smote=_smote, duplicate=duplicate)
        gathered += m._rows
    m._rows = gathered  # the last model now holds every row
    # Initialize all parameters for where2 to run
    # NOTE(review): the `settings` argument is ignored here - confirm intent.
    prepare(m, settings=None)
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(t)  # headers come from the last source

    Rows = []
    for leaf, _ in leaves(tree):
        for member in leaf.val:
            cells = member.cells
            if isBin:
                if cells[-1] < bugThres:
                    cells[-1] = 0
                else:
                    cells[-1] = 1
            cells.append('_' + str(id(leaf) % 1000))
            member.__dict__.update({'cells': cells})
            Rows.append(member.cells)

    return newTable(tbl, '=klass', Rows)
Ejemplo n.º 35
0
                lo[indx]=i;
            if(hi[indx]==None or hi[indx]<i):
                hi[indx]=i;
            indx+=1
    return hi, lo

def normalizit(val,high,low):
    indx=0;
    for i in xrange(len(val)):
        if(not isa(val[i],str)):
            val[i]=(val[i]-low[indx])/(high[indx]-low[indx]);
            indx+=1
    return val
"""

# Load the source table and collect the cell list of every row.
t0 = table(source)

rows = map(lambda x: x.cells, t0._rows)

#______________________________________________________________________________
# Walk the rows again through their attribute dictionaries; the values are
# only read here (the min/max calls below are disabled).
for x in t0._rows:
    # Obtain the attribute dictionary of the row
    y = x.__dict__
    # Read the row's cells from that dictionary
    values = y['cells']
    #Max, Min= minmax(values)
    #print values
    #print Max, Min
#______________________________________________________________________________

# Clone of the loaded table.
t1 = clone(t0)
Ejemplo n.º 36
0
file.close()  # close the file after reading lines

# Layout of input_string: line 0 is the table name, line 1 the attribute
# names, line 2 the attribute types, and every later line is one row.
table_name = input_string[0]
attr_names = input_string[1].strip().split(',')
attr_types = input_string[2].strip().split(',')

# create student table
student_tbl = table('Student')
student_tbl.create(attr_names, attr_types)

# get attribute names from table
attributes = student_tbl.getAttribNames()

# insert values into database
for obj in input_string[3:]:
    student_tbl.addRow(obj)

# perform age group by
print('Average GPA grouped by Age:')
curr_avg = student_tbl.performAgeGroupBY('GPA', 'Age')
for obj in curr_avg:
    print(obj)
Ejemplo n.º 37
0
#Kavilan Naidoo
#06-01-2015
#2D lists
from table import *
# Score sheet as a 2D list: a header row followed by one row per player.
player = [
    ["Name", "Kills", "Death"],
    ["K1llmAchine", 51, 49],
    ["bob2247", 5, 99],
    ["hAxOr12", 70, 30],
]

table(player)

# Print the first three cells of rows 0 to 3.
for idx in range(4):
    print(player[idx][0], player[idx][1], player[idx][2])




        
    
Ejemplo n.º 38
0
Archivo: cube.py Proyecto: spati2/storm
def sidesed(f='Data/diabetes.csv'):
  """Load the table stored in `f`; nothing is returned."""
  loaded = table(f)
Ejemplo n.º 39
0
    5: '#EA528E',  # pink
    6: '#009DDF',  # blue
    7: '#76B82A',  # vert
    8: '#EF7D00',  # orange foncé
    9: '#5488C7',  # violet
    10: '#E7344C',  #rouge
    0: 'white',  # valeur par defaut
    'spacer': "grey",
    'premier': "#C0C0C0"  # pour nombre premier : gris clair
}

ma_table = table(
    couleurs=couleurs,
    longueur_tasseau=400,  # in mm
    largeur_tasseau=22,  # in mm
    largueur_espace=8.5,  # gap in mm between two battens in the jig
    longueur_unit=4,  # height in mm that represents a value of 1
    longueur_spacer=3,  # thickness of the saw blade
    outfolder="SVG",
)

ma_table.add_tasseau(1, tasseau(blocs=[bloc(100, [10, 10, 2, 50])]))
ma_table.add_tasseau(
    2,
    tasseau(blocs=[
        bloc(90, [10, 9, 2, 45]),
        bloc(9, [3, 3, 1, 9]),
    ]),
)
ma_table.add_tasseau(
    3,
    tasseau(blocs=[
        bloc(80, [10, 8, 2, 40]),
        bloc(10, [10, 1, 2, 5],
             color_face4='white'),  #  ilfaut que la face 4 soit blanche!!!
Ejemplo n.º 40
0
# import math
from math import sqrt
from table import *

# Basic arithmetic: the first result is printed, the second is discarded.
print(2 + 3)
5 * 87
# Strings: print a literal, store it, print it, then print it repeated twice.
print('Marie Bouczo mange des fruits')
marie = 'Marie Bouczo mange des fruits'
print(marie)
print(marie * 2)
# Lists: start with one sentence, append another, and show the length
# before and after.
bouczo = [marie]
print(len(bouczo))
antoine = 'Antoine Bouczo est sur son tracteur'
bouczo.append(antoine)
print(len(bouczo))
# Print every sentence in the list.
for person in bouczo:
    print(person)

# `table` comes from the star import of the local `table` module.
table(4, 100)

print(sqrt(16))
Ejemplo n.º 41
0
Archivo: cube.py Proyecto: WeiFoo/axe
def sidesed(f='data/diabetes.csv'):
  """Build a table from `f`; the function returns None."""
  tbl = table(f)
Ejemplo n.º 42
0
def sidesed(f='data.dat/diabetes.csv'):
    """Read `f` into a table. The table is not returned."""
    source_table = table(f)
Ejemplo n.º 43
0
 def add_table(self, table_name, colomns):
     """Store a new table under `table_name` unless one is already there."""
     if self.tables.get(table_name) is not None:
         return
     # NOTE(review): `name` and `bd` are neither parameters nor locals of
     # this method; they must come from an enclosing scope - confirm they
     # are not meant to be `table_name` and an attribute of self.
     self.tables[table_name] = table(name, bd, colomns)
Ejemplo n.º 44
0
def _close_open_list(html_text, list_open):
    """Return *html_text* with ``</ul>`` appended when a list is still open."""
    if list_open:
        return html_text + '</ul>'
    return html_text


def convert(md_text):
    """ Convert markdown string to html format

    The text is processed line by line. Fenced code blocks, headers and
    horizontal rules are emitted as whole blocks; every other line goes
    through block-quote, checkbox, ordered/unordered list and inline-code
    handling and is followed by ``<br>`` unless it is an unordered-list item.
    An unordered list that is still open when a block element starts, or
    when the input ends, is closed with ``</ul>``.

    :param md_text: str, the markdown file
    :return: the accumulated html after a final pass through ``table``
        (str, the html content, per the original contract)
    """
    # separate by line
    lines = md_text.split('\n')

    # save the html content for return
    html_text = ''

    # begin looping from the first line
    index = -1
    last_line_unordered = False

    while index < len(lines) - 1:
        index += 1
        line = lines[index]

        # code segment: ``` optionally followed by a language name
        if line[:3] == '```':
            language = line[3:].replace(' ', '')
            code_lines = []
            order_index = index + 1
            find_end = False
            while order_index < len(lines):
                if lines[order_index][:3] == '```':
                    find_end = True
                    break
                code_lines.append(code_replace(lines[order_index]) + '<br />')
                order_index += 1

            # An unterminated fence is not a code block: fall through and
            # handle the opening line as ordinary text.
            if find_end:
                html_text = _close_open_list(html_text, last_line_unordered)
                last_line_unordered = False
                html_line = ''.join(code_lines)
                if language:
                    html_text += ('<pre><code class="' + language + '">' +
                                  html_line + '</code></pre>')
                else:
                    html_text += ('<pre><code>' + html_line + '</code></pre>')

                # resume after the closing fence
                index = order_index
                continue

        # let table() rewrite the not-yet-processed tail of the document
        lines = lines[:index] + table(lines[index:])
        line = lines[index]

        # header
        is_header, html_line = check_header(line)
        if is_header:
            html_text = _close_open_list(html_text, last_line_unordered)
            last_line_unordered = False
            html_text = html_text + html_line
            continue

        # horizontal rule
        is_horizontal_rule, html_line = check_horizontal_rule(line)
        if is_horizontal_rule:
            html_text = _close_open_list(html_text, last_line_unordered)
            last_line_unordered = False
            html_text = html_text + html_line
            continue

        # block quote
        line = check_blockquote(line)

        # checkbox
        line = line.replace(
            "- [ ] ", "<input type=\"checkbox\" onclick=\"return false;\"/> ")
        line = line.replace(
            "- [x] ",
            "<input type=\"checkbox\" checked onclick=\"return false;\"/> ")

        # deal with ordered list
        if line[:2] == '1.':
            items = []
            order_index = index
            while order_index < len(lines):
                number, dot, text = lines[order_index].partition('.')
                # an item is numbered either by its position or with "1"
                if number not in (str(order_index - index + 1), '1'):
                    break
                # Strip only the leading "N." marker; a plain replace()
                # would also delete "1." / "N." inside the item text.
                items.append('<li>' + (text if dot else number) + '</li>')
                order_index += 1
            # Only rewind when at least one raw line was consumed, otherwise
            # the same line would be revisited forever.
            if items:
                index = order_index - 1
                line = '<ol>' + ''.join(items) + '</ol>'

        # deal with unordered list
        is_unordered_list, html_line = check_unordered_list(line)
        if is_unordered_list and (not last_line_unordered):
            line = '<ul>' + html_line
            last_line_unordered = True
        elif is_unordered_list and last_line_unordered:
            line = html_line
            last_line_unordered = True
        elif (not is_unordered_list) and last_line_unordered:
            line = '</ul>' + line

        # inline code: text between a pair of backticks
        rest = line
        line = ''
        while rest.count('`') > 1:
            first_sign = rest.index('`')
            line = line + convert_not_inline(rest[:first_sign])
            rest = rest[first_sign + 1:]
            second_sign = rest.index('`')

            line = line + '<pre><code>' + code_replace(
                rest[:second_sign]) + '</code></pre>'
            rest = rest[second_sign + 1:]

        line = line + convert_not_inline(rest)

        html_text = html_text + line
        if not is_unordered_list:
            html_text = html_text + '<br>'
            last_line_unordered = False

    # a document that ends on a list item would otherwise leave <ul> open
    html_text = _close_open_list(html_text, last_line_unordered)
    html_text = table(html_text)
    return html_text
Ejemplo n.º 45
0
def tdivPrec(where=None, dtree=None, train=None, test=None):
    """Cluster train and test data with where2 and label test rows.

    The training files and the test files are each merged, clustered with
    ``where2`` and turned into a table whose rows carry an extra ``=klass``
    column naming their cluster. A ``tdiv`` tree is built from the training
    table, and every test row is dropped into that tree.

    :param where: settings passed to ``prepare``
    :param dtree: options passed to ``tdiv``
    :param train: iterable of training data sources
    :param test: iterable of test data sources
    :return: ``[testDefective, defectivClust]`` -- for each test row, the
        label derived from the row's own ``cells[-2]`` (> 0 is 'Defect') and
        the label derived from the mean ``cells[-2]`` of the tree node the
        row lands in (> 1.5 is 'Defect')
    """
    rseed(1)
    makeaModel = makeAModel()

    def clustered_table(files):
        """Merge *files*, cluster the rows, and tag each row with its leaf."""
        merged_rows = []
        for src in files:
            model = makeaModel.csv2py(src)
            merged_rows += model._rows
        model._rows = merged_rows
        prepare(model, settings=where)  # initialise parameters for where2
        tree = where2(model, model._rows)
        tbl = table(src)  # built from the last source in *files*
        rows = []
        for leaf, _ in leaves(tree):
            for row in leaf.val:
                cells = row.cells
                # cluster label: derived from the leaf object's id
                cells.append('_' + str(id(leaf) % 1000))
                row.__dict__.update({'cells': cells})
                rows.append(row.cells)
        return newTable(tbl, '=klass', rows)

    # Training first, then testing: both consume the seeded random stream,
    # so the order of these two calls matters.
    tbl2 = clustered_table(train)
    tbl3 = clustered_table(test)

    temp = []  # mean cells[-2] of every tree node a test row landed in

    def isdefective(case, test=False):
        """Label a single row, or (with test=True) a whole tree node."""
        if not test:
            return 'Defect' if case.cells[-2] > 0 else 'No Defect'
        bugs = [r.cells[-2] for r in case.rows]
        meanBugs = np.mean(bugs)
        temp.append(meanBugs)
        return 'Defect' if meanBugs > 1.5 else 'No Defect'

    t = discreteNums(tbl2, [row.cells for row in tbl2._rows])
    myTree = tdiv(t, opt=dtree)

    testDefective = []
    defectivClust = []
    for tC in tbl3._rows:
        loc = drop(tC, myTree)
        testDefective.append(isdefective(tC))
        defectivClust.append(isdefective(loc, test=True))

    saveImg(temp, 10)
    return [testDefective, defectivClust]
Ejemplo n.º 46
0
                lo[indx]=i;
            if(hi[indx]==None or hi[indx]<i):
                hi[indx]=i;
            indx+=1
    return hi, lo

def normalizit(val,high,low):
    indx=0;
    for i in xrange(len(val)):
        if(not isa(val[i],str)):
            val[i]=(val[i]-low[indx])/(high[indx]-low[indx]);
            indx+=1
    return val
"""

# Build a table from `source` and collect the raw cell list of every row.
t0=table(source)
rows = map(lambda x :x.cells, t0._rows)
depenCol=[]
# Dump the class headers and the attributes of the first dependent column.
for h in t0.klass:
  print h
print t0.depen[0].__dict__
# NOTE(review): depenCol starts as a list but is overwritten with a single
# `col` value on every pass, so only the last dependent column survives --
# confirm whether an append was intended.
for p in t0.depen:
  depenCol=p.__dict__['col']
print depenCol

#______________________________________________________________________________
for x in t0._rows:
    # Attribute dictionary of the row object
    y=x.__dict__
    # The row's cell values, read from that dictionary
    values=y['cells'] 
Ejemplo n.º 47
0
Archivo: data.py Proyecto: ai-se/Caret
def readBug(filen, path="./data"):
  '''
  Read the last column of every data file to count defective vs.
  non-defective rows.

  Each sub-directory of *path* is scanned; for every file in it the number
  of rows whose last cell is >= 1 is recorded as "defective/total". The
  statistics of each run of three consecutive files are then appended to
  the train, tune and test lists respectively.

  filen -- passed to features() by the nested makeTex helper
  path  -- directory whose sub-directories hold the data files

  NOTE(review): makeTex is defined but not called within this block, and
  nothing is returned -- confirm whether a trailing call was lost.
  '''
  def makeTex(train, tune, test):
    '''Write the statistics as two LaTeX tabulars to <filename>DefNonDeflatex.'''
    def mulcol(num, style, name):
      # The \\multicolumn wrapper is disabled; only the name is emitted.
      return name

    def datasetName():
      return "&".join([mulcol("1", "c", i[:-2]) if "0" in i
                       else mulcol("1", "c", i)
                       for i in extractFeatures.dataset])

    def lstStat(num):
      return " &".join(num)

    def divide(lst):
      # Floor division keeps the slice index an int on Python 3 as well.
      half = len(lst) // 2 + 1
      return [lst[:half], lst[half:]]

    extractFeatures = features("", filen)
    extractFeatures.keep()
    space = "  "
    datasets = divide(datasetName())
    train = divide(train)
    tune = divide(tune)
    test = divide(test)
    texCommand = ""
    for i in range(2):
      texCommand += (
          "\\begin{figure*}[!ht]\n"
          "\\scriptsize\n"
          "\\centering\n"
          + space + "\\begin{tabular}{" + "c " * 10 + "}\n"
          + space + "\\hline\\hline\n"
          + space + "Dataset &" + datasets[i] + "\n\\\\\\hline\n"
          + space + "training &" + lstStat(train[i]) + "\n\\\\"
          + space + "tunning  &" + lstStat(tune[i]) + "\n\\\\"
          + space + "testing &" + lstStat(test[i]) + "\n\\\\"
          + space + "\\end{tabular}\n"
          "\\end{figure*}\n"
      )

    # Open the file only once the text is complete, and always close it.
    with open(extractFeatures.filename + 'DefNonDeflatex', 'w') as f:
      f.write(texCommand + '\n')

  folders = [f for f in listdir(path) if not isfile(join(path, f))]
  stats = {}
  train = []
  tune = []
  test = []
  for one in folders:
    nextpath = join(path, one)
    filename = [f for f in listdir(nextpath) if isfile(join(nextpath, f))]
    for dataname in filename:
      tbl = table(join(nextpath, dataname))
      defNum = 0
      total = 0
      for row in tbl._rows:
        total += 1
        if row.cells[-1] >= 1:
          defNum += 1
      stats[dataname] = str(defNum) + "/" + str(total)

    # Slide a window of three consecutive files: train, tune, test.
    for first, second, third in zip(filename, filename[1:], filename[2:]):
      train.append(stats[first])
      tune.append(stats[second])
      test.append(stats[third])
    if filename:
      print(one + " done!")
Ejemplo n.º 48
0
def readBug(filen, path="./data"):
    '''
    Read the last column of every data file to count defective vs.
    non-defective rows.

    Each sub-directory of *path* is scanned; for every file in it the
    number of rows whose last cell is >= 1 is recorded as
    "defective/total". The statistics of each run of three consecutive
    files are then appended to the train, tune and test lists respectively.

    filen -- passed to features() by the nested makeTex helper
    path  -- directory whose sub-directories hold the data files

    NOTE(review): makeTex is defined but not called within this block, and
    nothing is returned -- confirm whether a trailing call was lost.
    '''
    def makeTex(train, tune, test):
        '''Write the statistics as two LaTeX tabulars to <filename>DefNonDeflatex.'''
        def mulcol(num, style, name):
            # The \\multicolumn wrapper is disabled; only the name is emitted.
            return name

        def datasetName():
            return "&".join([
                mulcol("1", "c", i[:-2]) if "0" in i else mulcol("1", "c", i)
                for i in extractFeatures.dataset
            ])

        def lstStat(num):
            return " &".join(num)

        def divide(lst):
            # Floor division keeps the slice index an int on Python 3 too.
            half = len(lst) // 2 + 1
            return [lst[:half], lst[half:]]

        extractFeatures = features("", filen)
        extractFeatures.keep()
        space = "  "
        datasets = divide(datasetName())
        train = divide(train)
        tune = divide(tune)
        test = divide(test)
        texCommand = ""
        for i in range(2):
            texCommand += (
                "\\begin{figure*}[!ht]\n"
                "\\scriptsize\n"
                "\\centering\n"
                + space + "\\begin{tabular}{" + "c " * 10 + "}\n"
                + space + "\\hline\\hline\n"
                + space + "Dataset &" + datasets[i] + "\n\\\\\\hline\n"
                + space + "training &" + lstStat(train[i]) + "\n\\\\"
                + space + "tunning  &" + lstStat(tune[i]) + "\n\\\\"
                + space + "testing &" + lstStat(test[i]) + "\n\\\\"
                + space + "\\end{tabular}\n"
                "\\end{figure*}\n"
            )

        # Open the file only once the text is complete, and always close it.
        with open(extractFeatures.filename + 'DefNonDeflatex', 'w') as f:
            f.write(texCommand + '\n')

    folders = [f for f in listdir(path) if not isfile(join(path, f))]
    stats = {}
    train = []
    tune = []
    test = []
    for one in folders:
        nextpath = join(path, one)
        filename = [f for f in listdir(nextpath) if isfile(join(nextpath, f))]
        for dataname in filename:
            tbl = table(join(nextpath, dataname))
            defNum = 0
            total = 0
            for row in tbl._rows:
                total += 1
                if row.cells[-1] >= 1:
                    defNum += 1
            stats[dataname] = str(defNum) + "/" + str(total)

        # Slide a window of three consecutive files: train, tune, test.
        for first, second, third in zip(filename, filename[1:], filename[2:]):
            train.append(stats[first])
            tune.append(stats[second])
            test.append(stats[third])
        if filename:
            print(one + " done!")