Esempio n. 1
0
def tdivPrec(where=None, dtree=None, train=None, test=None):
    # Cluster the training CSVs and the test CSVs with where2, append a
    # synthetic '=klass' column holding each row's leaf-cluster id, and build
    # two new tables (tbl2 = train, tbl3 = test) for the tree learner.
    # NOTE(review): this copy is truncated in this chunk -- isdefective() has
    # no body -- so only comments were added; code left byte-identical.
    rseed(1)
    makeaModel = makeAModel()

    # pdb.set_trace()
    """
 Training
 """
    _r = []
    for t in train:
        m = makeaModel.csv2py(t)
        _r += m._rows
    # m is the model built from the LAST train file; its rows are replaced by
    # the concatenation of every train file's rows.
    m._rows = _r
    prepare(m, settings=where)  # Initialize all parameters for where2 to run
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(t)  # header comes from the last train file read
    headerLabel = '=klass'
    Rows = []
    for k, _ in leaves(tree):  # for k, _ in leaves(tree):
        for j in k.val:
            tmp = (j.cells)
            # tag each row with its leaf-cluster id ('_'-prefixed id(k)%1000)
            tmp.append('_' + str(id(k) % 1000))
            j.__dict__.update({'cells': tmp})
            Rows.append(j.cells)
    tbl2 = newTable(tbl, headerLabel, Rows)
    """
 Testing
 """
    _r = []
    for tt in test:
        mTst = makeaModel.csv2py(tt)
        _r += mTst._rows
    mTst._rows = _r
    prepare(mTst,
            settings=where)  # Initialize all parameters for where2 to run
    tree = where2(mTst, mTst._rows)  # Decision tree using where2
    tbl = table(tt)
    headerLabel = '=klass'
    Rows = []
    for k, _ in leaves(tree):  # for k, _ in leaves(tree):
        for j in k.val:
            tmp = (j.cells)
            tmp.append('_' + str(id(k) % 1000))
            j.__dict__.update({'cells': tmp})
            Rows.append(j.cells)
    tbl3 = newTable(tbl, headerLabel, Rows)
    temp = []

    def sort(lst):
        # return (indices, values) of lst sorted ascending by value
        return [i[0] for i in sorted(enumerate(lst), key = lambda x:x[1])], \
               [i[1] for i in sorted(enumerate(lst), key = lambda x:x[1])]

    def thresh(val1, val2):
        # NOTE(review): sort() is called without its required argument, and
        # the local name 'sorted' shadows the builtin -- this helper would
        # raise TypeError if ever invoked. Appears to be dead code.
        indx, sorted = sort()

    def isdefective(case, test=False):
Esempio n. 2
0
def _tdivPrec(dir='camel/'):
    #==============================================================================
    # Recursively clustering the model.
    #==============================================================================
    train = ['camel-1.0.csv', 'camel-1.2.csv', 'camel-1.2.csv']
    test = ['camel-1.6.csv']
    rseed(1)
    makeaModel = makeAModel()
    _rows = []

    # Concatenate training cases
    for t in train:
        file = dir + t
        m = makeaModel.csv2py(file)
        prepare(m)  # Initialize all parameters for where2 to run
        tree = where2(m, m._rows)  # Decision tree using where2
        tbl = table(file)
        headerLabel = '=klass'
        Rows = []
        for k, _ in leaves(tree):
            for j in k.val:
                tmp = (j.cells)
                tmp.append('_' + str(id(k) % 1000))
                j.__dict__.update({'cells': tmp})
                Rows.append(j.cells)
        _rows += Rows
        tbl2 = makeMeATable(tbl, headerLabel, _rows)

    # Test case!
    _rows = []
    for tt in test:
        file = dir + tt
        m = makeaModel.csv2py(file)
        prepare(m)  # Initialize all parameters for where2 to run
        tree = where2(m, m._rows)  # Decision tree using where2
        tbl = table(file)
        headerLabel = '=klass'
        Rows = []
        for k, _ in leaves(tree):
            for j in k.val:
                tmp = (j.cells)
                tmp.append('_' + str(id(k) % 1000))
                j.__dict__.update({'cells': tmp})
                Rows.append(j.cells)
        _rows += Rows
        tbl3 = makeMeATable(tbl, headerLabel, _rows)

    testCase = tbl3._rows
    print testCase
    t = discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))
    myTree = tdiv(t)
    showTdiv(myTree)
    loc = leaveOneOut(testCase[randi(0, len(testCase))], myTree)
    contrastSet = getContrastSet(loc, myTree)
    print 'Contrast Set:', contrastSet
Esempio n. 3
0
def _tdivPrec(dir='camel/'): 
 #==============================================================================
 # Recursively clustering the model.
 #==============================================================================
 train=['camel-1.0.csv', 'camel-1.2.csv', 'camel-1.2.csv']
 test=['camel-1.6.csv']
 rseed(1)
 makeaModel=makeAModel()
 _rows=[]
 
 # Concatenate training cases
 for t in train:
  file=dir+t
  m=makeaModel.csv2py(file)
  prepare(m) # Initialize all parameters for where2 to run
  tree=where2(m, m._rows) # Decision tree using where2
  tbl = table(file)  
  headerLabel='=klass'
  Rows=[]
  for k,_ in leaves(tree):
   for j in k.val:
     tmp=(j.cells)
     tmp.append('_'+str(id(k) % 1000)) 
     j.__dict__.update({'cells': tmp})
     Rows.append(j.cells)
  _rows+=Rows
  tbl2=makeMeATable(tbl, headerLabel, _rows)
 
 # Test case!
 _rows=[]
 for tt in test:
  file=dir+tt
  m=makeaModel.csv2py(file)
  prepare(m) # Initialize all parameters for where2 to run
  tree=where2(m, m._rows) # Decision tree using where2
  tbl = table(file)  
  headerLabel='=klass'
  Rows=[]
  for k,_ in leaves(tree):
   for j in k.val:
     tmp=(j.cells)
     tmp.append('_'+str(id(k) % 1000)) 
     j.__dict__.update({'cells': tmp})
     Rows.append(j.cells)
  _rows+=Rows
  tbl3=makeMeATable(tbl, headerLabel, _rows)
  
 testCase=tbl3._rows
 print testCase 
 t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))
 myTree=tdiv(t) 
 showTdiv(myTree)
 loc = leaveOneOut(testCase[randi(0, len(testCase))], myTree)
 contrastSet = getContrastSet(loc, myTree)
 print 'Contrast Set:', contrastSet
Esempio n. 4
0
def _tdivdemo(file='data/nasa93dem.csv'):
    #==========================================================================
    # We start by recursively clustering the model; each row is tagged with
    # its where2 leaf-cluster id before a tdiv tree is learned and one held
    # out row is dropped through it.
    #==========================================================================
    makeaModel = makeAModel()
    m = makeaModel.csv2py(file)
    rseed(1)  # reproducible run
    #alias =  dict (zip(makeaModel.translate.values(),makeaModel.translate.keys()))
    #print alias
    #def num2str(lst):
    # return [alias[z] for z in lst]

    prepare(m)  # Initialize all parameters for where2 to run
    tree = where2(m, m._rows)  # Decision tree using where2
    tbl = table(file)
    headerLabel = '=klass'
    Rows = []
    for k, _ in leaves(tree):
        for j in k.val:
            tmp = j.cells
            tmp.append('_' + str(id(k) % 1000))  # leaf-cluster label
            j.__dict__.update({'cells': tmp})
            Rows.append(j.cells)
    tbl2 = makeMeATable(tbl, headerLabel, Rows)
    #print
    # BUGFIX: randi is inclusive at both ends (cf. randi(0, len(Rows)-1)
    # elsewhere in this file); use len-1 so pop() is never handed an index
    # equal to the list length.
    testCase = [tbl2._rows.pop(randi(0, len(tbl2._rows) - 1))
                for k in xrange(500)]
    t = discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))
    myTree = tdiv(t)
    showTdiv(myTree)
    loc = leaveOneOut(testCase[randi(0, len(testCase) - 1)], myTree)
Esempio n. 5
0
def _tdivdemo(file='data/nasa93dem.csv'): 
 # Cluster the csv with where2, tag rows with their leaf id, learn a tdiv
 # tree and locate one popped row in it via apex().
 # NOTE(review): this copy looks truncated by the scrape (the final for
 # loop's body only builds 'rows'); code kept byte-identical.
 #==============================================================================
 # We start by recursively clustering the model.
 #==============================================================================
 makeaModel=makeAModel()
 m=makeaModel.csv2py(file) 
 prepare(m) # Initialize all parameters for where2 to run
 tree=where2(m, m._rows) # Decision tree using where2
 tbl = table(file)  
 headerLabel='=klass'
 Rows=[]
 for k,_ in leaves(tree):
  for j in k.val:
    # NB: unlike the other demos this appends the cluster id as a bare int
    tmp=j.cells
    tmp.append(id(k) % 1000) 
    j.__dict__.update({'cells': tmp})
    Rows.append(j.cells)
 tbl2=makeMeATable(tbl, headerLabel, Rows)
 
 # pop one random row as the test case; bound uses len(Rows)-1, which
 # presumably equals len(tbl2._rows)-1 here -- randi appears inclusive
 testCase=tbl2._rows.pop(randi(0,len(Rows)-1))
 t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))  
 myTree=tdiv(t) 
 loc=apex(testCase, myTree)
 print loc.__dict__
 print 'Id: ',loc.mode, ' Level: ', loc.lvl, ' Variable: ', loc.f.name
 showTdiv(myTree)
 #==============================================================================
 for node, lvl in dtnodes(myTree):
   rows=map(lambda x:x.cells,node.rows)
Esempio n. 6
0
def _tdivdemo(file='data/nasa93dem.csv'): 
 #==============================================================================
 # We start by recursively clustering the model.
 #==============================================================================
 makeaModel=makeAModel()
 m=makeaModel.csv2py(file)
 
 #alias =  dict (zip(makeaModel.translate.values(),makeaModel.translate.keys()))
 #print alias
 #def num2str(lst):
 # return [alias[z] for z in lst]
 
 prepare(m) # Initialize all parameters for where2 to run
 tree=where2(m, m._rows) # Decision tree using where2
 tbl = table(file)  
 headerLabel='=klass'
 Rows=[]
 for k,_ in leaves(tree):
  for j in k.val:
    tmp=(j.cells)
    tmp.append('_'+str(id(k) % 1000)) 
    j.__dict__.update({'cells': tmp})
    Rows.append(j.cells)
 tbl2=makeMeATable(tbl, headerLabel, Rows)
 print 
 testCase=[tbl2._rows.pop(randi(0, len(tbl2._rows))) for k in xrange(500)]
 t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))
 myTree=tdiv(t) 
 showTdiv(myTree)
 loc = leaveOneOut(testCase[randi(0, len(testCase))], myTree)
 contrastSet = getContrastSet(loc, myTree)
 print 'Contrast Set:', contrastSet
Esempio n. 7
0
File: main.py Progetto: ai-se/HPO
def main():
    # Drive one train/predict round: cluster the training data, grow a
    # decision tree over the clusters, route the test rows to leaves and
    # return the Abcd score.
    global The
    The.option.showWhere = False  # silence where2's trace output
    The.option.showDTree = False  # silence tdiv's trace output
    testdata, actual = buildtestdata1(The.data.predict)
    model = csv2py(The.data.train)
    Init(model)  # init WHere!!
    cluster_tree = where2(model, model._rows)  # tree generated by clustering
    tbl1, row = clustertbl(The.data.train, cluster_tree)  # table + cluster ID
    The.option.clustering = True
    decision_tree = buildtdiv(tbl1)
    testleaf = gotoleaf(testdata, decision_tree)  # leaves the test rows reach
    return _Abcd(testleaf, testdata, actual)
Esempio n. 8
0
def createDF(data):
  """Cluster the rows of every csv in `data` with where2 and return a
  DataFrame whose extra 'klass' column holds each row's cluster label."""
  model_maker = makeAModel()
  all_rows = []
  for path in data:
    model = model_maker.csv2py(path)
    all_rows.extend(model._rows)
  # the model built from the last file carries the pooled rows
  model._rows = all_rows
  prepare(model)
  cluster_tree = where2(model, model._rows)
  labelled = []
  for leaf, _ in cluster_tree and leaves(cluster_tree):
    tag = 'Class_' + str(id(leaf) % 1000)  # same label for every row in leaf
    for record in leaf.val:
      cells = record.cells
      cells.append(tag)
      record.__dict__.update({'cells': cells})
      labelled.append(record.cells)
  return pd.DataFrame(labelled, columns=get_headers(data) + ['klass'])
Esempio n. 9
0
def createDF(data):
    """Pool the rows of the csvs in `data`, cluster them with where2, and
    return a DataFrame with a trailing 'klass' cluster-label column."""
    maker = makeAModel()
    pooled = []
    for source in data:
        parsed = maker.csv2py(source)
        pooled = pooled + parsed._rows
    parsed._rows = pooled  # last-parsed model now holds every file's rows
    prepare(parsed)
    tree = where2(parsed, parsed._rows)
    out_rows = []
    for node, _ in leaves(tree):
        label = 'Class_' + str(id(node) % 1000)
        for item in node.val:
            row_cells = item.cells
            row_cells.append(label)
            item.__dict__.update({'cells': row_cells})
            out_rows.append(item.cells)
    return pd.DataFrame(out_rows, columns=get_headers(data) + ['klass'])
Esempio n. 10
0
def run(train, test):
  # Learn a contrast-set decision tree from `train`, route each `test` row
  # to its leaf, report the routing and return the Abcd score.
  testdata, actual = buildtestdata1(test)
  model, sym2num = csv2py(train)
  num2sym = {num: sym for sym, num in sym2num.items()}
  Init(model)  # init The class
  cluster_tree = where2(model, model._rows)  # tree generated by clustering
  tbl1, row = clustertbl(train, cluster_tree, num2sym)  # table w/ cluster ID
  # savetbl(tbl1, "data/trainingData")  # write new table to a file
  The.option.clustering = True
  dtree = buildtdiv(tbl1)
  leaf_nodes = findleaves(dtree)
  buildcontrast1(dtree, leaf_nodes)
  testleaf = gotoleaf(testdata, dtree)  # leaves each test row should reach
  printtogo(testleaf)
  summarize(leaf_nodes, dtree)
  #print "Score: ", score
  return _Abcd(testleaf, testdata, actual)
Esempio n. 11
0
def run(train, test):
    # random.seed(1)
    # data = o(src = "data/nasa93train.csv")
    # Build a contrast-set tree from `train`, send the `test` rows through
    # it, print where they land and return the Abcd score.
    testdata, actual = buildtestdata1(test)
    model, sym2num = csv2py(train)
    num2sym = {code: name for name, code in sym2num.items()}
    Init(model)  # init The class
    whereTree = where2(model, model._rows)  # tree generated by clustering
    tbl1, row = clustertbl(train, whereTree, num2sym)  # table w/ cluster ID
    # savetbl(tbl1, "data/trainingData")  # write new table to a file
    dtree = buildtdiv(tbl1)
    found_leaves = findleaves(dtree)
    buildcontrast1(dtree, found_leaves)
    testleaf = gotoleaf(testdata, dtree)  # leaves the testdata should reach
    printtogo(testleaf)
    summarize(found_leaves, dtree)
    return _Abcd(testleaf, testdata, actual)
Esempio n. 12
0
def main():
    # Cluster nasa93train, save a '_copy' of the clustered table, pick 30
    # test rows, learn a decision tree and report where each row lands.
    random.seed(1)
    data = o(src="data/nasa93train.csv")
    # data = o(src = "data/ant-1.3.csv")
    model, sym2num = csv2py(data.src)
    num2sym = {num: sym for sym, num in sym2num.items()}
    Init(model)  # init The class
    cluster_tree = where2(model, model._rows)  # tree generated by clustering
    tbl1, row = clustertbl(data.src, cluster_tree, num2sym)  # + cluster ID
    fname = data.src[:-4] + '_copy' + data.src[-4:]
    savetbl(tbl1, fname)  # write new table to a file
    # clusterscore = calScore(tree)
    testdata = buildtestdata(tbl1, 30)  # select the testdata
    dtree = buildtdiv(tbl1)
    leaf_nodes = findleaves(dtree)
    testleaf = gotoleaf(testdata, dtree)  # leaves the testdata should reach
    buildcontrast1(dtree, leaf_nodes)
    printtogo(testleaf)
    # NOTE(review): 'summerize' (sic) -- sibling examples call 'summarize';
    # spelling kept as-is since it may match this project's API.
    summerize(leaf_nodes, dtree)
Esempio n. 13
0
def _tdivdemo(file='data/nasa93dem.csv'): 
 # Cluster the csv, translate numeric cells back to symbols via the model's
 # translate map, learn a tdiv tree, and print the contrast set for one row.
 # NOTE(review): possibly truncated by the scrape after headerLabels is
 # built; code kept byte-identical.
 #==============================================================================
 # We start by recursively clustering the model.
 #==============================================================================
 makeaModel=makeAModel()
 m=makeaModel.csv2py(file)
 # invert the symbol->number map so cells can be shown symbolically
 alias =  dict (zip(makeaModel.translate.values(),makeaModel.translate.keys()))
 def num2str(lst):
  return [alias[z] for z in lst]
   
 prepare(m) # Initialize all parameters for where2 to run
 tree=where2(m, m._rows) # Decision tree using where2
 tbl = table(file)  
 headerLabel='=klass'
 Rows=[]

 for k,_ in leaves(tree):
  for j in k.val:
    tmp=num2str(j.cells)  # symbolic copy of the row's cells
    tmp.append('_'+str(id(k) % 1000)) 
    j.__dict__.update({'cells': tmp})
    Rows.append(j.cells)
 tbl2=makeMeATable(tbl, headerLabel, Rows)
 
 # deterministically hold out row 1 as the test case
 testCase=tbl2._rows.pop(1)
 t=discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))  
 myTree=tdiv(t) 
 showTdiv(myTree)
 loc = leaveOneOut(testCase, myTree)
 print loc.__dict__
 getContrastSet(loc, myTree)
 #==============================================================================
 #for node, lvl in dtnodes(myTree):
   #rows=map(lambda x:x.cells,node.rows)
   #pdb.set_trace()
   #print lvl, len(rows), [ k._id for k in node.rows]
 #============================================================================== 
 # map each header name to its column index (side-effecting comprehension)
 headerLabels={}
 [headerLabels.update({k.name:indx}) for indx, k in enumerate(tbl2.headers)]
Esempio n. 14
0
def main():
  # End-to-end demo on ant-1.4: cluster, save the training table, learn a
  # contrast-set tree and score the routed test rows.
  random.seed(1)
  # data = o(src = "data/nasa93train.csv")
  # data = o(src = ["data/ant-1.3.csv","data/ant-1.7.csv", "data/ant-1.5.csv","data/ant-1.6.csv" ])
  data = o(src = "data/ant-1.4.csv")
  model, sym2num = csv2py(data.src)
  num2sym = {num: sym for sym, num in sym2num.items()}
  Init(model)  # init The class
  cluster_tree = where2(model, model._rows)  # tree generated by clustering
  tbl1, row = clustertbl(data.src, cluster_tree, num2sym)  # + cluster ID col
  fname = "data/traningDataSet.csv"
  savetbl(tbl1, fname)  # write new table to a file
  # clusterscore = calScore(tree)
  # testdata = buildtestdata(tbl1, 10) # select the testdata randomly
  testdata, actual = buildtestdata1(f = "data/ant-1.4.csv")
  dtree = buildtdiv(tbl1)
  leaf_nodes = findleaves(dtree)
  buildcontrast1(dtree, leaf_nodes)
  testleaf = gotoleaf(testdata, dtree)  # leaves the testdata should reach
  printtogo(testleaf)
  summarize(leaf_nodes, dtree)
  _Abcd(testleaf, actual)
Esempio n. 15
0
def _tdivdemo(file='data/nasa93dem.csv'):
    #==========================================================================
    # We start by recursively clustering the model, then display the tdiv
    # tree learned from the cluster-labelled table.
    #==========================================================================
    maker = makeAModel()
    model = maker.csv2py(file)
    prepare(model)  # Initialize all parameters for where2 to run
    cluster_tree = where2(model, model._rows)  # Decision tree using where2
    tbl = table(file)
    headerLabel = '=klass'
    labelled_rows = []
    for leaf, _ in leaves(cluster_tree):
        # NB: this variant tags rows with a bare int id, not a '_'-string
        cluster_id = id(leaf) % 1000
        for record in leaf.val:
            cells = record.cells
            cells.append(cluster_id)
            record.__dict__.update({'cells': cells})
            labelled_rows.append(record.cells)

    tbl2 = makeMeATable(tbl, headerLabel, labelled_rows)
    t = discreteNums(tbl2, labelled_rows)
    myTree = tdiv(t)
    showTdiv(myTree)
Esempio n. 16
0
def main():
    # Same pipeline as the other demos, driven by ant-1.4: cluster, persist
    # the training table, learn a contrast-set tree, score routed test rows.
    random.seed(1)
    # data = o(src = "data/nasa93train.csv")
    # data = o(src = ["data/ant-1.3.csv","data/ant-1.7.csv", "data/ant-1.5.csv","data/ant-1.6.csv" ])
    data = o(src="data/ant-1.4.csv")
    raw_model, sym2num = csv2py(data.src)
    num2sym = dict((n, s) for s, n in sym2num.items())
    Init(raw_model)  # init The class
    whereTree = where2(raw_model, raw_model._rows)  # clustering tree
    tbl1, row = clustertbl(data.src, whereTree, num2sym)  # + cluster ID col
    fname = "data/traningDataSet.csv"
    savetbl(tbl1, fname)  # persist the clustered training table
    # clusterscore = calScore(tree)
    # testdata = buildtestdata(tbl1, 10) # select the testdata randomly
    testdata, actual = buildtestdata1(f="data/ant-1.4.csv")
    dtree = buildtdiv(tbl1)
    found_leaves = findleaves(dtree)
    buildcontrast1(dtree, found_leaves)
    testleaf = gotoleaf(testdata, dtree)  # leaves the testdata should reach
    printtogo(testleaf)
    summarize(found_leaves, dtree)
    _Abcd(testleaf, actual)
Esempio n. 17
0
def tdivPrec(where = None , dtree = None, train = None, test = None):
 # Cluster the train and test CSVs with where2 (tbl2/tbl3 carry a '=klass'
 # cluster column), learn a tdiv tree from the training table, then for each
 # test row compare its own defect label against the label of the cluster
 # it drops into. Returns [per-row labels, per-cluster labels].
 rseed(1)
 makeaModel = makeAModel()

 # pdb.set_trace()

 """
 Training
 """
 _r = []
 for t in train:
  m = makeaModel.csv2py(t)
  _r += m._rows
 # m is the model from the LAST train file; give it the pooled rows
 m._rows = _r
 prepare(m, settings = where)  # Initialize all parameters for where2 to run
 tree = where2(m, m._rows)  # Decision tree using where2
 tbl = table(t)  # header taken from the last train file
 headerLabel = '=klass'
 Rows = []
 for k, _ in leaves(tree):  # for k, _ in leaves(tree):
  for j in k.val:
   tmp = (j.cells)
   # tag each row with its leaf-cluster id ('_'-prefixed)
   tmp.append('_' + str(id(k) % 1000))
   j.__dict__.update({'cells': tmp})
   Rows.append(j.cells)
 tbl2 = newTable(tbl, headerLabel, Rows)


 """
 Testing
 """
 _r = []
 for tt in test:
  mTst = makeaModel.csv2py(tt)
  _r += mTst._rows
 mTst._rows = _r
 prepare(mTst, settings = where)  # Initialize all parameters for where2 to run
 tree = where2(mTst, mTst._rows)  # Decision tree using where2
 tbl = table(tt)
 headerLabel = '=klass'
 Rows = []
 for k, _ in leaves(tree):  # for k, _ in leaves(tree):
  for j in k.val:
   tmp = (j.cells)
   tmp.append('_' + str(id(k) % 1000))
   j.__dict__.update({'cells': tmp})
   Rows.append(j.cells)
 tbl3 = newTable(tbl, headerLabel, Rows)
 temp = []  # collects per-cluster mean bug counts for saveImg below

 def sort(lst):
  # return (indices, values) of lst sorted ascending by value
  return [i[0] for i in sorted(enumerate(lst), key = lambda x:x[1])], \
         [i[1] for i in sorted(enumerate(lst), key = lambda x:x[1])]

 def thresh(val1, val2):
  # NOTE(review): broken and apparently dead -- sort() lacks its required
  # argument, and 'sorted' here shadows the builtin used by isdefective.
  indx, sorted = sort()
 def isdefective(case, test = False):
  # Row mode: label a single row by its bug count (cells[-2]).
  # Cluster mode (test=True): label a tree node by the mean bug count of
  # its rows; the 1.5 cutoff appears to be a tuned constant -- TODO confirm.
  if not test:
   return 'Defect' if case.cells[-2] > 0 else 'No Defect'
  else:
   bugs = [r.cells[-2] for r in case.rows];
   meanBugs = np.mean(bugs);
   # NOTE(review): medianBugs and rangeBugs are computed but never used
   medianBugs = np.median(bugs);
   rangeBugs = (sorted(bugs)[0] + sorted(bugs)[-1]) / 2;
   temp.append(meanBugs);
   return 'Defect' if meanBugs > 1.5 else 'No Defect'

 testCase = tbl3._rows
 # print testCase

 testDefective = []
 defectivClust = []

 t = discreteNums(tbl2, map(lambda x: x.cells, tbl2._rows))
 myTree = tdiv(t, opt = dtree)
 # showTdiv(myTree)

 # NOTE(review): duplicate of the assignment above; harmless
 testCase = tbl3._rows
#   # print testCase

 for tC in testCase:
  # drop the test row into the learned tree and label both the row itself
  # and the node it lands in
  loc = drop(tC, myTree)
  # if len(loc.kids)==0:
  testDefective.append(isdefective(tC))
  defectivClust.append(isdefective(loc, test = True))
 #
 saveImg(temp, 10)

#   contrastSet = getContrastSet(loc, myTree)
#   print 'Contrast Set:', contrastSet
 return [testDefective, defectivClust]