Exemple #1
0
def cross(file='data/housingD.csv',rseed=1):
  """Cross-validate the `tdiv` tree learner on a table and print a report.

  The random generator is seeded with `rseed`, the table is loaded with
  `discreteTable(file)`, and every (tests, train) split produced by
  `xval` is processed: a tree is built from `train`, the branch of each
  tree node is printed, the leaf/node counts are recorded, and each test
  row is classified and its (wanted, got) pair is fed to an `Abcd` log.

  All splits are evaluated before the summary is printed; the function
  does not terminate the interpreter.

  file  -- path of the data file handed to `discreteTable`.
  rseed -- seed handed to `seed`.
  """
  def klass(test):
    # `train` is the training table of the split currently being
    # processed; it is bound by the loop below.
    return test.cells[train.klass[0].col]
  seed(rseed)
  tbl = discreteTable(file)
  abcd = Abcd()
  nLeaves = Num()
  nNodes = Num()
  for tests, train in xval(tbl):
    tree = tdiv(train)
    for node in dtnodes(tree):
      print(node.branch)
    # `+` is used as a statement on these collectors; presumably
    # Num.__add__ records the value in place -- confirm against Num.
    nLeaves + len(list(dtleaves(tree)))
    nNodes + len(list(dtnodes(tree)))
    for test in tests:
      want = klass(test)
      got = classify(test,tree)
      abcd(want,got)
  nl()
  abcd.header()
  abcd.report()
  # Single-argument print keeps the output identical on Python 2 and 3.
  print(":nodes %s" % (sorted(nNodes.some.all()),))
  print(":leaves %s" % (sorted(nLeaves.some.all()),))
Exemple #2
0
def cross(file='data/housingD.csv', rseed=1):
    """Cross-validate the `tdiv` tree learner on a table and print a report.

    Seeds the random generator with `rseed`, loads the table through
    `discreteTable(file)` and walks every (tests, train) split yielded by
    `xval`.  For each split a tree is grown from `train`, the branch of
    every tree node is printed, the number of leaves and nodes is
    recorded, and every test row is classified with the (wanted, got)
    pair logged in an `Abcd` instance.

    Every split is processed before the summary is printed; the function
    does not terminate the interpreter.

    file  -- path of the data file handed to `discreteTable`.
    rseed -- seed handed to `seed`.
    """
    def klass(test):
        # `train` is bound by the split loop below: the class column is
        # looked up on the training table of the current split.
        return test.cells[train.klass[0].col]

    seed(rseed)
    tbl = discreteTable(file)
    abcd = Abcd()
    nLeaves = Num()
    nNodes = Num()
    for tests, train in xval(tbl):
        tree = tdiv(train)
        for node in dtnodes(tree):
            print(node.branch)
        # `+` is used as a statement on these collectors; presumably
        # Num.__add__ records the value in place -- confirm against Num.
        nLeaves + len(list(dtleaves(tree)))
        nNodes + len(list(dtnodes(tree)))
        for test in tests:
            want = klass(test)
            got = classify(test, tree)
            abcd(want, got)
    nl()
    abcd.header()
    abcd.report()
    # Single-argument print keeps the output identical on Python 2 and 3.
    print(":nodes %s" % (sorted(nNodes.some.all()),))
    print(":leaves %s" % (sorted(nLeaves.some.all()),))
def _Abcd(testleaf, train=None, test=None):
    """Append a defect label for every leaf in `testleaf` to `test`.

    Each leaf's `score` is mapped to "Defective" (score > 0.5) or
    "Non-Defective" and appended to `test`.  Leaves for which that fails
    are skipped.

    testleaf -- iterable of leaf objects exposing a `score` attribute.
    train    -- accepted for interface compatibility; not used here.
    test     -- list that receives the labels.  When a list is passed it
                is extended in place; when omitted, a fresh list is
                created on every call so labels never leak between calls.
    """
    if test is None:
        test = []
    abcd = Abcd(db='Traing', rx='Testing')

    def isDef(x):
        return "Defective" if x > 0.5 else "Non-Defective"

    for leaf in testleaf:
        try:
            test += [isDef(leaf.score)]
        except Exception:
            # Best effort: a leaf whose score cannot be read or compared
            # contributes no label.
            pass
Exemple #4
0
def learns(tests,trains,indep=lambda x: x[:-1],
                    dep = lambda x: x[-1],
                    rf  = Abcd(),
                    lg  = Abcd(),
                    dt  = Abcd(),
                    nb  = Abcd()):
  """Fit four scikit-learn classifiers and log their predictions.

  `trainTest(tests, trains, indep, dep)` supplies four values: the first
  two are used to fit each model, the third is what each model predicts
  on, and the fourth holds the values each prediction is compared with.

  Models are handled in this order, each one logging into its own
  callable: random forest -> `rf`, logistic regression -> `lg`,
  Gaussian naive Bayes -> `nb`, entropy decision tree -> `dt`.
  Each log is called as `log(predicted=..., actual=...)`.

  NOTE(review): the `Abcd()` defaults are created once, when this
  function is defined, so calls relying on them share the same logs.
  """
  x1,y1,x2,y2 = trainTest(tests,trains,indep,dep)

  def score(model, log):
    # Fit on the first pair, then report every prediction on x2
    # alongside the y2 entry at the same position.
    model.fit(x1, y1)
    for i, got in enumerate(model.predict(x2)):
      log(predicted = got, actual = y2[i])

  score(RandomForestClassifier(n_estimators = 50), rf)
  score(linear_model.LogisticRegression(C=1e5), lg)
  score(GaussianNB(), nb)
  score(DecisionTreeClassifier(criterion="entropy",
                               random_state=1), dt)
Exemple #5
0
def _Abcd(predicted, actual):
  """Label numeric predictions and score them against `actual`.

  Every value in `predicted` becomes "Defective" when it is greater
  than 0 and "Non-Defective" otherwise.  The labels are paired with
  `actual` (stopping at the shorter of the two) and fed to an `Abcd`
  log through `tell(actual, predicted)`; the header is then emitted and
  the result of `ask()` is returned.
  """
  abcd = Abcd(db='Traing', rx='Testing')
  labels = ["Defective" if value > 0 else "Non-Defective"
            for value in predicted]
  for truth, guess in zip(actual, labels):
    abcd.tell(truth, guess)
  abcd.header()
  return abcd.ask()
Exemple #6
0
def _Abcd(testleaf, testdata, train):
  """Label each test leaf and score the labels against `train`.

  `testleaf` and `testdata` are walked in lockstep (stopping at the
  shorter one); only the leaf is used.  A leaf is labelled "Defective"
  when its score exceeds `The.option.threshold`, else "Non-Defective".
  If reading or labelling `leaf.score` raises, `leafscore(leaf)` is
  labelled instead.  The labels are paired with the entries of `train`,
  passed to `Abcd.tell(actual, predicted)`, the header is emitted and
  the result of `ask()` is returned.
  """
  abcd = Abcd(db='Traing', rx='Testing')

  def label(score):
    if score > The.option.threshold:
      return "Defective"
    return "Non-Defective"

  predictions = []
  for leaf, _ in zip(testleaf, testdata):
    try:
      verdict = label(leaf.score)
    except Exception:
      # Fall back to the score computed by leafscore() for this leaf.
      verdict = label(leafscore(leaf))
    predictions.append(verdict)

  for want, got in zip(train, predictions):
    abcd.tell(want, got)
  abcd.header()
  return abcd.ask()
Exemple #7
0
def _Abcd(testleaf, testdata, train):
  """Turn leaf scores into defect labels and score them with `Abcd`.

  Leaves are taken pairwise with `testdata` (iteration stops at the
  shorter sequence; the data item itself is not used).  A score above
  `The.option.threshold` yields "Defective", anything else
  "Non-Defective".  When `leaf.score` cannot be read or labelled, the
  value of `leafscore(leaf)` is labelled instead.  Labels are then
  zipped with `train` and handed to `tell(actual, predicted)`; after
  `header()` the value of `ask()` is returned.
  """
  abcd = Abcd(db='Traing', rx='Testing')

  def to_label(value):
    return "Defective" if value > The.option.threshold else "Non-Defective"

  guesses = []
  for leaf, _unused in zip(testleaf, testdata):
    try:
      guess = to_label(leaf.score)
    except Exception:
      # Use leafscore() when the leaf's own score is unusable.
      guess = to_label(leafscore(leaf))
    guesses.append(guess)

  for truth, guess in zip(train, guesses):
    abcd.tell(truth, guess)
  abcd.header()
  result = abcd.ask()
  return result
Exemple #8
0
def snl(file='data/poi-1.5D.csv', rseed=1, w=dict(_1=0, _0=1)):
    """Cross-validate the `tdiv` tree against four scikit-learn learners.

    The table loaded from `file` is first shown as a single tree built
    on all the data.  Then, for every (tests, train) split from `xval`:

    * `learns` logs random-forest, logistic-regression, decision-tree
      and naive-Bayes predictions into their own `Abcd` logs;
    * a tree is grown on `train` and passed to `snakesAndLadders`
      together with `w`;
    * each test row is classified with that tree (logged under "where")
      and the results of `improve` / `degrade` are tallied.

    Finally the three tallies and the five `Abcd` reports are printed.

    file  -- path handed to `discreteTable`; also used as the `db` label.
    rseed -- seed handed to `seed`.
    w     -- mapping forwarded unchanged to `snakesAndLadders`.
             NOTE(review): the default dict is created once and shared
             by every call that omits `w`.
    """
    def klass(x):
        # `train` is the training table of the split being processed.
        return x.cells[train.klass[0].col]

    def val(pair):
        # Each cell is a two-item pair: use the second item when the
        # first equals `ninf`, otherwise the first.
        first, second = pair
        return second if first == ninf else first

    seed(rseed)
    nl()
    print("# %s" % (file,))
    tbl = discreteTable(file)
    tree0 = tdiv(tbl)
    showTdiv(tree0)
    nl()
    old, better, worse = Sym(), Sym(), Sym()
    abcd1, abcd2 = Abcd(db=file, rx="where"), Abcd(db=file, rx="ranfor")
    abcd3 = Abcd(db=file, rx="logref")
    abcd4 = Abcd(db=file, rx="dt")
    abcd5 = Abcd(db=file, rx="nb")
    for tests, train in xval(tbl):
        # NOTE(review): the independent values stop two cells before the
        # end while the dependent value is the last cell, so cells[-2]
        # is used by neither -- confirm that is intended.
        learns(tests,
               train._rows,
               # A list (not a lazy map object) on Python 2 and 3 alike.
               indep=lambda row: [val(cell) for cell in row.cells[:-2]],
               dep=lambda row: row.cells[-1],
               rf=abcd2,
               lg=abcd3,
               dt=abcd4,
               nb=abcd5)
        tree = tdiv(train)
        snakesAndLadders(tree, train, w)
        for test in tests:
            abcd1(actual=klass(test), predicted=classify(test, tree))
            a, b = improve(test, tree)
            # `+` is used as a statement on the Sym tallies; presumably
            # Sym.__add__ records the value in place -- confirm.
            old + a
            better + b
            _, c = degrade(test, tree)
            worse + c
    # Single-argument print keeps the output identical on Python 2 and 3.
    print("\n:asIs %s" % (old.counts,))
    print(":plan %s" % (better.counts,))
    print(":warn %s" % (worse.counts,))
    abcd1.header()
    abcd1.report()
    abcd2.report()
    abcd3.report()
    abcd4.report()
    abcd5.report()
Exemple #9
0
def snl(file='data/poi-1.5D.csv',rseed=1,w=dict(_1=0,_0=1)):
  """Cross-validate the `tdiv` tree against four scikit-learn learners.

  After printing the file name and showing a tree built on the whole
  table, every (tests, train) split from `xval` is processed:
  `learns` fills the "ranfor", "logref", "dt" and "nb" logs, a tree is
  grown on `train` and handed to `snakesAndLadders` with `w`, and each
  test row is classified (logged under "where") while the results of
  `improve` and `degrade` are tallied.  The tallies and the five `Abcd`
  reports are printed at the end.

  file  -- path handed to `discreteTable`; also used as the `db` label.
  rseed -- seed handed to `seed`.
  w     -- mapping forwarded unchanged to `snakesAndLadders`.
           NOTE(review): the default dict is created once and shared by
           every call that omits `w`.
  """
  def klass(x):
    # `train` is the training table of the split being processed.
    return x.cells[train.klass[0].col]
  def val(pair):
    # Each cell is a two-item pair: use the second item when the first
    # equals `ninf`, otherwise the first.
    first, second = pair
    return second if first == ninf else first
  seed(rseed)
  nl()
  print("# %s" % (file,))
  tbl = discreteTable(file)
  tree0 = tdiv(tbl)
  showTdiv(tree0)
  nl()
  old, better, worse = Sym(), Sym(), Sym()
  abcd1, abcd2  = Abcd(db=file,rx="where"), Abcd(db=file,rx="ranfor")
  abcd3 = Abcd(db=file, rx="logref")
  abcd4 = Abcd(db=file, rx="dt")
  abcd5 = Abcd(db=file, rx="nb")
  for tests, train in xval(tbl):
    # NOTE(review): the independent values stop two cells before the
    # end while the dependent value is the last cell, so cells[-2] is
    # used by neither -- confirm that is intended.
    learns(tests,train._rows,
           # A list (not a lazy map object) on Python 2 and 3 alike.
           indep=lambda row: [val(cell) for cell in row.cells[:-2]],
           dep = lambda row: row.cells[-1],
           rf  = abcd2,
           lg  = abcd3,
           dt  = abcd4,
           nb  = abcd5)
    tree = tdiv(train)
    snakesAndLadders(tree,train,w)
    for test in tests:
      abcd1(actual    = klass(test),
            predicted = classify(test,tree))
      # `+` is used as a statement on the Sym tallies; presumably
      # Sym.__add__ records the value in place -- confirm.
      a,b = improve(test,tree)
      old + a
      better + b
      _,c = degrade(test,tree)
      worse + c
  # Single-argument print keeps the output identical on Python 2 and 3.
  print("\n:asIs %s" % (old.counts,))
  print(":plan %s" % (better.counts,))
  print(":warn %s" % (worse.counts,))
  abcd1.header()
  abcd1.report()
  abcd2.report()
  abcd3.report()
  abcd4.report()
  abcd5.report()