예제 #1
0
def haupt():
    """Fit a random forest on the training split and score it on the test split.

    The splits come from ``explore('./Data')``; element 1 of each is turned
    into a data frame by ``createDF``.  The classifier learns the last column
    of the training frame from the columns ``columns[3:-2]`` and predicts a
    label for every test row.  Each predicted label is then replaced by the
    string ``'True'`` or ``'False'``, depending on whether the training rows
    carrying that label have a mean ``'$<bug'`` value of at least 0.3.

    Returns:
        The result of ``_runAbcd`` called with the actual and the predicted
        ``'True'``/``'False'`` strings.
    """
    train, test = explore('./Data')
    clf = RandomForestClassifier(n_estimators=100, n_jobs=2)

    train_DF = createDF(train[1])
    test_df = createDF(test[1])

    # The feature columns are chosen from the training frame and reused
    # unchanged on the test frame.
    features = train_DF.columns[3:-2]
    klass = train_DF[train_DF.columns[-1]]
    clf.fit(train_DF[features], klass)
    preds = clf.predict(test_df[features]).tolist()

    # Map every label seen in training to 'True'/'False'.
    # NOTE(review): the row filter uses the literal column name 'klass' while
    # the fit uses the last column -- presumably the same column; confirm.
    dfct = {}
    for lbl in set(klass):
        bugs = train_DF[train_DF['klass'] == lbl]['$<bug']
        dfct[lbl] = str(np.mean(list(bugs)) >= 0.3)

    predictions = [dfct[i] for i in preds]
    # Actual outcome per test row: 'True' when the second-to-last column is
    # non-zero.
    actuals = [str(i != 0) for i in test_df[test_df.columns[-2]].tolist()]
    # _runAbcd's `train` keyword receives the actual values and `test` the
    # predicted ones, exactly as before.
    return _runAbcd(train=actuals, test=predictions, verbose=False)
예제 #2
0
 def f1(rows):
   """Score the tree that ``update`` builds from ``rows[1:-1]``.

   The first and last elements of ``rows`` are dropped and the rest is passed
   to ``update``.  The tree it returns is handed to ``tdivPrec`` together with
   ``trainDat[1]`` and ``testDat[1]``; both names are read from an outer
   scope and are not defined in this function.

   Returns:
     The result of ``_runAbcd`` for the two lists produced by ``tdivPrec``.
   """
   indep = rows[1:-1]
   # update() returns a pair; only its second element (the tree) is used here.
   _, tree = update(indep)
   # tdivPrec yields its results in (test, train) order.
   test, train = tdivPrec(where=None, dtree=tree,
                          train=trainDat[1], test=testDat[1])
   return _runAbcd(train=train, test=test, verbose=False)
예제 #3
0
def haupt():
  """Train a random forest on the training data and evaluate it on the test data.

  ``explore('./Data')`` supplies the train and test splits, and ``createDF``
  converts element 1 of each into a data frame.  The forest is fitted to
  predict the last training column from ``columns[3:-2]``.  Every label it
  predicts for the test frame is translated to ``'True'`` or ``'False'``:
  ``'True'`` when the training rows with that label average at least 0.3 in
  the ``'$<bug'`` column.

  Returns:
    Whatever ``_runAbcd`` returns for the actual versus predicted
    ``'True'``/``'False'`` strings.
  """
  train, test = explore('./Data')
  clf = RandomForestClassifier(n_estimators=100, n_jobs=2)

  train_DF = createDF(train[1])
  test_df = createDF(test[1])

  # Column selection is derived from the training frame and applied to both.
  features = train_DF.columns[3:-2]
  klass = train_DF[train_DF.columns[-1]]
  clf.fit(train_DF[features], klass)
  preds = clf.predict(test_df[features]).tolist()

  # One 'True'/'False' verdict per label that occurs in the training data.
  # NOTE(review): rows are filtered on the literal column 'klass', which is
  # presumably the same as the last column used for fitting -- confirm.
  dfct = {
      lbl: str(np.mean(list(train_DF[train_DF['klass'] == lbl]['$<bug'])) >= 0.3)
      for lbl in set(klass)
  }

  predictions = [dfct[i] for i in preds]
  # A test row's actual value is 'True' when its second-to-last column is
  # non-zero.
  actuals = [str(i != 0) for i in test_df[test_df.columns[-2]].tolist()]
  # The actual values go in through `train` and the predicted ones through
  # `test`, unchanged from the original call.
  return _runAbcd(train=actuals, test=predictions, verbose=False)
예제 #4
0
def main(dir=None):
  """Run ``tdivPrec`` and ``tdivPrec1`` and collect their ``_runAbcd`` results.

  Args:
    dir: Directory handed to ``explore``.  ``None`` (the default) selects
      ``'Data/'``, the path that was previously hard-coded regardless of
      this argument.

  Returns:
    ``[G, G1]``: ``G`` is the label ``'DT  '`` followed by one ``_runAbcd``
    result per ``tdivPrec`` repetition, and ``G1`` is ``'C4.5'`` followed by
    the results for ``tdivPrec1``.
  """
  whereParm, tree = None, None
  reps = 1
  # Honour the caller's directory; the old hard-coded path stays the default.
  trainDat, testDat = explore(dir='Data/' if dir is None else dir)

  def _collect(divide):
    # Run one learner `reps` times and gather the score of every run.
    scores = []
    for _ in range(reps):
      # Prints the total repetition count, not the iteration index, as the
      # original did.
      print(reps)
      # The learner yields its results in (test, train) order.
      test, train = divide(whereParm, tree, train=trainDat[1], test=testDat[0])
      scores.append(_runAbcd(train=train, test=test, verbose=False))
    return scores

  G = ['DT  '] + _collect(tdivPrec)
  G1 = ['C4.5'] + _collect(tdivPrec1)
  return [G, G1]