def haupt():
    """Fit a random forest on the labelled training frame and score it.

    Steps, as performed by the code below:
      1. Load the train/test collections with ``explore('./Data')`` and turn
         element ``[1]`` of each into a DataFrame via ``createDF``.
      2. Fit a 100-tree ``RandomForestClassifier`` using columns ``[3:-2]``
         as features and the last column as the target label.
      3. Predict a label for every test row.
      4. Map each label to the string ``'True'``/``'False'``: a label is
         ``'True'`` when the mean of ``'$<bug'`` over the training rows
         carrying that label is at least 0.3.
      5. Derive the actual outcome of each test row from the second-to-last
         column (``'True'`` when the value is non-zero).

    Returns:
        Whatever ``_runAbcd`` returns when called with the actual outcomes as
        ``train`` and the mapped predictions as ``test``.

    NOTE(review): another definition of ``haupt`` appears later in this file
    and replaces this one once the module has been loaded.
    """
    train, test = explore('./Data')
    clf = RandomForestClassifier(n_estimators=100, n_jobs=2)

    # The original carried the note "Cluster using FASTMAP" here; presumably
    # createDF attaches the cluster label column -- TODO confirm.
    train_DF = createDF(train[1])  # training data
    test_df = createDF(test[1])    # testing data

    # Features skip the first three and the last two columns.
    features = train_DF.columns[3:-2]
    klass = train_DF[train_DF.columns[-1]]
    clf.fit(train_DF[features], klass)
    preds = clf.predict(test_df[features]).tolist()

    # Label -> 'True'/'False', decided by the mean '$<bug' of that label's
    # training rows against the 0.3 threshold.
    # NOTE(review): rows are selected through the literal 'klass' column while
    # the labels come from the last column; assumed to be the same column.
    dfct = {lbl: str(np.mean(list(train_DF[train_DF['klass'] == lbl]['$<bug'])) >= 0.3)
            for lbl in set(klass)}

    predictions = [dfct[i] for i in preds]
    actuals = [str(not i == 0) for i in test_df[test_df.columns[-2]].tolist()]
    return _runAbcd(train=actuals, test=predictions, verbose=False)
def f1(rows):
    """Score one candidate configuration and return the ``_runAbcd`` result.

    Args:
        rows: Sequence whose interior elements (everything except the first
            and the last) are handed to ``update``.

    Returns:
        The value returned by ``_runAbcd`` for the train/test pair produced
        by ``tdivPrec``.

    NOTE(review): ``trainDat`` and ``testDat`` are not bound inside this
    function, so they must exist in an enclosing/module scope when it is
    called -- TODO confirm where they are defined.
    """
    indep = rows[1:-1]
    # update() returns a pair; only the second element (the tree) is used.
    _, tree = update(indep)
    [test, train] = tdivPrec(where=None, dtree=tree,
                             train=trainDat[1], test=testDat[1])
    return _runAbcd(train=train, test=test, verbose=False)
def haupt():
    """Fit a random forest on the labelled training frame and score it.

    Steps, as performed by the code below:
      1. Load the train/test collections with ``explore('./Data')`` and turn
         element ``[1]`` of each into a DataFrame via ``createDF``.
      2. Fit a 100-tree ``RandomForestClassifier`` using columns ``[3:-2]``
         as features and the last column as the target label.
      3. Predict a label for every test row.
      4. Map each label to the string ``'True'``/``'False'``: a label is
         ``'True'`` when the mean of ``'$<bug'`` over the training rows
         carrying that label is at least 0.3.
      5. Derive the actual outcome of each test row from the second-to-last
         column (``'True'`` when the value is non-zero).

    Returns:
        Whatever ``_runAbcd`` returns when called with the actual outcomes as
        ``train`` and the mapped predictions as ``test``.

    NOTE(review): an earlier definition of ``haupt`` exists in this file;
    this later one is the definition that stays bound to the name.
    """
    train, test = explore('./Data')
    clf = RandomForestClassifier(n_estimators=100, n_jobs=2)

    # The original carried the note "Cluster using FASTMAP" here; presumably
    # createDF attaches the cluster label column -- TODO confirm.
    train_DF = createDF(train[1])  # training data
    test_df = createDF(test[1])    # testing data

    # Features skip the first three and the last two columns.
    features = train_DF.columns[3:-2]
    klass = train_DF[train_DF.columns[-1]]
    clf.fit(train_DF[features], klass)
    preds = clf.predict(test_df[features]).tolist()

    # Label -> 'True'/'False', decided by the mean '$<bug' of that label's
    # training rows against the 0.3 threshold.
    # NOTE(review): rows are selected through the literal 'klass' column while
    # the labels come from the last column; assumed to be the same column.
    dfct = {lbl: str(np.mean(list(train_DF[train_DF['klass'] == lbl]['$<bug'])) >= 0.3)
            for lbl in set(klass)}

    predictions = [dfct[i] for i in preds]
    actuals = [str(not i == 0) for i in test_df[test_df.columns[-2]].tolist()]
    return _runAbcd(train=actuals, test=predictions, verbose=False)
def main(dir=None):
    """Run both splitters on the explored data and collect their scores.

    Args:
        dir: Directory passed to ``explore``. When omitted (``None``) the
            historical default ``'Data/'`` is used. Previously this argument
            was accepted but ignored. The name shadows the ``dir`` builtin
            and is kept only for caller compatibility.

    Returns:
        A two-element list ``[G, G1]``. ``G`` starts with the label ``'DT '``
        followed by one ``_runAbcd`` result per repetition using
        ``tdivPrec``; ``G1`` starts with ``'C4.5'`` followed by the results
        obtained with ``tdivPrec1``.
    """
    data_dir = 'Data/' if dir is None else dir
    whereParm, tree = None, None
    reps = 1
    trainDat, testDat = explore(dir=data_dir)

    def _collect(label, splitter):
        # Label first, then one score per repetition.
        scores = [label]
        for _ in xrange(reps):
            # Prints the total repetition count (not the iteration index),
            # exactly as before.
            print(reps)
            [test, train] = splitter(whereParm, tree,
                                     train=trainDat[1], test=testDat[0])
            scores.append(_runAbcd(train=train, test=test, verbose=False))
        return scores

    return [_collect('DT ', tdivPrec), _collect('C4.5', tdivPrec1)]