def train(self, train_set, label, N_feat=5, N_tree=50, max_depth=10):
    """
    Trains the forest and stores the trees for prediction

    Arguments:
    train_set - pandas.DataFrame containing the training data
    label     - Name of the label column in the DataFrame
    N_feat    - Number of features to consider for each tree
                Default = 5
    N_tree    - Number of trees to use in the forest
                Default = 50
    max_depth - Maximum depth of trees to create
                Default = 10
    """
    self.trees = []
    self.N_feat = N_feat
    self.N_tree = N_tree
    for i in range(N_tree):
        # Grow each tree on a copy of the data restricted to a random
        # subset of N_feat features
        t_set = train_set.copy()
        t_set = reduce_features(self.N_feat, t_set, label)
        self.trees.append(dt.get_tree(t_set, label, max_depth))
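# Example usage (a minimal sketch, not part of the repo: the enclosing class
# name "RandomForest" and the variable "train_df" are assumptions, since the
# class definition is not shown in this excerpt; 'Winner' is the label column
# used elsewhere in this file):
#
#   forest = RandomForest()
#   forest.train(train_df, 'Winner', N_feat=5, N_tree=50, max_depth=10)
#   # forest.trees now holds N_tree decision trees, each grown on a random
#   # subset of N_feat features.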
def LoadAndTreeEval():
    """
    Load the data and create a decision tree

    Returns:
    DTree generated for the data
    """
    train, test, tune = l.CreateDataFrames('data/Build229Data.txt',
                                           'data/FeatureNames.txt')
    # Fit a depth-10 tree on the training split and pickle it for later use
    tree = dt.get_tree(train, 'Winner', 10)
    with open('classifiers/dtree03.p', 'wb') as f:
        p.dump(tree, f)
    # Evaluate on the held-out test split
    pred = tree.predict(test)
    true = test['Winner'].values
    print(an.accuracy(pred, true))
    print(an.f1_score(pred, true))
    return tree
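# Example of reloading the pickled tree written above (a sketch; it assumes
# the alias "p" refers to Python's pickle module, as the p.dump call suggests,
# and that "test" is a DataFrame like the one built in LoadAndTreeEval):
#
#   with open('classifiers/dtree03.p', 'rb') as f:
#       tree = p.load(f)
#   pred = tree.predict(test)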
def train(self, train_set, label, M=10):
    """
    Trains the bagger and stores the resulting trees

    Arguments:
    train_set - pandas.DataFrame containing the training data
    label     - Name of the label column in the DataFrame
    M         - Number of trees to generate
                Default = 10
    """
    N = train_set.shape[0]
    self.M = M
    self.trees = []
    for i in range(M):
        # Draw a bootstrap sample of the row indices, reset the index so
        # duplicated rows get unique labels, then fit a tree on the resample
        idxs = an.bootstrap(range(N))
        b_set = train_set.iloc[idxs]
        b_set.index = np.arange(N)
        self.trees.append(dt.get_tree(b_set, label))
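# Example usage (a minimal sketch; the enclosing class name "Bagger" and the
# variable "train_df" are assumptions, since the class definition is not shown
# in this excerpt):
#
#   bagger = Bagger()
#   bagger.train(train_df, 'Winner', M=10)
#   # bagger.trees now holds M trees, each fit on a bootstrap resample of the
#   # training rows.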