def train(self, train_set, label, N_feat = 5, N_tree = 50, max_depth = 10):
        """ Fit the forest by growing N_tree trees and keeping them on self

        Every tree is grown from its own copy of train_set, after that copy
        has been passed through reduce_features. The finished trees are
        kept in self.trees; N_feat and N_tree are recorded on self as well.

        Arguments:
            train_set   - pandas.DataFrame holding the training data

            label       - Name of the label column in train_set

            N_feat      - Feature count handed to reduce_features for
                          each tree
                          Default = 5

            N_tree      - How many trees the forest holds
                          Default = 50

            max_depth   - Depth limit handed to dt.get_tree
                          Default = 10
        """

        self.N_tree = N_tree
        self.N_feat = N_feat
        self.trees = []

        for _ in range(N_tree):
            # Work on a private copy so the caller's frame is never the
            # object handed to reduce_features
            subset = reduce_features(self.N_feat, train_set.copy(), label)
            grown = dt.get_tree(subset, label, max_depth)
            self.trees.append(grown)
# Example no. 2
# 0
def LoadAndTreeEval():
    """ Load the data, create a decision tree, save it and print its scores

    Side effects:
        Dumps the tree to 'classifiers/dtree03.p' and prints the values
        returned by an.accuracy and an.f1_score for the test frame.

    Returns:
        DTree generated for the data
    """

    # The third returned frame is not used in this function
    train, test, _ = l.CreateDataFrames('data/Build229Data.txt', 'data/FeatureNames.txt')
    tree = dt.get_tree(train, 'Winner', 10)

    # Context manager so the file is flushed and closed even if the dump raises
    with open('classifiers/dtree03.p', 'wb') as out_file:
        p.dump(tree, out_file)

    pred = tree.predict(test)
    true = test['Winner'].values

    # Single-argument parenthesised form prints the same under Python 2 and 3
    print(an.accuracy(pred, true))
    print(an.f1_score(pred, true))

    return tree
# Example no. 3
# 0
    def train(self, train_set, label, M = 10):
        """ Trains the bagger and stores the resulting trees

        Each of the M trees is built by dt.get_tree from a resampled frame
        whose rows are picked out of train_set by an.bootstrap. The trees
        are kept in self.trees and M is recorded on self.

        Arguments:
            train_set   - pandas.DataFrame containing the training data

            label       - Name of the label column in the DataFrame

            M           - Number of trees to generate
                          Default = 10
        """

        N = train_set.shape[0]
        self.M = M
        self.trees = []

        for _ in range(M):
            # an.bootstrap is given the row positions 0..N-1; presumably it
            # resamples them with replacement - confirm against an.bootstrap
            idxs = an.bootstrap(range(N))

            # The sampled values are positions, so select with .iloc (the old
            # .ix indexer no longer exists in pandas >= 1.0 and looked up
            # integer labels, not positions, on integer-indexed frames)
            b_set = train_set.iloc[list(idxs)]

            # Renumber rows 0..len-1 so duplicated rows get unique labels
            b_set = b_set.reset_index(drop=True)

            self.trees.append(dt.get_tree(b_set, label))