def train_svm_forest(X, Y, num_trees=10, max_depth=3, bagging_percent=0.65,
                     randomize_C=False, model_args=None, tree_args=None):
    """A random forest whose base classifier is a SVM-Tree
    (rather than splitting individual features we project each
    point onto a hyperplane)

    Parameters
    ----------
    X : numpy array containing input data.
        Should have samples for rows and features for columns.
    Y : numpy array containing class labels for each sample
    num_trees : how big is the forest?
    max_depth : maximum depth of each SVM-Tree
    bagging_percent : what subset of the data is each tree trained on?
    randomize_C : bool
    model_args : parameters for each SVM classifier (dict or None)
    tree_args : parameters for each tree of classifiers (dict or None)
    """
    # None-sentinel instead of mutable `{}` defaults: a shared default dict
    # would be mutated across calls if mk_svm_tree or the tree writes to it.
    if model_args is None:
        model_args = {}
    if tree_args is None:
        tree_args = {}
    tree = mk_svm_tree(max_depth, randomize_C, model_args, tree_args)
    forest = ClassifierEnsemble(
        base_model=tree,
        num_models=num_trees,
        bagging_percent=bagging_percent)
    forest.fit(X, Y)
    return forest
def train_sgd_forest(X, Y, num_trees=20, max_depth=3, bagging_percent=0.65,
                     randomize_alpha=False, model_args=None, tree_args=None):
    """A random forest whose base classifier is a tree of SGD classifiers

    Parameters
    ----------
    X : numpy array containing input data.
        Should have samples for rows and features for columns.
    Y : numpy array containing class labels for each sample
    num_trees : how big is the forest?
    max_depth : maximum depth of each tree
    bagging_percent : what subset of the data is each tree trained on?
    randomize_alpha : bool
    model_args : parameters for each SGD classifier (dict or None)
    tree_args : parameters for each tree (dict or None)
    """
    # None-sentinel instead of mutable `{}` defaults: a shared default dict
    # would be mutated across calls if mk_sgd_tree or the tree writes to it.
    if model_args is None:
        model_args = {}
    if tree_args is None:
        tree_args = {}
    # NOTE(review): this is a float (e.g. 0.65 * n); mk_sgd_tree appears to
    # take it as a sample count — confirm whether it should be int(...).
    bagsize = bagging_percent * X.shape[0]
    tree = mk_sgd_tree(bagsize, max_depth, randomize_alpha, model_args, tree_args)
    forest = ClassifierEnsemble(
        base_model=tree,
        num_models=num_trees,
        bagging_percent=bagging_percent)
    forest.fit(X, Y)
    return forest
def test_stacked_random_forest():
    """Fit a ClassifierEnsemble of randomized trees with a logistic-regression
    stacking model on the module-level (data, labels) fixture and run it
    through try_predictor."""
    base_tree = RandomizedTree(min_leaf_size=1)
    stacker = LogisticRegression()
    model = ClassifierEnsemble(base_model=base_tree, stacking_model=stacker)
    model.fit(data, labels)
    try_predictor(model)