def fit(self, X, y):
    """Train ``self.num_trees`` RandomTree models on ``(X, y)``.

    A progress line is printed before each tree is fitted. The fitted
    trees are stored, in training order, in ``self.trees``.

    Args:
        X: Training inputs, passed unchanged to ``RandomTree.fit``.
        y: Training targets, passed unchanged to ``RandomTree.fit``.
    """
    total = self.num_trees
    self.trees = []
    for position in range(1, total + 1):
        print("Fitting tree %02d/%d..." % (position, total))
        learner = RandomTree(max_depth=self.max_depth)
        learner.fit(X, y)
        self.trees.append(learner)
def fit(self, X, y):
    """Fit ``self.num_trees`` RandomTree models and keep them in ``self.treeList``.

    Args:
        X: Training inputs, forwarded to each tree's ``fit``.
        y: Training targets, forwarded to each tree's ``fit``.
    """
    fitted = []
    for _ in range(self.num_trees):
        learner = RandomTree(self.max_depth)
        learner.fit(X, y)
        fitted.append(learner)
    # The attribute is only assigned once every tree has been trained.
    self.treeList = fitted
def fit(self, X, y):
    """Build the forest: one RandomTree per ``self.num_trees``, each fit on ``(X, y)``.

    The trees are appended to ``self.random_trees`` as they are trained.

    Args:
        X: Training inputs, forwarded to ``RandomTree.fit``.
        y: Training targets, forwarded to ``RandomTree.fit``.
    """
    self.random_trees = []
    remaining = self.num_trees
    while remaining > 0:
        member = RandomTree(max_depth=self.max_depth)
        member.fit(X, y)
        self.random_trees.append(member)
        remaining -= 1
def fit(self, X, y):
    """Fit ``self.num_trees`` RandomTree models and store them in ``self.trees``.

    ``X`` is no longer unpacked into ``(N, D)``: those values were never
    used, and the unpacking only forced ``X`` to expose a 2-tuple
    ``shape``. Input validation is left to ``RandomTree.fit``.

    Args:
        X: Training inputs, forwarded to ``RandomTree.fit``.
        y: Training targets, forwarded to ``RandomTree.fit``.
    """
    trees = []
    for _ in range(self.num_trees):
        model = RandomTree(max_depth=self.max_depth)
        model.fit(X, y)
        trees.append(model)
    # Assigned only after every tree trained successfully.
    self.trees = trees
def main():
    """Build a ``RandomTree(3, 3)`` and create every entry it describes.

    Each item yielded by ``recognize_type(tree.get())`` is unpacked as the
    positional arguments of ``create``; afterwards the tree's ``repr`` is
    printed.
    """
    tree = RandomTree(3, 3)
    entries = recognize_type(tree.get())
    for create_args in entries:
        create(*create_args)
    print(repr(tree))
def fit(self, X, y):
    """Train every tree of the forest on ``(X, y)``.

    ``self.random_forest`` is reset to an empty list and then receives one
    fitted RandomTree per ``self.num_trees``.

    Args:
        X: Training inputs, forwarded to ``RandomTree.fit``.
        y: Training targets, forwarded to ``RandomTree.fit``.
    """
    self.random_forest = []
    for _ in range(self.num_trees):
        # Any per-tree randomisation happens inside RandomTree itself;
        # this method passes the full (X, y) to every tree.
        member = RandomTree(max_depth=self.max_depth)
        member.fit(X, y)
        self.random_forest.append(member)
def fit(self, X, y):
    """Fit ``self.num_trees`` RandomTree models; store them in ``self.list_of_trees``.

    Args:
        X: Training inputs, forwarded to ``RandomTree.fit``.
        y: Training targets, forwarded to ``RandomTree.fit``.
    """
    grown = []
    for _ in range(self.num_trees):
        sapling = RandomTree(max_depth=self.max_depth)
        sapling.fit(X, y)
        grown.append(sapling)
    self.list_of_trees = grown
def fit(self, X, y):
    """Fit ``self.num_trees`` unlimited-depth RandomTree models.

    The fitted models are stored in ``self.LOM`` and the number of training
    targets (``y.shape[0]``) in ``self.y_length``.

    Args:
        X: Training inputs, forwarded to ``RandomTree.fit``.
        y: Training targets; must expose ``shape``.
    """
    fitted_models = []
    for _ in range(self.num_trees):
        # Depth is deliberately unbounded here (np.inf), independent of
        # any depth setting stored on the instance.
        tree = RandomTree(max_depth=np.inf)
        tree.fit(X, y)
        fitted_models.append(tree)
    self.LOM = fitted_models
    self.y_length = y.shape[0]
def fit(self, X, y):
    """Reset ``self.trees`` and fill it with ``self.num_trees`` fitted RandomTrees.

    Args:
        X: Training inputs, forwarded to ``RandomTree.fit``.
        y: Training targets, forwarded to ``RandomTree.fit``.
    """
    self.trees = []
    # Read both settings once, before any tree is trained.
    count, depth = self.num_trees, self.max_depth
    for _ in range(count):
        grown = RandomTree(max_depth=depth)
        grown.fit(X, y)
        self.trees.append(grown)
def load(self, filename):
    """Rebuild ``self.trees`` from a JSON model file.

    The file must contain a JSON object whose ``"model"`` entry is a list;
    each element is handed to ``RandomTree.load`` on a freshly constructed
    tree (built with ``self.max_depth``).

    The file is opened in a ``with`` block so the handle is closed even if
    reading or parsing fails, and ``self.trees`` is only replaced once
    every tree loaded successfully.

    Args:
        filename: Path of the JSON file to read.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the JSON object has no ``"model"`` entry.
    """
    with open(filename, "r") as handle:
        serialized_trees = json.load(handle)["model"]

    restored = []
    for tree_state in serialized_trees:
        tree = RandomTree(max_depth=self.max_depth)
        tree.load(tree_state)
        restored.append(tree)
    self.trees = restored
def fit(self, X, y):
    """Fit ``self.num_trees`` RandomTree models and publish them as ``self.forest``.

    Args:
        X: Training inputs, forwarded to ``RandomTree.fit``.
        y: Training targets, forwarded to ``RandomTree.fit``.
    """
    members = []
    for _ in range(self.num_trees):
        candidate = RandomTree(max_depth=self.max_depth)
        candidate.fit(X, y)
        members.append(candidate)
    self.forest = members
def fit(self, X, y):
    """Remember the training data and fit ``self.num_trees`` RandomTree models.

    ``self.X`` and ``self.y`` keep references to the training data. The
    fitted trees are stored in ``self.stats`` as a 1-D NumPy object array.

    The array is preallocated and filled by index rather than grown with
    ``np.append`` inside the loop: ``np.append`` copies the whole array on
    every call and runs each tree object through NumPy's array coercion.

    Args:
        X: Training inputs, forwarded to ``RandomTree.fit``.
        y: Training targets, forwarded to ``RandomTree.fit``.
    """
    self.X = X
    self.y = y
    forest = np.empty(self.num_trees, dtype=object)
    for slot in range(self.num_trees):
        tree = RandomTree(self.max_depth)
        tree.fit(X, y)
        forest[slot] = tree
    self.stats = forest
def fit(self, X, y):
    """Fit ``self.num_trees`` RandomTree models and store them in ``self.rt``.

    Each iteration constructs its own ``RandomTree(self.max_depth)``, fits
    it, and appends that tree. Previously the loop called
    ``RandomTree.fit(self, X, y)``, which trained the forest object itself
    and appended the call's return value instead of a tree.

    Args:
        X: Training inputs, forwarded to ``RandomTree.fit``.
        y: Training targets, forwarded to ``RandomTree.fit``.
    """
    rt = []
    for _ in range(self.num_trees):
        tree = RandomTree(self.max_depth)
        tree.fit(X, y)
        rt.append(tree)
    self.rt = rt
def __init__(self, num_trees, max_depth):
    """Store the forest settings and construct its RandomTree members.

    No fitting happens here; ``self.rand_trees`` holds ``num_trees`` newly
    constructed ``RandomTree(max_depth=max_depth)`` objects.

    Args:
        num_trees: Number of RandomTree models to create.
        max_depth: Depth limit passed to every RandomTree.
    """
    self.num_trees = num_trees
    self.max_depth = max_depth
    self.rand_trees = [
        RandomTree(max_depth=self.max_depth) for _ in range(self.num_trees)
    ]
def fit(self, X, Y):
    """Fit ``self.n_trees`` RandomTree models, each on a random subset of rows.

    Every tree is trained on ``int(self.ratio_per_tree * n)`` rows drawn
    without replacement from ``(X, Y)``, where ``n`` is the number of rows
    of ``X``. Fitted trees are stored in ``self.trees``.

    Changes from the previous implementation:
      * ``np.int`` (removed from NumPy) is replaced by the builtin ``int``.
      * The global NumPy RNG is seeded once per call instead of once per
        tree. Reseeding with the same value inside the loop reset the
        generator to the same state before every tree, so each tree's
        ``fit`` saw an identical random stream.
      * Rows are selected by indexing with a fresh permutation rather than
        by repeatedly re-shuffling rebound copies of ``X`` and ``Y``.

    Args:
        X: 2-D array of training inputs (rows are samples).
        Y: Array of training targets, indexable by the same row indices.
    """
    n = X.shape[0]
    subset_size = int(self.ratio_per_tree * n)

    # Same time-bucketed seed as before (changes every 150 seconds).
    np.random.seed(int(time() / 150))

    self.trees = []
    for _ in range(self.n_trees):
        rows = np.random.permutation(n)[:subset_size]
        clf = RandomTree(max_depth=self.max_depth,
                         ratio_features=self.ratio_features)
        clf.fit(X[rows, :], Y[rows])
        self.trees.append(clf)
def gen_random_forest(dataset, attributes, ntrees, nattributes, depth_limit=None):
    """
    Build a list of ``ntrees`` RandomTree objects.

    For every tree, ``bootstrap(dataset)`` is called, its result is passed
    through ``select_attributes`` together with ``attributes`` and
    ``nattributes``, and the values returned are unpacked as the positional
    arguments of ``RandomTree``.

    :param dataset: the dataset dataframe
    :param attributes: the attributes dict
    :param ntrees: int, number of trees
    :param nattributes: int, number of attributes of each tree (aka. "m")
    :param depth_limit: forwarded to every RandomTree as ``depth_limit``
    :return: list of the constructed RandomTree objects

    Algorithm in slide 32 class 16
    """
    forest = []
    for _ in range(ntrees):
        sample = bootstrap(dataset)
        tree_args = select_attributes(sample, attributes, nattributes)
        forest.append(RandomTree(*tree_args, depth_limit=depth_limit))
    return forest
if question == '5':
    # Load the 'vowel' dataset and pull out its train / test splits.
    dataset = utils.load_dataset('vowel')
    X, y = dataset['X'], dataset['y']
    X_test, y_test = dataset['Xtest'], dataset['ytest']
    print("n = %d, d = %d" % X.shape)

    def evaluate_model(model):
        """Fit *model* on (X, y) and print train error, test error and a timing.

        The timer starts after the training-set prediction, so the reported
        time excludes fitting and covers the test-set prediction onwards.
        """
        model.fit(X, y)
        tr_error = np.mean(model.predict(X) != y)

        started = time.time()
        te_error = np.mean(model.predict(X_test) != y_test)

        print("Training error: %.3f" % tr_error)
        print("Testing error: %.3f" % te_error)
        print("Time taken: %f" % (time.time() - started))

    # The same evaluation is run on four models, in this order.
    evaluate_model(
        DecisionTree(max_depth=50, stump_class=DecisionStumpInfoGain))
    evaluate_model(RandomTree(max_depth=50))
    evaluate_model(
        RandomForestClassifier(n_estimators=50,
                               max_depth=50,
                               criterion='entropy',
                               random_state=1))
    evaluate_model(RandomForest(num_trees=50, max_depth=np.inf))
def fit(self, X, y):
    """Reset ``self.Random_Forest`` and fill it with fitted RandomTree models.

    One tree is built and fitted per ``self.num_trees``.

    Args:
        X: Training inputs, forwarded to ``RandomTree.fit``.
        y: Training targets, forwarded to ``RandomTree.fit``.
    """
    self.Random_Forest = []
    for _ in range(self.num_trees):
        fresh_tree = RandomTree(self.max_depth)
        fresh_tree.fit(X, y)
        self.Random_Forest.append(fresh_tree)
def __init__(self, max_depth, num_trees):
    """Record the forest size and run ``RandomTree.__init__`` on this instance.

    Args:
        max_depth: Passed positionally to ``RandomTree.__init__``.
        num_trees: Stored as ``self.num_trees``.
    """
    self.num_trees = num_trees
    RandomTree.__init__(self, max_depth)
model.fit(X,y) y_pred = model.predict(X) tr_error = np.mean(y_pred != y) y_pred = model.predict(X_test) te_error = np.mean(y_pred != y_test) print(" Training error: %.3f" % tr_error) print(" Testing error: %.3f" % te_error) print("Our implementations:") print(" Decision tree info gain") evaluate_model(DecisionTree(max_depth=np.inf, stump_class=DecisionStumpInfoGain)) print(" Random tree info gain") evaluate_model(RandomTree(max_depth=np.inf)) print(" Random forest info gain") evaluate_model(RandomForest(max_depth=np.inf, num_trees=50)) print("sklearn implementations") print(" Decision tree info gain") evaluate_model(DecisionTreeClassifier(criterion="entropy")) print(" Random forest info gain") evaluate_model(RandomForestClassifier(criterion="entropy")) print(" Random forest info gain, more trees") evaluate_model(RandomForestClassifier(criterion="entropy", n_estimators=50)) elif question == '3': X = load_dataset('clusterData.pkl')['X']
def fit(self, X, y):
    """Fit ``self.num_trees`` RandomTree models and store them in ``self.models``.

    ``self.models`` is reset at the start of every call. Previously the
    method only appended, so calling ``fit`` again stacked a second forest
    on top of the first, and the method failed if ``self.models`` had not
    been created beforehand.

    Args:
        X: Training inputs, forwarded to ``RandomTree.fit``.
        y: Training targets, forwarded to ``RandomTree.fit``.
    """
    self.models = []
    for _ in range(self.num_trees):
        model = RandomTree(max_depth=self.max_depth)
        model.fit(X, y)
        self.models.append(model)
mode_parser = parser() if str(mode_parser.mode) == 'verify': # benchmark m = [] with open('datasets/benchmark/benchmark.csv') as bfile: breader = csv.reader(bfile, delimiter=';') for row in breader: m.append(row) attributes = {x: ['categorical'] for x in m[0][:-1]} attributes_names = m[0][:-1] del m[0] dataset = pd.DataFrame(m, columns=attributes_names + ['y']) for x in attributes.keys(): attributes[x].append(dataset[x].unique()) RT = RandomTree(dataset, attributes) RT.print_tree() sys.exit() elif str(mode_parser.mode) == 'wine': dataset, attributes = read_dataset('wine') elif str(mode_parser.mode) == 'survival': dataset, attributes = read_dataset('survival') elif str(mode_parser.mode) == 'cancer': dataset, attributes = read_dataset('cancer') elif str(mode_parser.mode) == 'contraceptive': dataset, attributes = read_dataset('contraceptive') """
def __init__(self, max_depth, num_trees):
    """Initialise the tree state via both base initialisers, then store ``num_trees``.

    ``RandomTree.__init__`` runs first and ``DecisionTree.__init__`` second,
    so whatever the latter sets (with ``RandomStumpInfoGain`` as the stump
    class) is what remains on the instance.

    Args:
        max_depth: Depth limit forwarded to both initialisers.
        num_trees: Stored as ``self.num_trees``.
    """
    RandomTree.__init__(self, max_depth=max_depth)
    DecisionTree.__init__(
        self,
        max_depth=max_depth,
        stump_class=RandomStumpInfoGain,
    )
    self.num_trees = num_trees