def trainAdaBoost(x, y, num_of_trees):
    """Train one boosted ensemble of depth-1 trees per class (one-vs-all).

    The distinct labels found in ``y`` are visited in descending order. For
    each label, ``y`` is converted to a binary target with
    ``convertOneVsAllToBinary`` and ``num_of_trees`` stumps are trained one
    after another, each on the sample weights produced by the previous round.

    Args:
        x: Two-dimensional feature array; the first axis is the sample axis.
        y: Labels belonging to the rows of ``x``.
        num_of_trees: Number of boosting rounds (stumps) per class.

    Returns:
        A list with one entry per distinct label, in descending label order.
        Each entry is a list of ``(alpha, stump)`` tuples in training order.
    """
    sample_count, _ = np.shape(x)
    # np.unique yields the labels sorted ascending; flip to go descending.
    labels_descending = np.flip(np.unique(y))

    ensembles_per_class = []
    for label in labels_descending:
        binary_targets = convertOneVsAllToBinary(y, label)
        # Every class restarts boosting from an all-ones weight column.
        sample_weights = np.ones((sample_count, 1), dtype=float)

        boosted_stumps = []
        for _round in range(num_of_trees):
            stump = Tree(x, binary_targets, 1, sample_weights)
            stump.train(x, binary_targets, 'boosting', sample_weights)

            predictions = testTree(x, stump).astype(int)
            error = calculateEpsilon(binary_targets, predictions, sample_weights)
            boosted_stumps.append((calculateAlpha(error), stump))

            # The next stump is trained on the re-weighted samples.
            sample_weights = reweight(binary_targets, predictions, sample_weights)

        ensembles_per_class.append(boosted_stumps)

    return ensembles_per_class
def trainTree(x, y, depth, weights=None):
    """Build a ``Tree`` of the requested depth and train it on ``x`` / ``y``.

    Training always runs in ``'boosting'`` mode, whether or not ``weights``
    is supplied.

    Args:
        x: Training features, forwarded to ``Tree`` and ``Tree.train``.
        y: Training targets, forwarded to ``Tree`` and ``Tree.train``.
        depth: Depth handed to the ``Tree`` constructor.
        weights: Optional sample weights, forwarded to ``Tree.train`` only.

    Returns:
        The trained ``Tree`` instance.
    """
    # NOTE(review): weights are not given to the constructor here, only to
    # train() -- confirm that this is what Tree expects.
    tree = Tree(x, y, depth)
    tree.train(x, y, 'boosting', weights)
    return tree
def trainBaggingEnsemble(x, y, depth, num_of_trees):
    """Train ``num_of_trees`` trees, each on a random 75% subset of the rows.

    For every tree the rows are re-shuffled and the first
    ``ceil(0.75 * rows)`` rows of the shuffled order are used for training,
    so each tree sees a subset drawn without replacement.

    Only the sampled rows are copied. The caller's ``x`` and ``y`` are never
    rebound or permuted as a whole, yet the rows picked for each tree are the
    same as if the full arrays had been re-permuted every round.

    Args:
        x: Two-dimensional feature array; the first axis is the sample axis.
        y: Two-dimensional target array with one row per row of ``x``.
        depth: Depth handed to each ``Tree``.
        num_of_trees: Number of trees to train.

    Returns:
        A list of the trained ``Tree`` objects, in training order.
    """
    percentage = 3 / 4
    rows, _ = np.shape(x)
    num_of_training_data = math.ceil(rows * percentage)

    # row_indexes is shuffled in place each round; `order` accumulates those
    # shuffles so that x[order] equals the data permuted once per round so far.
    row_indexes = np.arange(rows)
    order = np.arange(rows)

    ensemble = []
    for _ in range(num_of_trees):
        np.random.shuffle(row_indexes)
        order = order[row_indexes]

        # Copy just the rows this tree trains on instead of the whole dataset.
        chosen = order[:num_of_training_data]
        training_x = x[chosen, :]
        training_y = y[chosen, :]

        tree = Tree(training_x, training_y, depth)
        tree.train(training_x, training_y, 'bagging')
        ensemble.append(tree)

    return ensemble