def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.001):
    """Record the hyper-parameters and create the activation/loss objects.

    Parameters:
    -----------
    n_hidden:
        Stored unchanged as ``self.n_hidden``.
    n_iterations:
        Stored unchanged as ``self.n_iterations`` (defaults to 3000).
    learning_rate:
        Stored unchanged as ``self.learning_rate`` (defaults to 0.001).
    """
    # Plain hyper-parameters: kept exactly as passed in.
    self.learning_rate = learning_rate
    self.n_iterations = n_iterations
    self.n_hidden = n_hidden

    # Components built here; Sigmoid, Softmax and CrossEntropy are
    # expected to be defined elsewhere in the module.
    self.hidden_activation = Sigmoid()
    self.output_activation = Softmax()
    self.loss = CrossEntropy()
def __init__(self, n_estimators, learning_rate, min_samples_split,
             min_impurity, max_depth, regression):
    """Store the boosting hyper-parameters and build the tree ensemble.

    Parameters:
    -----------
    n_estimators: int
        Number of RegressionTree instances created (passed to ``range``).
    learning_rate:
        Stored as ``self.learning_rate``.
    min_samples_split:
        Forwarded to every RegressionTree as ``min_samples_split``.
    min_impurity:
        Forwarded to every RegressionTree as ``min_impurity``.
    max_depth:
        Forwarded to every RegressionTree as ``max_depth``.
    regression: boolean
        Truthy selects SquareLoss, falsy selects CrossEntropy.
    """
    self.n_estimators = n_estimators
    self.learning_rate = learning_rate
    self.min_samples_split = min_samples_split
    self.min_impurity = min_impurity
    self.max_depth = max_depth
    self.regression = regression
    self.bar = progressbar.ProgressBar(widgets=bar_widgets)

    # Square loss for regression, log loss (cross entropy) for classification.
    self.loss = SquareLoss() if self.regression else CrossEntropy()

    # Initialize the regression trees.
    # NOTE: the keyword is `min_samples_split` (single underscore); the
    # previous spelling `min__samples_split` did not match the keyword
    # used by the other constructors of this ensemble.
    self.trees = [
        RegressionTree(min_samples_split=self.min_samples_split,
                       min_impurity=min_impurity,
                       max_depth=self.max_depth)
        for _ in range(n_estimators)
    ]
class GradientBoosting(object):
    """Gradient boosting over a fixed list of regression trees.

    Each tree is fitted to the gradient of the loss with respect to the
    current prediction; the prediction is then moved against that gradient
    by ``learning_rate`` times the tree output.

    Parameters:
    -----------
    n_estimators: int
        Number of RegressionTree instances in the ensemble.
    learning_rate:
        Factor applied to each tree's output before it is subtracted.
    min_samples_split:
        Forwarded to every RegressionTree.
    min_impurity:
        Forwarded to every RegressionTree.
    max_depth:
        Forwarded to every RegressionTree.
    regression: boolean
        Truthy selects SquareLoss and raw outputs from ``predict``; falsy
        selects CrossEntropy and arg-max labels from ``predict``.
    """

    def __init__(self, n_estimators, learning_rate, min_samples_split,
                 min_impurity, max_depth, regression):
        super(GradientBoosting, self).__init__()
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.regression = regression
        self.bar = progressbar.ProgressBar(widgets=bar_widgets)
        # Baseline that boosting starts from; set by fit().
        self.initial_prediction = None

        # Square loss for regression, log loss for classification.
        self.loss = SquareLoss() if self.regression else CrossEntropy()

        # Initialize the trees.
        self.trees = [
            RegressionTree(min_samples_split=self.min_samples_split,
                           min_impurity=min_impurity,
                           max_depth=self.max_depth)
            for _ in range(n_estimators)
        ]

    def fit(self, X, y):
        """Fit the trees one after another on the loss gradient.

        The running prediction starts at the mean of ``y`` along axis 0.
        That baseline is stored in ``self.initial_prediction`` so that
        ``predict`` can start from the same point.
        """
        self.initial_prediction = np.mean(y, axis=0)
        # r_im is also the y_pred for the current tree.
        r_im = np.full(np.shape(y), self.initial_prediction)
        for i in self.bar(range(self.n_estimators)):
            gradient = self.loss.gradient(y, r_im)
            self.trees[i].fit(X, gradient)
            update = self.trees[i].predict(X)
            # Step against the gradient estimated by the tree.
            r_im -= np.multiply(self.learning_rate, update)

    def predict(self, X):
        """Return the ensemble prediction for ``X``.

        The result is the baseline stored by ``fit`` minus the sum of the
        scaled tree outputs. When ``self.regression`` is falsy the scores
        are passed through a softmax along axis 1 and the index of the
        largest probability per row is returned.
        """
        y_pred = None
        for tree in self.trees:
            update = np.multiply(self.learning_rate, tree.predict(X))
            y_pred = -update if y_pred is None else y_pred - update

        # fit() started from this baseline, so predict() must add it back;
        # without it every prediction is shifted by the training mean.
        baseline = (0.0 if self.initial_prediction is None
                    else self.initial_prediction)
        if y_pred is None:
            # No trees at all: the prediction is just the baseline.
            y_pred = np.zeros((len(X),) + np.shape(baseline))
        y_pred = y_pred + baseline

        if not self.regression:
            # Softmax with the row maximum subtracted to avoid overflow
            # in exp(); this does not change the probabilities.
            shifted = y_pred - np.max(y_pred, axis=1, keepdims=True)
            exp_scores = np.exp(shifted)
            probabilities = exp_scores / np.sum(exp_scores, axis=1,
                                                keepdims=True)
            y_pred = np.argmax(probabilities, axis=1)
        return y_pred
class GradientBoosting(object):
    """Gradient boosting over a fixed list of regression trees.

    Each tree is fitted to the output of ``train_loss.backward`` for the
    current prediction; the prediction is then reduced by
    ``learning_rate`` times the tree output.

    Parameters:
    -----------
    n_estimators: int
        Number of RegressionTree instances in the ensemble.
    learning_rate:
        Factor applied to each tree's output before it is subtracted.
    min_samples_split:
        Forwarded to every RegressionTree.
    min_impurity:
        Forwarded to every RegressionTree.
    max_depth:
        Forwarded to every RegressionTree.
    regression: boolean
        Truthy selects SquareLoss and raw outputs from ``predict``; falsy
        selects CrossEntropy and arg-max labels from ``predict``.
    """

    def __init__(self, n_estimators, learning_rate, min_samples_split,
                 min_impurity, max_depth, regression):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.regression = regression
        self.bar = progressbar.ProgressBar(widgets=bar_widgets)
        self.n_classes = None
        # Baseline that boosting starts from; set by fit().
        self.initial_prediction = None

        if self.regression:
            self.train_loss = SquareLoss()
        else:
            self.train_loss = CrossEntropy()

        self.trees = [
            RegressionTree(min_samples_split=self.min_samples_split,
                           min_impurity=self.min_impurity,
                           max_depth=self.max_depth)
            for _ in range(self.n_estimators)
        ]

    def fit(self, X, y):
        """Fit the trees one after another on the loss gradient.

        ``y`` is reshaped to two dimensions (one row per sample). The
        running prediction starts at the column-wise mean of ``y``; that
        baseline is stored in ``self.initial_prediction`` so ``predict``
        can start from the same point.
        """
        X = np.array(X)
        y = np.asarray(y)
        y = y.reshape(len(y), -1)
        self.initial_prediction = np.mean(y, axis=0)
        y_pred = np.full(y.shape, self.initial_prediction)
        for tree in self.bar(self.trees):
            grad = self.train_loss.backward(y, y_pred)
            tree.fit(X, grad)
            update = np.asarray(tree.predict(X))
            # Step against the gradient estimated by the tree.
            y_pred = y_pred - self.learning_rate * update.reshape(
                len(update), -1)

    def predict(self, X_test):
        """Return the ensemble prediction for ``X_test``.

        The result is the baseline stored by ``fit`` minus the sum of the
        scaled tree outputs (two-dimensional, one row per sample). When
        ``self.regression`` is falsy the scores go through ``softmax`` and
        the index of the largest value per row is returned.
        """
        X_test = np.array(X_test)
        y_pred = None
        for tree in self.trees:
            update = np.asarray(tree.predict(X_test))
            update = self.learning_rate * update.reshape(len(update), -1)
            y_pred = -update if y_pred is None else y_pred - update

        # fit() started from this baseline, so predict() must add it back;
        # without it every prediction is shifted by the training mean.
        baseline = (0.0 if self.initial_prediction is None
                    else self.initial_prediction)
        if y_pred is None:
            # No trees at all: the prediction is just the baseline.
            y_pred = np.zeros((len(X_test),) + np.shape(baseline))
        y_pred = y_pred + baseline

        if not self.regression:
            y_pred = softmax(y_pred)
            y_pred = np.argmax(y_pred, axis=1)
        return y_pred
def __init__(self, n_estimators, learning_rate, min_samples_split,
             min_impurity, max_depth, regression):
    """Store the boosting hyper-parameters and build the tree ensemble.

    Parameters:
    -----------
    n_estimators: int
        Number of RegressionTree instances created (passed to ``range``).
    learning_rate:
        Stored as ``self.learning_rate``.
    min_samples_split, min_impurity, max_depth:
        Stored on the instance and forwarded to every RegressionTree.
    regression: boolean
        Truthy selects SquareLoss as the training loss, falsy selects
        CrossEntropy.
    """
    # Tree-construction settings.
    self.max_depth = max_depth
    self.min_impurity = min_impurity
    self.min_samples_split = min_samples_split

    # Boosting settings.
    self.n_estimators = n_estimators
    self.learning_rate = learning_rate
    self.regression = regression

    self.bar = progressbar.ProgressBar(widgets=bar_widgets)
    self.n_classes = None

    # One loss object, chosen by the task type.
    self.train_loss = SquareLoss() if self.regression else CrossEntropy()

    # One identically configured tree per boosting round.
    tree_settings = dict(min_samples_split=self.min_samples_split,
                         min_impurity=self.min_impurity,
                         max_depth=self.max_depth)
    self.trees = [RegressionTree(**tree_settings)
                  for _ in range(self.n_estimators)]
class MultilayerPerceptron():
    """Multilayer Perceptron classifier.

    A fully-connected network with a single hidden layer. The forward and
    backward passes are written out explicitly instead of being hidden
    behind layer objects.

    Parameters:
    -----------
    n_hidden: int
        Width of the hidden layer (number of columns of ``W``).
    n_iterations: int
        How many full forward/backward passes ``fit`` performs.
    learning_rate: float
        Factor applied to every gradient before it is subtracted from
        the corresponding weight array.
    """

    def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.001):
        self.n_hidden = n_hidden
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()

    def _initialize_weights(self, X, y):
        """Create ``W``/``w0`` (hidden layer) and ``V``/``v0`` (output layer).

        Weights are drawn uniformly from ``[-1/sqrt(fan_in), 1/sqrt(fan_in)]``
        and the bias rows start at zero. Both ``X`` and ``y`` must be
        two-dimensional.
        """
        _, n_features = X.shape
        _, n_outputs = y.shape

        # Hidden layer: fan-in is the number of input features.
        hidden_bound = 1 / math.sqrt(n_features)
        self.W = np.random.uniform(
            low=-hidden_bound, high=hidden_bound,
            size=(n_features, self.n_hidden))
        self.w0 = np.zeros((1, self.n_hidden))

        # Output layer: fan-in is the hidden width.
        output_bound = 1 / math.sqrt(self.n_hidden)
        self.V = np.random.uniform(
            low=-output_bound, high=output_bound,
            size=(self.n_hidden, n_outputs))
        self.v0 = np.zeros((1, n_outputs))

    def _forward(self, X):
        """Run one forward pass and return every intermediate value.

        Returns the tuple ``(hidden_in, hidden_out, output_in, output)``.
        """
        hidden_in = X.dot(self.W) + self.w0
        hidden_out = self.hidden_activation(hidden_in)
        output_in = hidden_out.dot(self.V) + self.v0
        output = self.output_activation(output_in)
        return hidden_in, hidden_out, output_in, output

    def fit(self, X, y):
        """Initialize the weights, then run ``n_iterations`` descent steps."""
        self._initialize_weights(X, y)

        for _ in range(self.n_iterations):
            # ---- forward pass ----
            hidden_in, hidden_out, output_in, y_pred = self._forward(X)

            # ---- backward pass ----
            # Output layer: gradient with respect to the layer's input.
            loss_grad = self.loss.gradient(y, y_pred)
            delta_out = loss_grad * self.output_activation.gradient(output_in)
            grad_v = hidden_out.T.dot(delta_out)
            grad_v0 = np.sum(delta_out, axis=0, keepdims=True)

            # Hidden layer: propagate the output delta back through V.
            back_signal = delta_out.dot(self.V.T)
            delta_hidden = back_signal * self.hidden_activation.gradient(
                hidden_in)
            grad_w = X.T.dot(delta_hidden)
            grad_w0 = np.sum(delta_hidden, axis=0, keepdims=True)

            # ---- gradient descent step ----
            # All gradients were computed above, before any weight moves.
            self.V -= self.learning_rate * grad_v
            self.v0 -= self.learning_rate * grad_v0
            self.W -= self.learning_rate * grad_w
            self.w0 -= self.learning_rate * grad_w0

    def predict(self, X):
        """Return the output-layer activations for ``X`` (a forward pass)."""
        return self._forward(X)[-1]
class GradientBoosting(object):
    """Super class of GradientBoostingClassifier and GradientBoostingRegressor.

    Uses a collection of regression trees, each trained to predict the
    gradient of the loss function with respect to the current prediction.

    Parameters:
    -----------
    n_estimators: int
        The number of regression trees that are used.
    learning_rate: float
        The step length that will be taken when following the negative
        gradient during training.
    min_samples_split: int
        The minimum number of samples needed to make a split when building
        a tree.
    min_impurity: float
        The minimum impurity required to split the tree further.
    max_depth: int
        The maximum depth of a tree.
    regression: boolean
        True or False depending on whether we are doing regression or
        classification.
    """

    def __init__(self, n_estimators, learning_rate, min_samples_split,
                 min_impurity, max_depth, regression):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.regression = regression
        self.bar = progressbar.ProgressBar(widgets=bar_widgets)
        # Baseline that boosting starts from; set by fit().
        self.initial_prediction = None

        # Square loss for regression, log loss for classification.
        self.loss = SquareLoss() if self.regression else CrossEntropy()

        # Initialize the regression trees.
        # NOTE: the keyword is `min_samples_split` (single underscore); the
        # previous spelling `min__samples_split` was a typo.
        self.trees = [
            RegressionTree(min_samples_split=self.min_samples_split,
                           min_impurity=min_impurity,
                           max_depth=self.max_depth)
            for _ in range(n_estimators)
        ]

    def fit(self, X, y):
        """Fit the trees one after another on the loss gradient.

        The running prediction starts at the mean of ``y`` along axis 0.
        That baseline is stored in ``self.initial_prediction`` so that
        ``predict`` can start from the same point.
        """
        self.initial_prediction = np.mean(y, axis=0)
        y_pred = np.full(np.shape(y), self.initial_prediction)
        for i in self.bar(range(self.n_estimators)):
            gradient = self.loss.gradient(y, y_pred)
            self.trees[i].fit(X, gradient)
            update = self.trees[i].predict(X)
            # Update the prediction by stepping against the gradient.
            y_pred -= np.multiply(self.learning_rate, update)

    def predict(self, X):
        """Return the ensemble prediction for ``X``.

        The result is the baseline stored by ``fit`` minus the sum of the
        scaled tree outputs. When ``self.regression`` is falsy the scores
        are turned into a probability distribution along axis 1 and the
        index of the most probable entry per row is returned.
        """
        y_pred = None
        for tree in self.trees:
            update = np.multiply(self.learning_rate, tree.predict(X))
            y_pred = -update if y_pred is None else y_pred - update

        # fit() started from this baseline, so predict() must add it back;
        # without it every prediction is shifted by the training mean.
        baseline = (0.0 if self.initial_prediction is None
                    else self.initial_prediction)
        if y_pred is None:
            # No trees at all: the prediction is just the baseline.
            y_pred = np.zeros((len(X),) + np.shape(baseline))
        y_pred = y_pred + baseline

        if not self.regression:
            # Turn into a probability distribution. The row maximum is
            # subtracted first so exp() cannot overflow; the resulting
            # probabilities are unchanged.
            shifted = y_pred - np.max(y_pred, axis=1, keepdims=True)
            exp_scores = np.exp(shifted)
            probabilities = exp_scores / np.sum(exp_scores, axis=1,
                                                keepdims=True)
            # Set label to the value that maximizes probability.
            y_pred = np.argmax(probabilities, axis=1)
        return y_pred