def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01):
    """Record the hyperparameters and create the activation/loss objects.

    Parameters:
    -----------
    n_hidden: int
        Number of neurons in the hidden layer.
    n_iterations: int
        Number of training iterations (default 3000).
    learning_rate: float
        Step length used for the weight updates (default 0.01).
    """
    # Hyperparameters, stored unchanged.
    self.learning_rate = learning_rate
    self.n_iterations = n_iterations
    self.n_hidden = n_hidden

    # Building blocks of the network: hidden activation, output
    # activation and the loss, each constructed without arguments.
    self.hidden_activation = Sigmoid()
    self.output_activation = Softmax()
    self.loss = CrossEntropy()
def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01):
    """Record the hyperparameters, create the activation/loss objects and
    declare the (not yet allocated) weight attributes.

    Parameters:
    -----------
    n_hidden: int
        Number of neurons in the hidden layer.
    n_iterations: int
        Number of training iterations (default 3000).
    learning_rate: float
        Step length used for the weight updates (default 0.01).
    """
    # Hyperparameters, stored unchanged.
    self.learning_rate = learning_rate
    self.n_iterations = n_iterations
    self.n_hidden = n_hidden

    # Activation functions and loss, each constructed without arguments.
    self.hidden_activation = Sigmoid()
    self.output_activation = Softmax()
    self.loss = CrossEntropy()

    # Weight placeholders; all start out as None.
    self.W = None   # Hidden layer weights
    self.w0 = None  # Hidden layer bias
    self.V = None   # Output layer weights
    self.v0 = None  # Output layer bias
class LogisticRegression():
    """The Logistic Regression classifier.

    Parameters:
    -----------
    learning_rate: float
        The step length that will be taken when following the negative
        gradient during training.
    gradient_descent: boolean
        True or false depending if gradient descent should be used when
        training. If false then we use batch optimization by least squares.
    """
    def __init__(self, learning_rate=.1, gradient_descent=True):
        # Parameter vector (bias weight first); stays None until fit() runs.
        self.param = None
        self.learning_rate = learning_rate
        self.gradient_descent = gradient_descent
        self.sigmoid = Sigmoid()

    def fit(self, X, y, n_iterations=4000):
        """Fit the parameter vector to the training data.

        Parameters:
        -----------
        X: 2-D array
            Training samples, one row per sample. A column of ones is
            prepended internally for the bias weight.
        y: 1-D array
            One target per row of X (presumably 0/1 labels, since predict()
            rounds the sigmoid output - confirm against callers).
        n_iterations: int
            Number of update steps to perform.
        """
        # Add dummy ones for bias weights
        X = np.insert(X, 0, 1, axis=1)
        _, n_features = np.shape(X)

        # Initialize parameters between [-1/sqrt(N), 1/sqrt(N)]
        limit = 1 / math.sqrt(n_features)
        self.param = np.random.uniform(-limit, limit, (n_features, ))

        # Tune parameters for n iterations
        for _ in range(n_iterations):
            # The linear response is needed by both the prediction and the
            # sigmoid gradient, so compute it once per iteration.
            linear_output = X.dot(self.param)
            # Make a new prediction
            y_pred = self.sigmoid.function(linear_output)
            if self.gradient_descent:
                # Move against the gradient of the loss function with
                # respect to the parameters to minimize the loss
                self.param -= self.learning_rate * -(y - y_pred).dot(X)
            else:
                # Make a diagonal matrix of the sigmoid gradient column vector
                diag_gradient = make_diagonal(
                    self.sigmoid.gradient(linear_output))
                # Batch opt:
                self.param = np.linalg.pinv(
                    X.T.dot(diag_gradient).dot(X)).dot(X.T).dot(
                        diag_gradient.dot(X).dot(self.param) + y - y_pred)

    def predict(self, X):
        """Return the rounded sigmoid output (as ints) for each row of X.

        X must have the same columns as the data passed to fit(); the bias
        column is added here as well.
        """
        # Add dummy ones for bias weights
        X = np.insert(X, 0, 1, axis=1)
        # Round the sigmoid output to the nearest integer to get the labels
        dot = X.dot(self.param)
        y_pred = np.round(self.sigmoid.function(dot)).astype(int)
        return y_pred
class LogisticRegression():
    """Logistic Regression classifier.

    No bias column is added to X by this class; include a constant feature
    in X if an intercept is wanted.

    Parameters:
    -----------
    learning_rate: float
        The step length taken when following the negative gradient during
        training.
    gradient_descent: boolean
        If True the parameters are tuned by gradient descent, otherwise by
        the least-squares batch update in fit().
    """
    def __init__(self, learning_rate=.1, gradient_descent=True):
        # Parameter vector; stays None until fit() runs.
        self.param = None
        self.learning_rate = learning_rate
        self.gradient_descent = gradient_descent
        self.sigmoid = Sigmoid()

    def _initialize_parameters(self, X):
        """Draw one parameter per column of X, uniformly at random."""
        n_features = np.shape(X)[1]
        # Initialize parameters between [-1/sqrt(N), 1/sqrt(N)]
        limit = 1 / math.sqrt(n_features)
        self.param = np.random.uniform(-limit, limit, (n_features,))

    def fit(self, X, y, n_iterations=4000):
        """Fit the parameter vector to the training data.

        Parameters:
        -----------
        X: 2-D array
            Training samples, one row per sample.
        y: 1-D array
            One target per row of X (presumably 0/1 labels, since predict()
            rounds the sigmoid output - confirm against callers).
        n_iterations: int
            Number of update steps to perform.
        """
        self._initialize_parameters(X)
        # Tune parameters for n iterations
        for _ in range(n_iterations):
            # The linear response is needed by both the prediction and the
            # sigmoid gradient, so compute it once per iteration.
            linear_output = X.dot(self.param)
            # Make a new prediction
            y_pred = self.sigmoid(linear_output)
            if self.gradient_descent:
                # Move against the gradient of the loss function with
                # respect to the parameters to minimize the loss
                self.param -= self.learning_rate * -(y - y_pred).dot(X)
            else:
                # Make a diagonal matrix of the sigmoid gradient column vector
                diag_gradient = make_diagonal(
                    self.sigmoid.gradient(linear_output))
                # Batch opt:
                self.param = np.linalg.pinv(
                    X.T.dot(diag_gradient).dot(X)).dot(X.T).dot(
                        diag_gradient.dot(X).dot(self.param) + y - y_pred)

    def predict(self, X):
        """Return the rounded sigmoid output (as ints) for each row of X."""
        y_pred = np.round(self.sigmoid(X.dot(self.param))).astype(int)
        return y_pred
def __init__(self, learning_rate=.1, gradient_descent=True):
    """Store the training options and create the sigmoid object.

    Parameters:
    -----------
    learning_rate: float
        Step length stored for use during training (default 0.1).
    gradient_descent: boolean
        Flag stored for selecting the training procedure (default True).
    """
    # Training options, stored unchanged.
    self.gradient_descent = gradient_descent
    self.learning_rate = learning_rate
    # No parameters yet: the attribute is only declared here.
    self.param = None
    self.sigmoid = Sigmoid()
class MultilayerPerceptron():
    """Multilayer Perceptron classifier. A fully-connected neural network
    with one hidden layer. Unrolled to display the whole forward and backward
    pass.

    Parameters:
    -----------
    n_hidden: int
        The number of processing nodes (neurons) in the hidden layer.
    n_iterations: int
        The number of training iterations the algorithm will tune the
        weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01):
        self.n_hidden = n_hidden
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()

    def _initialize_weights(self, X, y):
        """Allocate weights sized from X (2-D) and y (2-D, one column per
        output): uniform random weights and zero biases for both layers."""
        _, n_features = X.shape
        _, n_outputs = y.shape
        # Hidden layer
        limit = 1 / math.sqrt(n_features)
        self.W = np.random.uniform(-limit, limit, (n_features, self.n_hidden))
        self.w0 = np.zeros((1, self.n_hidden))
        # Output layer
        limit = 1 / math.sqrt(self.n_hidden)
        self.V = np.random.uniform(-limit, limit, (self.n_hidden, n_outputs))
        self.v0 = np.zeros((1, n_outputs))

    def _forward(self, X):
        """Run the forward pass and return every intermediate value as
        (hidden_input, hidden_output, output_layer_input, y_pred)."""
        # HIDDEN LAYER
        hidden_input = X.dot(self.W) + self.w0
        hidden_output = self.hidden_activation(hidden_input)
        # OUTPUT LAYER
        output_layer_input = hidden_output.dot(self.V) + self.v0
        y_pred = self.output_activation(output_layer_input)
        return hidden_input, hidden_output, output_layer_input, y_pred

    def fit(self, X, y):
        """Initialize the weights and train them for n_iterations full-batch
        gradient descent steps.

        Parameters:
        -----------
        X: 2-D array
            Training samples, one row per sample.
        y: 2-D array
            Targets with one column per output node and one row per sample.
        """
        self._initialize_weights(X, y)

        for _ in range(self.n_iterations):

            # ..............
            #  Forward Pass
            # ..............

            (hidden_input, hidden_output,
             output_layer_input, y_pred) = self._forward(X)

            # ...............
            #  Backward Pass
            # ...............

            # OUTPUT LAYER
            # Grad. w.r.t input of output layer
            grad_wrt_out_l_input = self.loss.gradient(
                y, y_pred) * self.output_activation.gradient(output_layer_input)
            grad_v = hidden_output.T.dot(grad_wrt_out_l_input)
            grad_v0 = np.sum(grad_wrt_out_l_input, axis=0, keepdims=True)
            # HIDDEN LAYER
            # Grad. w.r.t input of hidden layer
            grad_wrt_hidden_l_input = grad_wrt_out_l_input.dot(
                self.V.T) * self.hidden_activation.gradient(hidden_input)
            grad_w = X.T.dot(grad_wrt_hidden_l_input)
            grad_w0 = np.sum(grad_wrt_hidden_l_input, axis=0, keepdims=True)

            # Update weights (by gradient descent)
            # Move against the gradient to minimize loss
            self.V -= self.learning_rate * grad_v
            self.v0 -= self.learning_rate * grad_v0
            self.W -= self.learning_rate * grad_w
            self.w0 -= self.learning_rate * grad_w0

    def predict(self, X):
        """Run the trained network on X and return the output-activation
        values, one row per sample (no argmax/label conversion is applied)."""
        # Forward pass; only the final activation is of interest here.
        y_pred = self._forward(X)[-1]
        return y_pred
import logging import numpy as np import progressbar from mlfromscratch.utils.misc import bar_widgets from mlfromscratch.utils import batch_iterator from mlfromscratch.deep_learning.activation_functions import Sigmoid sigmoid = Sigmoid() class RBM(): """Bernoulli Restricted Boltzmann Machine (RBM) Parameters: ----------- n_hidden: int: The number of processing nodes (neurons) in the hidden layer. learning_rate: float The step length that will be used when updating the weights. batch_size: int The size of the mini-batch used to calculate each weight update. n_iterations: float The number of training iterations the algorithm will tune the weights for. Reference: A Practical Guide to Training Restricted Boltzmann Machines URL: https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf """ def __init__(self, n_hidden=128,
def __init__(self):
    """Expose a single Sigmoid object as ``log_func`` and its ``gradient``
    method as ``log_grad``."""
    # One shared instance backs both attributes.
    activation = Sigmoid()
    self.log_func, self.log_grad = activation, activation.gradient