def test_returns_jacobian_matrix_of_valid_shape(self):
    z = np.array([1, 2, -2], float)
    j = Softmax.gradient(z)
    self.assertTupleEqual(j.shape, (3, 3))

    z = np.array([1, 2], float)
    j = Softmax.gradient(z)
    self.assertTupleEqual(j.shape, (2, 2))
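# For context: a minimal sketch of a Softmax whose gradient returns the full
# Jacobian with the shapes asserted above. This is an assumption based on the
# standard definition J = diag(s) - s s^T, not necessarily the implementation
# under test; the class and method names simply mirror the ones used in the tests.
import numpy as np


class Softmax:
    @staticmethod
    def activation(z):
        # Shift by the max for numerical stability before exponentiating.
        e = np.exp(z - np.max(z))
        return e / np.sum(e)

    @staticmethod
    def gradient(z):
        # Jacobian of the softmax: J[i, j] = s_i * (delta_ij - s_j),
        # so a length-n input yields an (n, n) matrix.
        s = Softmax.activation(z)
        return np.diag(s) - np.outer(s, s)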
class Activation_SoftMax(Layer):
    """A layer that applies the softmax activation to its input.

    Parameters:
    -----------
    input_shape: tuple
        The expected shape of the input to the layer.
    """
    def __init__(self, input_shape=None):
        self.layer_name = 'softmax'
        self.input_shape = input_shape
        self.activation_func = Softmax()
        self.trainable = False

    def initialize(self):
        # Softmax does not change the shape of its input.
        self.output_shape = self.input_shape

    def get_output_shape(self):
        return self.output_shape

    def forward(self, Z, training=True):
        self.layer_input = Z
        return self.activation_func(Z)

    def backward(self, dA):
        Z = self.layer_input
        # gradient() returns one Jacobian per sample, shape (batch, n, n),
        # while dA has shape (batch, n), so the two cannot be multiplied
        # element-wise directly.
        dact = self.activation_func.gradient(Z)
        # Contract dA with each sample's Jacobian:
        # dZ[b, j] = sum_i dA[b, i] * dact[b, i, j]
        dZ = np.einsum('bi,bij->bj', dA, dact)
        assert dZ.shape == Z.shape
        return dZ
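# A quick smoke test of the layer above, assuming numpy and the Softmax /
# Activation_SoftMax definitions are in scope. The batch size and class count
# are arbitrary illustration values.
batch, n_classes = 4, 3
layer = Activation_SoftMax(input_shape=(n_classes,))
layer.initialize()

Z = np.random.randn(batch, n_classes)
A = layer.forward(Z)
print(A.sum(axis=1))                    # each row should sum to 1 if the softmax is applied per sample row

dA = np.random.randn(batch, n_classes)  # upstream gradient from the loss
dZ = layer.backward(dA)
print(dZ.shape == Z.shape)              # backward preserves the input shape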
def test_derivatives_with_different_indices_in_jacobian_matrix(self):
    z = np.array([1, -1.5], float)
    j = Softmax.gradient(z)
    s = Softmax.activation(z)
    self.assertEqual(j[0, 1], s[0] * s[1])
    self.assertEqual(j[1, 0], s[1] * s[0])
def test_get_final_layer_error_for_arrays(self):
    cross_entropy = cost_functions.CrossEntropyCost(self.net)
    z_last = np.array([3, -1], float)
    z_last_prime = Softmax.gradient(z_last)
    y = np.array([0, 0.5], float)
    a_last = Softmax.activation(z_last)
    nabla = cross_entropy.get_final_layer_error(a_last, y, z_last_prime)
    self.assertAlmostEqual(nabla[0], a_last[0] - y[0], places=2)
    self.assertAlmostEqual(nabla[1], a_last[1] - y[1], places=2)
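# The test above relies on the standard softmax/cross-entropy identity: when the
# targets sum to 1, chaining dL/da = -y/a through the softmax Jacobian collapses
# to a - y. A small standalone check of that identity, using an assumed one-hot
# target for illustration (not the test's y):
import numpy as np

z = np.array([3.0, -1.0])
e = np.exp(z - z.max())
a = e / e.sum()                          # softmax output
y = np.array([1.0, 0.0])                 # assumed one-hot target

jacobian = np.diag(a) - np.outer(a, a)   # ds_i/dz_j
dL_da = -y / a                           # cross-entropy gradient w.r.t. a
delta_chain = jacobian.T @ dL_da         # chain rule through the softmax
delta_short = a - y                      # the simplified form

print(np.allclose(delta_chain, delta_short))   # True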
class MultilayerPerceptron():
    """Multilayer Perceptron classifier. A fully-connected neural network with
    one hidden layer. Unrolled to display the whole forward and backward pass.

    Parameters:
    -----------
    n_hidden: int
        The number of processing nodes (neurons) in the hidden layer.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01):
        self.n_hidden = n_hidden
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()

    def _initialize_weights(self, X, y):
        n_samples, n_features = X.shape
        _, n_outputs = y.shape
        # Hidden layer
        limit = 1 / math.sqrt(n_features)
        self.W = np.random.uniform(-limit, limit, (n_features, self.n_hidden))
        self.w0 = np.zeros((1, self.n_hidden))
        # Output layer
        limit = 1 / math.sqrt(self.n_hidden)
        self.V = np.random.uniform(-limit, limit, (self.n_hidden, n_outputs))
        self.v0 = np.zeros((1, n_outputs))

    def fit(self, X, y):
        self._initialize_weights(X, y)

        for i in range(self.n_iterations):

            # ..............
            #  Forward Pass
            # ..............

            # HIDDEN LAYER
            hidden_input = X.dot(self.W) + self.w0  # (1079,64)(64,16)+(1,16) -> (1079,16)
            hidden_output = self.hidden_activation(hidden_input)
            # OUTPUT LAYER
            output_layer_input = hidden_output.dot(self.V) + self.v0
            y_pred = self.output_activation(output_layer_input)

            # ...............
            #  Backward Pass
            # ...............

            # OUTPUT LAYER
            # Grad. w.r.t input of output layer
            grad_wrt_out_l_input = self.loss.gradient(y, y_pred) * self.output_activation.gradient(output_layer_input)  # (1079,10)*(1079,10) -> (1079,10)
            grad_v = hidden_output.T.dot(grad_wrt_out_l_input)  # (16,1079)(1079,10) -> (16,10)
            grad_v0 = np.sum(grad_wrt_out_l_input, axis=0, keepdims=True)  # (1,10)
            # HIDDEN LAYER
            # Grad. w.r.t input of hidden layer
            grad_wrt_hidden_l_input = grad_wrt_out_l_input.dot(self.V.T) * self.hidden_activation.gradient(hidden_input)  # (1079,10)(10,16)*(1079,16) -> (1079,16)
            grad_w = X.T.dot(grad_wrt_hidden_l_input)
            grad_w0 = np.sum(grad_wrt_hidden_l_input, axis=0, keepdims=True)

            # Update weights (by gradient descent)
            # Move against the gradient to minimize loss
            self.V -= self.learning_rate * grad_v
            self.v0 -= self.learning_rate * grad_v0
            self.W -= self.learning_rate * grad_w
            self.w0 -= self.learning_rate * grad_w0

    # Use the trained model to predict labels of X
    def predict(self, X):
        # Forward pass:
        hidden_input = X.dot(self.W) + self.w0
        hidden_output = self.hidden_activation(hidden_input)
        output_layer_input = hidden_output.dot(self.V) + self.v0
        y_pred = self.output_activation(output_layer_input)
        return y_pred
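# Example usage of the unrolled MLP above, assuming the class (and its Sigmoid /
# Softmax / CrossEntropy dependencies) is in scope and scikit-learn is installed.
# The dataset choice and hyperparameters are illustrative; the shape comments in
# fit() suggest the 8x8 digits data (64 features, 10 classes).
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

data = datasets.load_digits()
X = data.data / 16.0                      # scale pixel values to [0, 1]
y = np.eye(10)[data.target]               # one-hot encode the labels

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)

model = MultilayerPerceptron(n_hidden=16, n_iterations=3000, learning_rate=0.01)
model.fit(X_train, y_train)

y_pred = np.argmax(model.predict(X_test), axis=1)
y_true = np.argmax(y_test, axis=1)
print("accuracy:", np.mean(y_pred == y_true))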
# Testing derivative
# `soft` is assumed to be an instance of the softmax activation defined earlier,
# callable on a matrix and exposing a .gradient() method.
test_matrix = np.random.rand(5, 3)
test_matrix.shape

Delta = 0.000000001

# Perturb the first row of the input and compare the finite-difference
# quotient against the corresponding slice of the analytic gradient.
displaced = np.zeros(test_matrix.shape)
displaced[:, :] = test_matrix
displaced[np.arange(0, 1), :] = displaced[np.arange(0, 1), :] + Delta
ans = ((soft(displaced) - soft(test_matrix)) / Delta)[:, :] - \
      (soft.gradient(test_matrix))[:, 0, :] < 0.0000001
print(ans)

# Same check for the third row.
displaced = np.zeros(test_matrix.shape)
displaced[:, :] = test_matrix
displaced[np.arange(2, 3), :] = displaced[np.arange(2, 3), :] + Delta
ans = ((soft(displaced) - soft(test_matrix)) / Delta)[:, :] - \
      (soft.gradient(test_matrix))[:, 2, :] < 0.0000001
print(ans)

# Cross checking soft function with and without softmax included
from loss_functions import MultiClassCrossEntropy
import numpy as np

nb_classes = 5
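# A reusable version of the spot checks above, assuming `soft` follows the same
# convention (callable activation plus a .gradient() returning a stacked
# Jacobian indexed as gradient[:, row, :]). The function name and signature are
# illustrative, not part of the original code.
def finite_difference_check(activation, x, rows, delta=1e-9, tol=1e-7):
    """Perturb each listed input row by `delta` and compare the finite-difference
    quotient against the matching slice of the analytic gradient."""
    analytic = activation.gradient(x)
    for r in rows:
        displaced = x.copy()
        displaced[r, :] += delta
        numeric = (activation(displaced) - activation(x)) / delta
        if not np.all(np.abs(numeric - analytic[:, r, :]) < tol):
            return False
    return True

print(finite_difference_check(soft, test_matrix, rows=[0, 2]))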