def finalTest(size_training, size_test, hidden_layers, lambd, num_iterations):
    print "\nBeginning of the finalTest... \n"

    images_training, labels_training, images_test, labels_test = read_dataset(size_training, size_test)

    # Setup the parameters you will use for this exercise
    input_layer_size = 784   # 28x28 input images of digits
    num_labels = 10          # 10 labels, from 0 to 9 (one label for each digit)

    layers = [input_layer_size] + hidden_layers + [num_labels]

    # Fill randInitializeWeights.py in order to initialize the neural network weights.
    Theta = randInitializeWeights(layers)

    # Unroll parameters
    nn_weights = unroll_params(Theta)

    res = fmin_l_bfgs_b(costFunction, nn_weights, fprime=backwards,
                        args=(layers, images_training, labels_training, num_labels, lambd),
                        maxfun=num_iterations, factr=1., disp=True)
    Theta = roll_params(res[0], layers)

    print "\nTesting Neural Network... \n"

    pred_training = predict(Theta, images_training)
    print '\nAccuracy on training set: ' + str(mean(labels_training == pred_training) * 100)

    pred = predict(Theta, images_test)
    print '\nAccuracy on test set: ' + str(mean(labels_test == pred) * 100)

    # Display the test images that the algorithm got wrong.
    temp = (labels_test == pred)
    indexes_false = [i for i in range(size_test) if temp[i] == 0]
    displayData(images_test[indexes_false, :])
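# randInitializeWeights is called above but its file is not shown here. Below is
# a minimal sketch of what randInitializeWeights.py might contain, assuming each
# Theta[i] has shape (layers[i+1], layers[i]+1) with the bias column first. The
# epsilon interval is one common symmetry-breaking heuristic, not necessarily
# the one used in the original file.
import numpy as np

def randInitializeWeights(layers):
    # One weight matrix per pair of consecutive layers; the +1 accounts
    # for the bias unit feeding each layer.
    Theta = []
    for l_in, l_out in zip(layers[:-1], layers[1:]):
        epsilon = np.sqrt(6.0) / np.sqrt(l_in + l_out)  # common heuristic
        Theta.append(np.random.rand(l_out, l_in + 1) * 2 * epsilon - epsilon)
    return Theta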
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    Theta_grad = [zeros(w.shape) for w in Theta]

    # The vector y passed into the function is a vector of labels containing
    # values from 1..K. Map it into a binary matrix of 1's and 0's to be used
    # with the neural network cost function.
    yv = zeros((num_labels, m))
    for i in range(m):
        yv[y[i]][i] = 1

    # Backpropagation, one training example at a time.
    a = [[] for i in range(num_layers)]
    z = [[] for i in range(num_layers)]
    delta = [[] for i in range(num_layers)]
    for t in range(m):
        # Feedforward: a[i] holds the activations of layer i (with bias).
        a[0] = X[t]
        for i in range(0, num_layers - 1):
            a[i] = insert(a[i], 0, 1)
            z[i] = Theta[i].dot(transpose(a[i]))
            a[i + 1] = sigmoid(z[i])

        # Output-layer error, then propagate it backwards.
        delta[-1] = a[-1] - yv[:, t]
        for i in range(num_layers - 1, 0, -1):
            if i > 1:
                # z[0] corresponds to z2, hence the z[i - 2] offset.
                delta[i - 1] = (transpose(Theta[i - 1][:, 1:]).dot(delta[i])) * sigmoidGradient(z[i - 2])

        # Accumulate the gradient contribution of this example.
        for i in range(0, num_layers - 1):
            Theta_grad[i] += atleast_2d(delta[i + 1]).T.dot(atleast_2d(a[i]))

    # Regularization (the bias column j = 0 is not regularized).
    for l in range(0, num_layers - 1):
        for i in range(Theta[l].shape[0]):
            for j in range(1, Theta[l].shape[1]):
                Theta_grad[l][i][j] += lambd * Theta[l][i][j]

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad / m
def checkNNGradients(lambd):
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5
    layers = [3, 5, 3]

    # At this point we generate some random data.
    Theta = []
    Theta.append(debugInitializeWeights(hidden_layer_size, input_layer_size))
    Theta.append(debugInitializeWeights(num_labels, hidden_layer_size))
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = remainder(arange(m) + 1, num_labels)

    # Unroll parameters
    nn_params = unroll_params(Theta)

    # Compute Numerical Gradient
    numgrad = computeNumericalGradient(nn_params, layers, X, y, num_labels, lambd)

    # Compute Analytical Gradient (Backpropagation)
    truegrad = backwards(nn_params, layers, X, y, num_labels, lambd)

    print concatenate(([numgrad], [truegrad]), axis=0).transpose()
    print "The above two columns must be very similar.\n(Left: Numerical Gradient, Right: Analytical Gradient (Backpropagation))\n"

    diff = linalg.norm(numgrad - truegrad) / linalg.norm(numgrad + truegrad)
    print "\nNote: If the implementation of the backpropagation is correct, the relative difference should be quite small (less than 1e-09)."
    print "Relative difference: " + str(diff) + "\n"
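# computeNumericalGradient is called above but not defined in this file. Below
# is a minimal central-difference sketch, assuming costFunction has the
# signature used throughout this code; the perturbation size 1e-4 is a
# conventional choice, not taken from the original file.
import numpy as np

def computeNumericalGradient(nn_params, layers, X, y, num_labels, lambd):
    # Central differences: perturb one parameter at a time and measure the
    # change in the cost, (J(theta + e) - J(theta - e)) / (2 * e).
    numgrad = np.zeros(nn_params.shape)
    perturb = np.zeros(nn_params.shape)
    e = 1e-4
    for p in range(nn_params.size):
        perturb[p] = e
        loss1 = costFunction(nn_params - perturb, layers, X, y, num_labels, lambd)
        loss2 = costFunction(nn_params + perturb, layers, X, y, num_labels, lambd)
        numgrad[p] = (loss2 - loss1) / (2.0 * e)
        perturb[p] = 0
    return numgrad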
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    Theta_grad = [zeros(w.shape) for w in Theta]

    # ================================ TODO ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((num_labels, m))

    # ================================ TODO ================================
    # At this point implement the backpropagation algorithm.

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
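# roll_params and unroll_params are used throughout this code but not shown
# here. A minimal sketch of what they might look like, assuming each Theta[i]
# has shape (layers[i+1], layers[i]+1) and that unrolling simply concatenates
# the flattened matrices in order; the actual roll_params.py/unroll_params.py
# may differ.
import numpy as np

def unroll_params(Theta):
    # Flatten every weight matrix and concatenate into one vector.
    return np.concatenate([w.flatten() for w in Theta])

def roll_params(nn_weights, layers):
    # Rebuild the list of weight matrices from the flat vector, assuming
    # Theta[i] has shape (layers[i + 1], layers[i] + 1).
    Theta = []
    offset = 0
    for l_in, l_out in zip(layers[:-1], layers[1:]):
        size = l_out * (l_in + 1)
        Theta.append(nn_weights[offset:offset + size].reshape(l_out, l_in + 1))
        offset += size
    return Theta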
def checkNNCost(lambd):
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5
    layers = [3, 5, 3]

    Theta = []
    Theta.append(debugInitializeWeights(hidden_layer_size, input_layer_size))
    Theta.append(debugInitializeWeights(num_labels, hidden_layer_size))
    nn_params = unroll_params(Theta)

    X = debugInitializeWeights(m, input_layer_size - 1)
    y = remainder(arange(m) + 1, num_labels)

    cost = costFunction(nn_params, layers, X, y, num_labels, lambd)
    print 'Cost: ' + str(cost)
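# costFunction is evaluated above and minimized in finalTest, but its body is
# not shown in this file. Below is a minimal sketch of the regularized
# cross-entropy cost under the same conventions used here (bias column first,
# one-hot labels); it is an assumed reconstruction, not the original
# costFunction.py.
import numpy as np

def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Feedforward, then J = -1/m * sum(yv*log(h) + (1-yv)*log(1-h))
    #                      + lambd/(2m) * sum(Theta[:, 1:] ** 2)
    m = X.shape[0]
    Theta = roll_params(nn_weights, layers)

    # One-hot encode the labels.
    yv = np.zeros((m, num_labels))
    yv[np.arange(m), np.asarray(y, dtype=int)] = 1

    # Forward propagation with a bias column prepended at each layer.
    a = X
    for w in Theta:
        a = np.hstack((np.ones((a.shape[0], 1)), a))
        a = sigmoid(a.dot(w.T))

    cost = -np.sum(yv * np.log(a) + (1 - yv) * np.log(1 - a)) / m
    # The bias weights (first column) are not regularized.
    cost += lambd / (2.0 * m) * sum(np.sum(w[:, 1:] ** 2) for w in Theta)
    return cost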
layers = [input_layer_size]
for i in range(num_of_hidden_layers):
    layers = layers + [int(raw_input('Please select the number of nodes for hidden layer ' + str(i + 1) + ': '))]
layers = layers + [num_labels]

raw_input('\nProgram paused. Press enter to continue!!!')

print "\nInitializing Neural Network Parameters ...\n"

# ================================ DONE ================================
# Fill randInitializeWeights.py in order to initialize the neural network weights.
Theta = randInitializeWeights(layers)

# Unroll parameters
nn_weights = unroll_params(Theta)

raw_input('\nProgram paused. Press enter to continue!!!')

# ================================ Step 3: Sigmoid ================================================
# Before you start implementing the neural network, you will first
# implement the gradient for the sigmoid function. You should complete the
# code in the sigmoidGradient.py file.
#
print "\nEvaluating sigmoid function ...\n"

g = sigmoid(array([-1, -0.5, 0, 0.5, 1]))
print "Sigmoid evaluated at [-1 -0.5 0 0.5 1]: "
print g
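# sigmoid and sigmoidGradient are called throughout this code but their files
# are not shown here. A minimal sketch of what sigmoid.py and
# sigmoidGradient.py presumably contain, using the standard identity
# g'(z) = g(z) * (1 - g(z)).
import numpy as np

def sigmoid(z):
    # Logistic function, applied elementwise.
    return 1.0 / (1.0 + np.exp(-z))

def sigmoidGradient(z):
    # Derivative of the sigmoid: g'(z) = g(z) * (1 - g(z)).
    g = sigmoid(z)
    return g * (1 - g)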
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    Theta_grad = [zeros(w.shape) for w in Theta]

    # The vector y passed into the function is a vector of labels containing
    # values from 1..K; map it into a binary matrix of 1's and 0's to be used
    # with the neural network cost function.
    yv = zeros((num_labels, m))
    for i in range(m):
        yv[y[i], i] = 1

    # Feedforward over the whole batch at once.
    # A: list of activations (with bias) after each layer;
    # Z: list of pre-activations, with Z[0] a placeholder so that Z[j] is
    # the pre-activation of layer j.
    A = []
    a = ones(X.shape[0])
    a = vstack((a, X.transpose()))
    Z = []
    Z.append(a)
    for i in range(num_layers - 1):
        A.append(a.transpose())
        z = dot(Theta[i], a)
        Z.append(z)
        a = sigmoid(z)
        if i != num_layers - 2:
            a = vstack((ones(a.shape[1]), a))
    A.append(a.transpose())
    h = a.transpose()

    # delta for the last layer
    delta = h - yv.transpose()

    # Backpropagation: accumulate the gradients layer by layer.
    for j in range(num_layers - 2, 0, -1):
        Theta_grad[j] = Theta_grad[j] + dot(delta.transpose(), A[j])
        # delta for the current layer (the first column of Theta, the bias
        # weights, has to be removed)
        tmp = dot(Theta[j][:, 1:].transpose(), delta.transpose())
        tmp = tmp.transpose()
        delta = sigmoidGradient(Z[j].transpose()) * tmp
    Theta_grad[0] = Theta_grad[0] + dot(delta.transpose(), A[0])

    # Average over the batch and regularize (bias column k = 0 excluded).
    for i in range(num_layers - 1):
        Theta_grad[i] /= m
        Theta_grad[i][:, 1:] += lambd / m * Theta[i][:, 1:]

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
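# The per-element loop that builds yv above can also be written in one
# vectorized line. A small self-contained equivalent, assuming y holds integer
# labels in 0..num_labels-1 as in the loop:
import numpy as np

y_example = np.array([1, 2, 0, 1, 2])  # illustrative labels
num_labels = 3

# Column i of yv is the one-hot encoding of y_example[i], i.e. yv[y[i], i] = 1.
yv = np.eye(num_labels)[:, y_example]
print(yv)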
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    Theta_grad = [zeros(w.shape) for w in Theta]

    # ================================ DONE ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((m, num_labels))
    for i in range(m):
        yv[i][y[i]] += 1

    # ================================ DONE ================================
    # At this point implement the backpropagation algorithm.

    # Step 1: Initialization of useful variables
    # Z and A store the hidden states of the network, as lists of matrices
    # of size num_layers.
    A = [addColumnOne(X)]
    Z = [addColumnOne(X)]
    # delta stores the delta for each layer, from the last to the second
    # layer (in reverse order).
    delta = []

    # Step 2: Feedforward
    for i in range(num_layers - 1):
        h = A[i].dot(Theta[i].T)
        Z.append(h)
        h = addColumnOne(sigmoid(h))
        A.append(h)

    # Step 3: Backpropagation
    d = removeFirstColumn(A[-1]) - yv
    delta.append(d)
    for i in range(num_layers - 2, 0, -1):
        d = removeFirstColumn(d.dot(Theta[i])) * sigmoidGradient(Z[i])
        delta.append(d)
    delta.reverse()
    # delta is of size num_layers - 1 (no delta for the input layer)

    for i in range(num_layers - 1):
        Theta_grad[i] += delta[i].T.dot(A[i])
        # DONE: no regularization on the bias weights!
        Theta_grad[i] += lambd * Theta[i]
        for j in range(Theta[i].shape[0]):
            Theta_grad[i][j, 0] -= lambd * Theta[i][j, 0]
        Theta_grad[i] /= m

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
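# addColumnOne and removeFirstColumn are used above but not defined in this
# file. A minimal sketch of what these helpers might look like (hypothetical
# implementations consistent with how they are called):
import numpy as np

def addColumnOne(X):
    # Prepend a column of ones (the bias units) to a 2-D array.
    return np.hstack((np.ones((X.shape[0], 1)), X))

def removeFirstColumn(X):
    # Drop the bias column again.
    return X[:, 1:]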
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    """
    :param nn_weights: Neural network parameters (vector)
    :param layers: a list with the number of units per layer.
    :param X: a matrix where every row is a training example for a handwritten digit image
    :param y: a vector with the labels of each instance
    :param num_labels: the number of units in the output layer
    :param lambd: regularization factor
    :return: the gradient of the neural network (vector)
    """
    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # The vector y passed into the function is a vector of labels
    # containing values from 1..K; map it into a one-hot matrix of 1's
    # and 0's to be used with the neural network cost function.
    yv = np.zeros((num_labels, m))
    for i in range(len(y)):
        yv[int(y[i]), i] = 1
    yv = np.transpose(yv)

    # Feedforward: a[i] are the activations (with bias), z[i] the
    # pre-activations; z[0] is a placeholder.
    a = []
    z = []
    x = np.copy(X)
    a.append(insertOne(x))
    z.append(x)

    # If you want to be able to follow the training accuracy:
    # pred = predict(Theta, X)
    # accuracy = np.mean(y == pred) * 100
    # print(accuracy)

    for i in range(num_layers - 1):
        s = np.shape(Theta[i])
        theta = Theta[i][:, 1:s[1]]
        x = np.dot(x, np.transpose(theta))
        x = x + Theta[i][:, 0]
        z.append(x)
        x = sigmoid(x)
        a.append(insertOne(x))

    # Backpropagation: delta[i] is the error of layer i.
    delta = [np.zeros(w.shape) for w in z]
    delta[num_layers - 1] = (x - yv)
    for i in range(num_layers - 2, 0, -1):
        s = np.shape(Theta[i])
        theta = np.copy(Theta[i][:, 1:s[1]])
        temp = np.dot(np.transpose(theta), np.transpose(delta[i + 1]))
        delta[i] = np.transpose(temp) * sigmoidGradient(z[i])

    # Accumulate the gradients.
    Delta = []
    for i in range(num_layers - 1):
        temp = np.dot(np.transpose(delta[i + 1]), a[i])
        Delta.append(temp)

    # If you want to follow the cost during the training:
    # cost = (yv * np.log(x) + (1 - yv) * np.log(1 - x)) / m
    # cost = -np.sum(cost)
    # for i in range(num_layers - 1):
    #     cost += lambd * np.sum(Theta[i] ** 2) / (2 * m)

    Theta_grad = [(d / m) for d in Delta]
    for i, t in enumerate(Theta):
        current = lambd * t / m
        # The bias weights (first column) must not be regularized. Note that
        # checkNNGradients will only agree with this once the cost used for
        # the numerical gradient also excludes the bias weights from the
        # regularization term.
        current[:, 0] = 0
        Theta_grad[i] += current

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
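# predict is referenced above (and in finalTest) but not shown in this file.
# A minimal sketch under the same conventions (bias column first, predicted
# label = index of the most activated output unit); the original predict.py
# may differ.
import numpy as np

def predict(Theta, X):
    # Forward-propagate the whole batch and take the argmax per example.
    a = X
    for w in Theta:
        a = np.hstack((np.ones((a.shape[0], 1)), a))
        a = sigmoid(a.dot(w.T))
    return np.argmax(a, axis=1)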
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    Theta_grad = [np.zeros(w.shape) for w in Theta]

    yv = np.zeros((num_labels, m))
    for i in range(m):
        yv[y[i]][i] = 1

    # Implementation of the backpropagation algorithm
    for i in range(m):
        # lists where the activations and pre-activations are stored
        a_values, z_values = [], []
        a = np.append([1], X[i, :])
        a_values.append(a)

        # Feedforward loop
        for k in range(num_layers - 1):
            z = np.dot(Theta[k], a)
            z_values.append(z)
            a = np.append([1], sigmoid(z))
            a_values.append(a)

        # error vector of the output layer
        delta_layer = a[1:] - yv[:, i]
        # np.outer computes the matrix product of delta_layer.T and a_values[-2]
        Theta_grad[-1] += np.outer(delta_layer, a_values[-2]) / m

        # Descending loop
        for h in range(num_layers - 2):
            # Error of the (num_layers - 2 - h)-th hidden layer; the error
            # that corresponds to the bias units is not taken into account.
            delta_layer = np.dot(Theta[-1 - h].T, delta_layer)[1:] * sigmoidGradient(z_values[-2 - h])
            # Gradient contribution of this example
            Theta_grad[-2 - h] += np.outer(delta_layer, a_values[-3 - h]) / m

    # Regularization
    for h in range(num_layers - 1):
        # The terms corresponding to the bias units are not regularized.
        Theta_grad[h][:, 1:] += lambd * Theta[h][:, 1:] / m

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
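# Any of the backwards implementations above can be verified against the
# numerical gradient with checkNNGradients; the lambda values below are
# illustrative. The printed relative difference should be well below 1e-9,
# first without and then with regularization.
checkNNGradients(0.0)
checkNNGradients(1.0)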