def finalTest(size_training, size_test, hidden_layers, lambd, num_iterations):
    print "\nBeginning of the finalTest... \n"

    images_training, labels_training, images_test, labels_test = read_dataset(size_training, size_test)

    # Setup the parameters you will use for this exercise
    input_layer_size = 784   # 28x28 input images of digits
    num_labels = 10          # 10 labels, from 0 to 9 (one label for each digit)

    layers = [input_layer_size] + hidden_layers + [num_labels]
    num_of_hidden_layers = len(hidden_layers)

    # Fill in randInitializeWeights.py in order to initialize the neural network weights.
    Theta = randInitializeWeights(layers)

    # Unroll parameters
    nn_weights = unroll_params(Theta)

    res = fmin_l_bfgs_b(costFunction, nn_weights, fprime=backwards,
                        args=(layers, images_training, labels_training, num_labels, lambd),
                        maxfun=num_iterations, factr=1., disp=True)
    Theta = roll_params(res[0], layers)

    print "\nTesting Neural Network... \n"

    pred_training = predict(Theta, images_training)
    print '\nAccuracy on training set: ' + str(mean(labels_training == pred_training) * 100)

    pred = predict(Theta, images_test)
    print '\nAccuracy on test set: ' + str(mean(labels_test == pred) * 100)

    # Display the test images the algorithm got wrong
    temp = (labels_test == pred)
    indexes_false = []
    for i in range(size_test):
        if temp[i] == 0:
            indexes_false.append(i)

    # indexes_false refers to test examples, so display the misclassified test images
    displayData(images_test[indexes_false, :])
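# The script above relies on randInitializeWeights, unroll_params and roll_params,
# which are not reproduced in this section. A minimal sketch of what they might look
# like, assuming epsilon-based uniform initialization and plain C-order flatten/reshape
# round-tripping (the actual helpers in the assignment may differ in detail):
import numpy as np

def randInitializeWeights(layers, epsilon_init=0.12):
    # One weight matrix per pair of consecutive layers, with a bias column,
    # drawn uniformly from [-epsilon_init, epsilon_init].
    return [np.random.rand(layers[i + 1], layers[i] + 1) * 2 * epsilon_init - epsilon_init
            for i in range(len(layers) - 1)]

def unroll_params(Theta):
    # Flatten the list of weight matrices into a single 1-D vector.
    return np.concatenate([w.flatten() for w in Theta])

def roll_params(nn_weights, layers):
    # Reshape the flat vector back into the list of weight matrices.
    Theta, start = [], 0
    for i in range(len(layers) - 1):
        rows, cols = layers[i + 1], layers[i] + 1
        Theta.append(nn_weights[start:start + rows * cols].reshape(rows, cols))
        start += rows * cols
    return Theta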
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    Theta_grad = [zeros(w.shape) for w in Theta]

    # ================================ TODO ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((num_labels, m))
    for i in range(m):
        yv[y[i]][i] = 1

    # ================================ TODO ================================
    # At this point implement the backpropagation algorithm
    a = [[] for i in range(num_layers)]
    z = [[] for i in range(num_layers)]
    delta = [[] for i in range(num_layers)]

    for t in range(m):
        # Feedforward pass for example t
        a[0] = X[t]
        for i in range(0, num_layers - 1):
            a[i] = insert(a[i], 0, 1)   # prepend the bias unit
            z[i] = Theta[i].dot(transpose(a[i]))
            a[i + 1] = sigmoid(z[i])

        # Error at the output layer
        delta[-1] = a[-1] - yv[:, t]

        # Propagate the error backwards through the hidden layers
        for i in range(num_layers - 1, 0, -1):
            if i > 1:
                # z[0] corresponds to z2, hence the index shift
                delta[i - 1] = (transpose(Theta[i - 1][:, 1:]).dot(delta[i])) * sigmoidGradient(z[i - 2])

        # Accumulate the gradients
        for i in range(0, num_layers - 1):
            Theta_grad[i] += atleast_2d(delta[i + 1]).T.dot(atleast_2d(a[i]))

    # Regularization (the bias column j == 0 is not regularized)
    for l in range(0, num_layers - 1):
        for i in range(Theta[l].shape[0]):
            for j in range(1, Theta[l].shape[1]):
                Theta_grad[l][i][j] += lambd * Theta[l][i][j]

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad / m
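# The per-example loop above and the vectorized variants below call sigmoid and
# sigmoidGradient, which are not reproduced in this section. A minimal sketch,
# assuming the standard logistic activation:
import numpy as np

def sigmoid(z):
    # Logistic function, applied element-wise.
    return 1.0 / (1.0 + np.exp(-z))

def sigmoidGradient(z):
    # Derivative of the logistic function: g'(z) = g(z) * (1 - g(z)).
    s = sigmoid(z)
    return s * (1.0 - s)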
def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the cost function of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Unroll Params
    Theta = roll_params(nn_weights, layers)

    J = 0

    yv = np.zeros((num_labels, m))
    for i in range(m):
        yv[y[i]][i] = 1

    # Cost of the neural network (feedforward)

    # Activation of the k-th layer for the i-th example
    def scores_layer(i, k):
        # k = 0 gives the activation of the first hidden layer
        # k = l gives the activation of the (l+1)-th layer after the input
        if k == 0:
            x_vect = np.append([1], X[i, :])  # insert 1 at the beginning of the input image
            return sigmoid(np.dot(Theta[0], x_vect))
        # Insert 1 at the beginning of the activation of the previous layer
        res_with_bias = np.append([1], scores_layer(i, k - 1))
        return sigmoid(np.dot(Theta[k], res_with_bias))

    # Cost function for the i-th example
    def cost_i(i):
        activation_layer = scores_layer(i, num_layers - 2)  # activation of the output layer
        y_i = yv[:, i]
        return (-y_i * np.log(activation_layer) - (1 - y_i) * np.log(1 - activation_layer)).sum()

    # Total cost
    for i in range(m):
        J += cost_i(i)
    J /= m

    # Regularization
    coeff_reg = lambd / (2 * m)
    # Loop over the weight matrices
    for h in range(num_layers - 1):
        sub_weights = Theta[h][:, 1:]  # the terms corresponding to the bias factors are not regularized
        J += coeff_reg * (sub_weights * sub_weights).sum()

    return J
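# In equation form, the quantity computed above (and by the other costFunction
# variants in this section) is the regularized cross-entropy cost, with
# h_\Theta(x^{(i)}) the output-layer activation for example i:
#
#   J(\Theta) = \frac{1}{m} \sum_{i=1}^{m} \sum_{k=1}^{K}
#               \left[ -y_k^{(i)} \log h_\Theta(x^{(i)})_k
#                      - (1 - y_k^{(i)}) \log\left(1 - h_\Theta(x^{(i)})_k\right) \right]
#               + \frac{\lambda}{2m} \sum_{l} \sum_{p} \sum_{q \ge 1} \left(\Theta^{(l)}_{p,q}\right)^2
#
# The q >= 1 restriction in the penalty is what excludes the bias column of each
# weight matrix from the regularization.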
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    Theta_grad = [zeros(w.shape) for w in Theta]

    # ================================ TODO ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((num_labels, m))

    # ================================ TODO ================================
    # At this point implement the backpropagation algorithm

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
def cross_validation(lambd_values=[0.1], maxfun_values=[200]):
    """Trains the neural network for every (lambd, maxfun) pair and tests its accuracy.

    Parameters:
        lambd_values: candidate values of the regularization coefficient
        maxfun_values: candidate numbers of iterations of the backpropagation training
    """
    n_lambd, n_maxfun = len(lambd_values), len(maxfun_values)

    # Creation of the DataFrame where the results are to be stored
    # (one row per (lambd, maxfun) pair, in the same order as the nested loops below)
    df_results = pd.DataFrame(index=range(n_lambd * n_maxfun))
    df_results['Maxfun'] = list(maxfun_values) * n_lambd
    df_results['Lambd'] = np.repeat(lambd_values, n_maxfun)
    df_results['Hidden layers'] = num_of_hidden_layers
    nodes_avg = np.mean(layers[1:-1])
    df_results['Nodes per hidden layer (avg)'] = nodes_avg

    accuracy_col = []
    for lambd in lambd_values:
        for maxfun in maxfun_values:
            start = time()  # start of the timer
            res = opt.fmin_l_bfgs_b(costFunction, nn_weights, fprime=backwards,
                                    args=(layers, images_training, labels_training, num_labels, lambd),
                                    maxfun=maxfun, factr=1., disp=True)
            Theta = roll_params(res[0], layers)

            # input('\nProgram paused. Press enter to continue!!!')
            # print("\nTesting Neural Network... \n")

            pred = predict(Theta, images_test)
            end = time()  # end of the timer

            accuracy = np.mean(labels_test == pred) * 100
            print('\nLambda =', lambd)
            print('Maxfun =', maxfun)
            time_complexity = end - start
            print('Time:', time_complexity, 'seconds')
            print('Accuracy =', accuracy, '%')

            # Accumulation of the 'Accuracy' column
            accuracy_col.append(accuracy)

    # Accuracy values stored into the dataframe
    df_results['Accuracy'] = accuracy_col
    return df_results
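# A hypothetical call of cross_validation, assuming the surrounding script has
# already loaded the dataset and defined the globals used inside the function
# (layers, nn_weights, images_training, labels_training, images_test, labels_test,
# num_labels, num_of_hidden_layers); the grid values below are illustrative only:
df = cross_validation(lambd_values=[0.1, 1.0, 3.0], maxfun_values=[50, 200])
print(df.sort_values('Accuracy', ascending=False))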
def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the cost function of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Unroll Params
    Theta = roll_params(nn_weights, layers)

    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((num_labels, m))
    for i in range(m):
        yv[y[i], i] = 1

    # At this point calculate the cost of the neural network (feedforward)
    # a: the result obtained after each layer
    a = ones(X.shape[0])
    a = vstack((a, X.transpose()))
    for i in range(num_layers - 1):
        z = dot(Theta[i], a)
        a = sigmoid(z)
        if i != num_layers - 2:
            a = vstack((ones(a.shape[1]), a))

    # h: final result
    h = a.transpose()

    # Calculation of the cost J
    J = 0
    for i in range(m):
        for k in range(num_labels):
            J = J + (-yv[k, i] * log(h[i][k]) - (1 - yv[k, i]) * log(1.0 - h[i][k]))
    J = J / m

    # Regularization (the bias column k == 0 is not regularized)
    tmp = 0
    for i in range(num_layers - 1):
        for j in range(Theta[i].shape[0]):
            for k in range(1, Theta[i].shape[1]):
                tmp = tmp + Theta[i][j][k] * Theta[i][j][k]
    J = J + tmp * lambd / (2.0 * m)

    return J
def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the cost function of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Unroll Params
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    J = 0

    # ================================ TODO ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((num_labels, m))
    for i in range(m):
        yv[y[i]][i] = 1

    # ================================ TODO ================================
    # At this point calculate the cost of the neural network (feedforward)
    for i in range(m):
        layer_output = layerOutput(Theta, X, num_layers, i)
        for j in range(num_labels):
            cost = -yv[j, i] * log(layer_output[j])
            cost -= (1 - yv[j, i]) * log(1 - layer_output[j])
            J += cost
    J /= m

    # Regularization (the bias column k == 0 is not regularized)
    regularization_term = 0
    for i in range(len(Theta)):
        for j in range(Theta[i].shape[0]):
            for k in range(1, Theta[i].shape[1]):
                regularization_term += (Theta[i][j][k]) ** 2
    J += lambd / (2.0 * m) * regularization_term

    return J
def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the cost function of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Unroll Params
    Theta = roll_params(nn_weights, layers)

    # ================================ TODO ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = np.zeros((num_labels, m))
    for i in range(len(y)):
        yv[int(y[i]), i] = 1
    yv = np.transpose(yv)

    # ================================ TODO ================================
    # At this point calculate the cost of the neural network (feedforward)
    x = np.copy(X)
    for i in range(num_layers - 1):
        s = np.shape(Theta[i])
        theta = Theta[i][:, 1:s[1]]
        x = np.dot(x, np.transpose(theta))
        x = x + Theta[i][:, 0]
        x = sigmoid(x)

    cost = (yv * np.log(x) + (1 - yv) * np.log(1 - x)) / m
    cost = -np.sum(cost)

    # Regularization (the bias column is not regularized)
    somme = 0
    for i in range(num_layers - 1):
        somme += lambd * np.sum(Theta[i][:, 1:] ** 2) / (2 * m)
    cost += somme

    return cost
def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the cost function of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Unroll Params
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    J = 0

    # ================================ TODO ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((num_labels, m))
    for i in range(m):
        yv[y[i], i] = 1.0

    # ================================ TODO ================================
    # At this point calculate the cost of the neural network (feedforward)
    # Activations are stored column-wise: one column per training example
    activation = transpose(concatenate((ones((m, 1)), X), axis=1))
    activations = [activation]
    zs = []
    for i in range(num_layers - 1):
        z = dot(Theta[i], activation)
        zs.append(z)
        if i == num_layers - 2:
            # output layer: no bias row is added
            activation = sigmoid(z)
        else:
            activation = concatenate((ones((1, m)), sigmoid(z)), axis=0)
        activations.append(activation)

    J = (1.0 / m) * (sum(-1 * yv * log(activations[-1]) - (1 - yv) * log(1 - activations[-1])))

    # Regularization (the bias column is not regularized)
    reg = 0
    for i in range(num_layers - 1):
        reg += sum(Theta[i][:, 1:] ** 2)
    J += lambd * reg / (2.0 * m)

    return J
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    Theta_grad = [zeros(w.shape) for w in Theta]

    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((num_labels, m))
    for i in range(m):
        yv[y[i], i] = 1

    # At this point implement the backpropagation algorithm
    A = []
    a = ones(X.shape[0])
    a = vstack((a, X.transpose()))
    Z = []
    Z.append(a)
    for i in range(num_layers - 1):
        A.append(a.transpose())
        z = dot(Theta[i], a)
        Z.append(z)
        a = sigmoid(z)
        if i != num_layers - 2:
            a = vstack((ones(a.shape[1]), a))

    # A: list of results after each layer
    A.append(a.transpose())
    h = a.transpose()

    # delta for the last layer
    delta = h - yv.transpose()

    # Calculation of the gradients
    for j in range(num_layers - 2, 0, -1):
        Theta_grad[j] = Theta_grad[j] + dot(delta.transpose(), A[j])
        # Calculation of delta for the current layer (the first column of Theta has to be removed)
        tmp = dot(Theta[j][:, 1:].transpose(), delta.transpose())
        tmp = tmp.transpose()
        tmp_matrix = zeros(tmp.shape)
        for i in range(m):
            tmp_matrix[i] = sigmoidGradient(Z[j].transpose()[i])
        delta = tmp_matrix * tmp

    Theta_grad[0] = Theta_grad[0] + dot(delta.transpose(), A[0])

    # Regularization (the bias column k == 0 is not regularized)
    for i in range(num_layers - 1):
        for j in range((Theta_grad[i].shape)[0]):
            for k in range((Theta_grad[i].shape)[1]):
                Theta_grad[i][j, k] = Theta_grad[i][j, k] / m
                if k >= 1:
                    Theta_grad[i][j, k] = Theta_grad[i][j, k] + lambd / m * Theta[i][j, k]

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    Theta_grad = [zeros(w.shape) for w in Theta]

    # ================================ DONE ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((m, num_labels))
    for i in range(m):
        yv[i][y[i]] += 1

    # ================================ DONE ================================
    # At this point implement the backpropagation algorithm

    # Step 1: Initialization of useful variables
    # Z and A will store the hidden states of the network, as lists of matrices, of size num_layers
    A = [addColumnOne(X)]
    Z = [addColumnOne(X)]
    # delta will store the delta for each layer from the last to the second layer (in reverse order)
    delta = []

    # Step 2: Feedforward
    for i in range(num_layers - 1):
        h = A[i].dot(Theta[i].T)
        Z.append(h)
        h = addColumnOne(sigmoid(h))
        A.append(h)

    # Step 3: Backpropagation
    d = removeFirstColumn(A[-1]) - yv
    delta.append(d)
    for i in range(num_layers - 2, 0, -1):
        d = removeFirstColumn(d.dot(Theta[i])) * sigmoidGradient(Z[i])
        delta.append(d)
    delta.reverse()
    # delta is of size num_layers-1 (no delta for the input layer)

    for i in range(num_layers - 1):
        Theta_grad[i] += delta[i].T.dot(A[i])
        # DONE: no regularization on the bias weights!
        Theta_grad[i] += lambd * Theta[i]
        for j in range(Theta[i].shape[0]):
            Theta_grad[i][j, 0] -= lambd * Theta[i][j, 0]
        Theta_grad[i] /= m

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
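# This implementation (and the matching costFunction below) relies on addColumnOne
# and removeFirstColumn helpers that are not shown in this section. A minimal sketch
# of what they presumably do:
import numpy as np

def addColumnOne(M):
    # Prepend a column of ones (the bias units) to a matrix of activations.
    return np.hstack((np.ones((M.shape[0], 1)), M))

def removeFirstColumn(M):
    # Drop the first column (the bias units / bias weights).
    return M[:, 1:]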
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)
    Theta_grad = [np.zeros(w.shape) for w in Theta]

    yv = np.zeros((num_labels, m))
    for i in range(m):
        yv[y[i]][i] = 1

    # Implementation of the backpropagation algorithm
    for i in range(m):
        a_values, z_values = [], []  # lists where the values of the activations are to be stored
        a = np.append([1], X[i, :])
        a_values.append(a)

        # Loop of the feedforward algorithm
        for k in range(num_layers - 1):
            z = np.dot(Theta[k], a)
            z_values.append(z)
            a = np.append([1], sigmoid(z))
            a_values.append(a)

        delta_layer = a[1:] - yv[:, i]  # error vector of the output layer
        # np.outer to calculate the matrix product of delta_layer.T and a_values[-2]
        Theta_grad[-1] += np.outer(delta_layer, a_values[-2]) / m

        # Descending loop
        for h in range(num_layers - 2):
            # Error of the (num_layers - 2 - h)-th hidden layer
            # The error that corresponds to the bias factors is not taken into account
            delta_layer = np.dot(Theta[-1 - h].T, delta_layer)[1:] * sigmoidGradient(z_values[-2 - h])
            # Calculation of the gradient
            Theta_grad[-2 - h] += np.outer(delta_layer, a_values[-3 - h]) / m

    # Regularization
    for h in range(num_layers - 1):
        # The terms corresponding to the bias factors are not regularized
        Theta_grad[h][:, 1:] += lambd * Theta[h][:, 1:] / m

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
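# Written out, the per-example loop above implements the standard backpropagation
# recurrences for a sigmoid network with cross-entropy output (g is the sigmoid,
# L the number of layers; the bias components are dropped when propagating the
# error and the bias columns are excluded from the regularization term):
#
#   \delta^{(L)} = a^{(L)} - y
#   \delta^{(l)} = \left(\Theta^{(l)}_{:,1:}\right)^{\mathsf T} \delta^{(l+1)} \odot g'\!\left(z^{(l)}\right),
#                  \quad l = L-1, \dots, 2
#   \frac{\partial J}{\partial \Theta^{(l)}}
#       = \frac{1}{m} \sum_{i=1}^{m} \delta^{(l+1)} \left(a^{(l)}\right)^{\mathsf T}
#         + \frac{\lambda}{m} \left[\, 0 \mid \Theta^{(l)}_{:,1:} \,\right]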
def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the cost function of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Unroll Params
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    J = 0

    # ================================ DONE ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = np.zeros((m, num_labels))
    for i in range(m):
        yv[i][y[i]] += 1

    # ================================ DONE ================================
    # At this point calculate the cost of the neural network (feedforward)

    # Step 1: Initialization of useful variables
    # H will store the hidden states of the network, H is a list of matrices, of size num_layers
    H = [X]

    # Step 2: Feedforward
    for i in range(num_layers - 1):
        h = sigmoid(addColumnOne(H[i]).dot(Theta[i].T))
        H.append(h)

    # The output layer is H[num_layers - 1]
    yv_pred = H[num_layers - 1]

    # Step 3: Compute cost
    # We create the variable S, a matrix of size (m, K) which we will sum afterwards
    S = np.zeros((m, num_labels))
    temp = np.log(yv_pred)
    temp = yv * temp
    temp2 = np.log(1.0 - yv_pred)
    temp2 = (1.0 - yv) * temp2
    S += -temp - temp2

    J += np.sum(S)
    J = J / m

    # Regularization
    reg = 0
    for i in range(num_layers - 1):
        # No regularization on the bias weights
        reg += np.sum(removeFirstColumn(Theta[i])**2)
    J += lambd * reg / (2.0 * m)

    return J
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    """
    :param nn_weights: Neural network parameters (vector)
    :param layers: a list with the number of units per layer.
    :param X: a matrix where every row is a training example for a handwritten digit image
    :param y: a vector with the labels of each instance
    :param num_labels: the number of units in the output layer
    :param lambd: regularization factor
    :return: the gradient of the neural network
    """
    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = np.zeros((num_labels, m))
    for i in range(len(y)):
        yv[int(y[i]), i] = 1
    yv = np.transpose(yv)

    a = []
    z = []
    x = np.copy(X)
    a.append(insertOne(x))
    z.append(x)

    # If you want to be able to follow the training accuracy:
    # pred = predict(Theta, X)
    # accuracy = np.mean(y == pred) * 100
    # print(accuracy)

    # Feedforward
    for i in range(num_layers - 1):
        s = np.shape(Theta[i])
        theta = Theta[i][:, 1:s[1]]
        x = np.dot(x, np.transpose(theta))
        x = x + Theta[i][:, 0]
        z.append(x)
        x = sigmoid(x)
        a.append(insertOne(x))

    # Backpropagation of the errors
    delta = [np.zeros(w.shape) for w in z]
    delta[num_layers - 1] = (x - yv)
    for i in range(num_layers - 2, 0, -1):
        s = np.shape(Theta[i])
        theta = np.copy(Theta[i][:, 1:s[1]])
        temp = np.dot(np.transpose(theta), np.transpose(delta[i + 1]))
        delta[i] = np.transpose(temp) * sigmoidGradient(z[i])

    Delta = []
    for i in range(num_layers - 1):
        temp = np.dot(np.transpose(delta[i + 1]), a[i])
        Delta.append(temp)

    # If you want to follow the cost during the training:
    # cost = (yv * np.log(x) + (1 - yv) * np.log(1 - x)) / m
    # cost = -np.sum(cost)
    # somme = 0
    # for i in range(num_layers - 1):
    #     somme += lambd * np.sum(Theta[i] ** 2) / (2 * m)
    # cost += somme

    Theta_grad = [(d / m) for d in Delta]

    # Regularization: as in costFunction, the bias column is not regularized,
    # so checkNNGradients stays consistent between the cost and the gradient.
    i = 0
    for t in Theta:
        current = lambd * t / m
        current[:, 0] = current[:, 0] * 0
        Theta_grad[i] += current
        i += 1

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
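# The commented-out training-accuracy probe above and the test scripts below call a
# predict helper that is not reproduced in this section. A minimal sketch, assuming
# it feeds the inputs forward and returns the index of the most activated output unit:
import numpy as np

def predict(Theta, X):
    # Forward pass through all layers, prepending a bias unit at each step,
    # then pick the class with the highest output activation.
    a = X
    for W in Theta:
        a = np.hstack((np.ones((a.shape[0], 1)), a))
        a = 1.0 / (1.0 + np.exp(-a.dot(W.T)))
    return np.argmax(a, axis=1)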
checkNNGradients(lambd)

input('\nProgram paused. Press enter to continue!!!')

# ================================ Step 8: Implement Backpropagation with Regularization ================================
print("\nChecking Backpropagation with Regularization ...\n")

lambd = 3.0
checkNNGradients(lambd)

input('\nProgram paused. Press enter to continue!!!')

# ================================ Step 9: Training Neural Networks & Prediction ================================
print("\nTraining Neural Network... \n")

# You should also try different values of the regularization factor
lambd = 3.0

res = fmin_l_bfgs_b(costFunction, nn_weights, fprime=backwards,
                    args=(layers, images_training, labels_training, num_labels, lambd),
                    maxfun=50, factr=1., disp=True)
Theta = roll_params(res[0], layers)

input('\nProgram paused. Press enter to continue!!!')

print("\nTesting Neural Network... \n")

pred = predict(Theta, images_test)
print('\nAccuracy: ' + str(mean(labels_test == pred) * 100))
checkNNGradients(lambd)

raw_input('\nProgram paused. Press enter to continue!!!')

# ================================ Step 8: Implement Backpropagation with Regularization ================================
print "\nChecking Backpropagation with Regularization ...\n"

lambd = 3.0
checkNNGradients(lambd)

raw_input('\nProgram paused. Press enter to continue!!!')

# ================================ Step 9: Training Neural Networks & Prediction ================================
print "\nTraining Neural Network... \n"

# You should also try different values of the regularization factor
lambd = 3.0

res = fmin_l_bfgs_b(costFunction, nn_weights, fprime=backwards,
                    args=(layers, images_training, labels_training, num_labels, lambd),
                    maxfun=50, factr=1., disp=True)
Theta = roll_params(res[0], layers)

raw_input('\nProgram paused. Press enter to continue!!!')

print "\nTesting Neural Network... \n"

pred = predict(Theta, images_test)
print '\nAccuracy: ' + str(mean(labels_test == pred) * 100)
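# checkNNGradients itself is not shown in this section. A minimal sketch of the idea
# it implements: compare the analytic gradient from backwards against a centered
# finite-difference estimate on a tiny random network. The helper name
# numerical_gradient, the small layer sizes, and lambd_check are illustrative only.
import numpy as np

def numerical_gradient(cost, theta, eps=1e-4):
    # Centered finite differences, one coordinate at a time:
    # (J(theta + eps*e_i) - J(theta - eps*e_i)) / (2 * eps)
    grad = np.zeros_like(theta)
    for i in range(theta.size):
        step = np.zeros_like(theta)
        step[i] = eps
        grad[i] = (cost(theta + step) - cost(theta - step)) / (2 * eps)
    return grad

# Tiny network and random data so the loop above stays cheap.
layers_check = [3, 5, 3]
m_check, K = 5, 3
lambd_check = 3.0
X_check = np.random.rand(m_check, 3)
y_check = np.random.randint(0, K, m_check)
w = unroll_params(randInitializeWeights(layers_check))

num_grad = numerical_gradient(lambda t: costFunction(t, layers_check, X_check, y_check, K, lambd_check), w)
ana_grad = backwards(w, layers_check, X_check, y_check, K, lambd_check)
print('max abs difference:', np.max(np.abs(num_grad - ana_grad)))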