Example #1
def finalTest(size_training, size_test, hidden_layers, lambd, num_iterations):
    print "\nBeginning of the finalTest... \n"

    images_training, labels_training, images_test, labels_test = read_dataset(size_training, size_test)
    # Setup the parameters you will use for this exercise
    input_layer_size = 784        # 28x28 Input Images of Digits
    num_labels = 10         # 10 labels, from 0 to 9 (one label for each digit)
    layers = [input_layer_size] + hidden_layers + [num_labels]
    num_of_hidden_layers = len(hidden_layers)
    # Complete randInitializeWeights.py in order to initialize the neural network weights.
    Theta = randInitializeWeights(layers)

    # Unroll parameters
    nn_weights = unroll_params(Theta)
    res = fmin_l_bfgs_b(costFunction, nn_weights, fprime=backwards, args=(layers, images_training, labels_training, num_labels, lambd), maxfun = num_iterations, factr = 1., disp = True)
    Theta = roll_params(res[0], layers)

    print "\nTesting Neural Network... \n"

    pred_training = predict(Theta, images_training)
    print '\nAccuracy on training set: ' + str(mean(labels_training == pred_training) * 100)

    pred = predict(Theta, images_test)
    print '\nAccuracy on test set: ' + str(mean(labels_test == pred) * 100)

    # Display the test images that the algorithm misclassified
    temp = (labels_test == pred)
    indexes_false = []
    for i in range(size_test):
        if temp[i] == 0:
            indexes_false.append(i)

    displayData(images_test[indexes_false, :])
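
A minimal invocation of this routine might look like the sketch below; the dataset sizes, hidden-layer list, regularization factor and iteration budget are illustrative assumptions rather than values taken from the exercise.

# Hypothetical call: 5000 training images, 1000 test images,
# one hidden layer of 25 units, lambda = 1.0, up to 200 iterations.
finalTest(5000, 1000, [25], 1.0, 200)
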
Example #2
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    Theta_grad = [zeros(w.shape) for w in Theta]

    # ================================ TODO ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((num_labels, m))
    for i in range(m):
        yv[y[i]][i] = 1


    # ================================ TODO ================================
    # At this point, implement the backpropagation algorithm
    a = [[] for i in range(num_layers)]
    z = [[] for i in range(num_layers)]
    delta=[[] for i in range(num_layers)]
    for t in range(m):
        a[0] = X[t]
        for i in range(0, num_layers - 1):
            a[i] = insert(a[i], 0, 1)
            z[i] = Theta[i].dot(transpose(a[i]))
            a[i + 1] = sigmoid(z[i])
        delta[-1] = a[-1] - yv[:,t]
        for i in range(num_layers - 1, 0, -1):
            if i > 1:
                delta[i - 1] = (transpose(Theta[i-1][:, 1:]).dot(delta[i])) * sigmoidGradient(z[i - 2]) #because z[0] corresponds to z2

        for i in range(0, num_layers - 1):
            Theta_grad[i] += atleast_2d(delta[i+1]).T.dot(atleast_2d(a[i]))

    # regularization
    for l in range(0, num_layers - 1):
        for i in range(Theta[l].shape[0]):
            for j in range(1, Theta[l].shape[1]):
                Theta_grad[l][i][j] += lambd * Theta[l][i][j]

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad/m
Example #3
def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the cost function of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor
    
    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)


    # Roll Params
    Theta = roll_params(nn_weights, layers)

    J = 0
    
    yv = np.zeros((num_labels, m))
    for i in range(m):
        yv[y[i]][i] = 1
  

    # Cost of the neural network (feedforward)
    
    # Activation of the k-th layer for the i-th example
    def scores_layer(i, k):
        # k = 0 for the input layer
        # k = l for the l-th hidden layer
        
        x_vect = np.append([1], X[i, :]) # insert 1 at the beginning of the input image 
        
        if k == 0:
            return sigmoid(np.dot(Theta[0], x_vect))
         
        # Insert 1 at the beginning of the activation of the previous layer
        res_with_bias = np.append([1], scores_layer(i, k-1))
        return sigmoid(np.dot(Theta[k], res_with_bias))
      
    # Cost function for the i-th example          
    def cost_i(i):
        activation_layer = scores_layer(i, num_layers - 2)  # activation of the output layer
        y_i = yv[:, i]
        return (-y_i * np.log(activation_layer) - (1 - y_i) * np.log(1 - activation_layer)).sum()
    
    # Total cost J
    for i in range(m):
        J += cost_i(i)
    J /= m
    
    # Regularization
    coeff_reg = lambd / (2 * m)
    # Loop over the weight matrices
    for h in range(num_layers - 1):
        sub_weights = Theta[h][:, 1:] # the terms corresponding to the bias factors are not regularized
        J += coeff_reg * (sub_weights * sub_weights).sum()
        
        
    return J
Example #4
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    Theta_grad = [zeros(w.shape) for w in Theta]

    # ================================ TODO ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((num_labels, m))

    # ================================ TODO ================================
    # At this point, implement the backpropagation algorithm

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
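
Example #4 leaves both TODO blocks unfilled. As a self-contained sketch of what the first TODO asks for (mapping the label vector y to a binary matrix), a vectorized version could look like the following; it assumes, as in the other examples, that y holds integer labels between 0 and num_labels - 1.

import numpy as np

def one_hot(y, num_labels):
    # Builds a (num_labels, m) matrix with a single 1 per column,
    # matching the yv layout used by the other examples on this page.
    m = len(y)
    yv = np.zeros((num_labels, m))
    yv[np.asarray(y, dtype=int), np.arange(m)] = 1
    return yv
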
Example #5
def cross_validation(lambd_values=[0.1], maxfun_values=[200]):
    """Function that trains the neural network and then tests its accuracy
    Parameters : lambd, which measures the coefficient of the regularization
                 maxfun, which counts the number of iterations of the backpropagation
    """

    n_lambd, n_maxfun = len(lambd_values), len(maxfun_values)

    # Creation of the DataFrame where the results are to be stored
    df_results = pd.DataFrame(index=range(n_lambd * n_maxfun))
    df_results['Maxfun'] = list(maxfun_values) * n_lambd
    # Each lambd stays fixed while the inner loop runs over maxfun_values,
    # so the Lambd column repeats each value n_maxfun times
    df_results['Lambd'] = list(np.repeat(lambd_values, n_maxfun))
    df_results['Hidden layers'] = num_of_hidden_layers
    nodes_avg = np.mean(layers[1:-1])
    df_results['Nodes per hidden layer (avg)'] = nodes_avg
    accuracy_col = []

    for lambd in lambd_values:

        for maxfun in maxfun_values:

            start = time()  # start of the timer

            res = opt.fmin_l_bfgs_b(costFunction,
                                    nn_weights,
                                    fprime=backwards,
                                    args=(layers, images_validation,
                                          labels_training, num_labels, lambd),
                                    maxfun=maxfun,
                                    factr=1.,
                                    disp=True)
            Theta = roll_params(res[0], layers)

            # input('\nProgram paused. Press enter to continue!!!')

            # print("\nTesting Neural Network... \n")

            pred = predict(Theta, images_test)
            end = time()  # end of the timer
            accuracy = np.mean(labels_test == pred) * 100
            print('\nLambda =', lambd)
            print('Maxfun =', maxfun)
            time_complexity = end - start
            print('Time:', time_complexity, 'seconds')
            print('Accuracy =', accuracy, '%')

            # Modification of the 'Accuracy' column
            accuracy_col.append(accuracy)

    # Accuracy values stored into the dataframe
    df_results['Accuracy'] = accuracy_col

    return df_results
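
A possible usage sketch, assuming the surrounding script has already defined the globals this function relies on (layers, nn_weights, num_labels and the image/label arrays); the two value grids below are illustrative only.

results = cross_validation(lambd_values=[0.1, 1.0, 3.0],
                           maxfun_values=[50, 200])
print(results.sort_values('Accuracy', ascending=False))
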
Example #6
def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the cost function of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor
    
    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    Theta = roll_params(nn_weights, layers)
    
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a 
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((num_labels, m))
    for i in range(m):
        yv[y[i], i] = 1

    # At this point, calculate the cost of the neural network (feedforward)
    # a: the activation obtained after each layer
    a = ones(X.shape[0])
    a = vstack((a, X.transpose()))
    for i in range(num_layers - 1):
        z = dot(Theta[i], a)
        a = sigmoid(z)
        if i != num_layers - 2:
            a = vstack((ones(a.shape[1]), a))
    # h: final output of the network
    h = a.transpose()

    # Computation of the cost J
    J = 0
    for i in range(m):
        for k in range(num_labels):
            J = J + (-yv[k, i] * log(h[i][k]) - (1 - yv[k, i]) * log(1.0 - h[i][k]))
    J = J / m

    # Regularization (the bias weights in the first column are not regularized)
    tmp = 0
    for i in range(num_layers - 1):
        for j in range(Theta[i].shape[0]):
            for k in range(1, Theta[i].shape[1]):
                tmp = tmp + Theta[i][j][k] * Theta[i][j][k]
    J = J + tmp * lambd / (2.0 * m)

    return J
Example #7
def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the cost function of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    J = 0

    # ================================ TODO ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((num_labels, m))
    for i in range(m):
        yv[y[i]][i] = 1


    # ================================ TODO ================================
    # At this point, calculate the cost of the neural network (feedforward)

    for i in range(m):
        layer_output = layerOutput(Theta, X, num_layers, i)
        for j in range(num_labels):
            cost = -yv[j, i] * log(layer_output[j])
            cost -= (1 - yv[j, i]) * log(1 - layer_output[j])
            J += cost
    J /= m

    # Regularization (the bias weights in the first column are not regularized)
    regularization_term = 0
    for i in range(len(Theta)):
        for j in range(Theta[i].shape[0]):
            for k in range(1, Theta[i].shape[1]):
                regularization_term += (Theta[i][j][k]) ** 2
    J += lambd / (2.0 * m) * regularization_term

    return J
Example #8
def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the cost function of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor
    
    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    Theta = roll_params(nn_weights, layers)

    # ================================ TODO ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a 
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = np.zeros((num_labels, m))
    for i in range(len(y)):
        yv[int(y[i]), i] = 1
    yv = np.transpose(yv)

    # ================================ TODO ================================
    # At this point, calculate the cost of the neural network (feedforward)
    x = np.copy(X)

    for i in range(num_layers - 1):
        s = np.shape(Theta[i])
        theta = Theta[i][:, 1:s[1]]
        x = np.dot(x, np.transpose(theta))
        x = x + Theta[i][:, 0]
        x = sigmoid(x)

    cost = (yv * np.log(x) + (1 - yv) * np.log(1 - x)) / m
    cost = -np.sum(cost)

    somme = 0

    for i in range(num_layers - 1):
        somme += lambd * np.sum(Theta[i] ** 2) / (2 * m)

    cost += somme

    return cost
Example #9
def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the cost function of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor
    
    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    Theta = roll_params(nn_weights, layers)
    
    # You need to return the following variables correctly 
    J = 0
    
    # ================================ TODO ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a 
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((num_labels, m))
    for i in range(m):
        yv[y[i],i] = 1.0

    # ================================ TODO ================================
    # At this point, calculate the cost of the neural network (feedforward)
    zs = []
    activation = transpose(concatenate((ones((m, 1)), X), axis=1))
    activations = [activation]
    for i in range(num_layers - 1):
        z = dot(Theta[i], activation)
        zs.append(z)
        if i == (num_layers - 2):
            # output layer: no bias row is added
            activation = sigmoid(z)
        else:
            activation = concatenate((ones((1, m)), sigmoid(z)), axis=0)

        activations.append(activation)
    J = (1.0 / m) * (sum(-1 * yv * log(activations[-1]) -
        (1 - yv) * log(1 - activations[-1])))

    # Regularization (the bias weights in the first column are not regularized)
    for i in range(num_layers - 1):
        J += (lambd / (2.0 * m)) * sum(Theta[i][:, 1:] ** 2)

    return J
Example #10
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor
    
    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)
  
    # You need to return the following variables correctly 
    Theta_grad = [zeros(w.shape) for w in Theta]

    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a 
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((num_labels, m))
    for i in range(m):
        yv[y[i], i] = 1

    # At this point, implement the backpropagation algorithm
    A = []
    a = ones(X.shape[0])
    a = vstack((a, X.transpose()))
    Z = []
    Z.append(a)
    for i in range(num_layers - 1):
        A.append(a.transpose())
        z = dot(Theta[i], a)
        Z.append(z)
        a = sigmoid(z)
        if i != num_layers - 2:
            a = vstack((ones(a.shape[1]), a))

    # A: list of the activations after each layer
    A.append(a.transpose())
    h = a.transpose()

    # delta for the last layer
    delta = h - yv.transpose()
    # Computation of the gradients
    for j in range(num_layers - 2, 0, -1):
        Theta_grad[j] = Theta_grad[j] + dot(delta.transpose(), A[j])
        # delta for the current layer (the first column of Theta, i.e. the bias, is removed)
        tmp = dot(Theta[j][:, 1:].transpose(), delta.transpose())
        tmp = tmp.transpose()
        tmp_matrix = zeros(tmp.shape)
        for i in range(m):
            tmp_matrix[i] = sigmoidGradient(Z[j].transpose()[i])
        delta = tmp_matrix * tmp
    Theta_grad[0] = Theta_grad[0] + dot(delta.transpose(), A[0])

    # Regularization (the bias column k = 0 is not regularized)
    for i in range(num_layers - 1):
        for j in range(Theta_grad[i].shape[0]):
            for k in range(Theta_grad[i].shape[1]):
                Theta_grad[i][j, k] = Theta_grad[i][j, k] / m
                if k >= 1:
                    Theta_grad[i][j, k] = Theta_grad[i][j, k] + lambd / m * Theta[i][j, k]
    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
Example #11
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    Theta_grad = [zeros(w.shape) for w in Theta]

    # ================================ DONE ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((m, num_labels))
    for i in range(m):
        yv[i][y[i]] += 1

    # ================================ DONE ================================
    # At this point, implement the backpropagation algorithm

    # First, a feedforward pass computes the activations of the network

    # Step 1: Initialization of useful variables

    # Z and A will store the hidden states of the network, as lists of matrices, of size num_layers
    A = [addColumnOne(X)]
    Z = [addColumnOne(X)]

    # delta will store the delta for each layer from the last to the second layer (in reverse order)
    delta = []

    # Step 2: Feedforward
    for i in range(num_layers - 1):
        h = A[i].dot(Theta[i].T)
        Z.append(h)
        h = addColumnOne(sigmoid(h))
        A.append(h)

    # Step 3: Backpropagation
    d = removeFirstColumn(A[-1]) - yv
    delta.append(d)

    for i in range(num_layers - 2, 0, -1):
        d = removeFirstColumn(d.dot(Theta[i])) * sigmoidGradient(Z[i])
        delta.append(d)

    delta.reverse()
    # delta is of size num_layers-1 (no delta for the input layer)

    for i in range(num_layers - 1):
        Theta_grad[i] += delta[i].T.dot(A[i])
        # DONE: no regularization on the bias weights !!
        Theta_grad[i] += lambd * Theta[i]
        for j in range(Theta[i].shape[0]):
            Theta_grad[i][j, 0] -= lambd * Theta[i][j, 0]
        Theta_grad[i] /= m

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
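
Before trusting a hand-written backwards, it helps to compare it against a finite-difference gradient, which is what checkNNGradients does elsewhere in the exercise. The snippet below is only a sketch of that idea: it assumes the costFunction and backwards shown on this page are in scope, and it should only be run on a tiny network because it loops over every single weight.

import numpy as np

def numerical_gradient(nn_weights, layers, X, y, num_labels, lambd, eps=1e-4):
    # Central finite differences of the cost, one weight at a time
    grad = np.zeros_like(nn_weights)
    for k in range(len(nn_weights)):
        step = np.zeros_like(nn_weights)
        step[k] = eps
        grad[k] = (costFunction(nn_weights + step, layers, X, y, num_labels, lambd)
                   - costFunction(nn_weights - step, layers, X, y, num_labels, lambd)) / (2 * eps)
    return grad

# analytic = backwards(nn_weights, layers, X, y, num_labels, lambd)
# numeric = numerical_gradient(nn_weights, layers, X, y, num_labels, lambd)
# print(np.linalg.norm(analytic - numeric) / np.linalg.norm(analytic + numeric))
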
Example #12
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    Theta_grad = [np.zeros(w.shape) for w in Theta]

    yv = np.zeros((num_labels, m))
    for i in range(m):
        yv[y[i]][i] = 1

    # Implementation of the backpropagation algorithm

    for i in range(m):

        # Lists where the activations (a) and pre-activations (z) are stored
        a_values, z_values = [], []

        a = np.append([1], X[i, :])
        a_values.append(a)

        # Loop of the feedforward algorithm
        for k in range(num_layers - 1):
            z = np.dot(Theta[k], a)
            z_values.append(z)
            a = np.append([1], sigmoid(z))
            a_values.append(a)

        delta_layer = a[1:] - yv[:, i]  # error of the output layer
        # np.outer computes the outer product of delta_layer and a_values[-2]
        Theta_grad[-1] += np.outer(delta_layer, a_values[-2]) / m

        # Descending loop
        for h in range(num_layers - 2):
            # Error of the (num_layers - 2 - h)-th hidden layer
            # The error that corresponds to the bias factors is not taken into account
            delta_layer = np.dot(Theta[-1 - h].T,
                                 delta_layer)[1:] * sigmoidGradient(
                                     z_values[-2 - h])
            # Calculation of the gradient
            Theta_grad[-2 - h] += np.outer(delta_layer, a_values[-3 - h]) / m

    #Regularization
    for h in range(num_layers - 1):
        # The terms corresponding to the bias factors are not regularized
        Theta_grad[h][:, 1:] += lambd * Theta[h][:, 1:] / m

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
Example #13
def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the cost function of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor
    
    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    Theta = roll_params(nn_weights, layers)
    
    # You need to return the following variables correctly 
    J = 0
    
    # ================================ DONE ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a 
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = np.zeros((m, num_labels))
    for i in range(m):
        yv[i][y[i]] += 1
  

    # ================================ DONE ================================
    # At this point, calculate the cost of the neural network (feedforward)

    # Step 1: Initialization of useful variables

    # H will store the hidden states of the network, H is a list of matrices, of size num_layers
    H = [X]

    # Step 2: Feedforward

    for i in range(num_layers-1):
        h = sigmoid(addColumnOne(H[i]).dot(Theta[i].T))
        H.append(h)

    # The output layer is H[num_layers - 1]
    yv_pred = H[num_layers-1]

    # Step 3: Compute cost
    # We create the variable S, a matrix of size (m, K) which we will sum afterwards
    S = np.zeros((m, num_labels))
    temp = np.log(yv_pred)
    temp = yv*temp

    temp2 = np.log(1.0-yv_pred)
    temp2 = (1.0-yv)*temp2

    S += - temp - temp2

    J += np.sum(S)
    J = J/m

    reg = 0
    for i in range(num_layers-1):
        # No regularization on the bias weights
        reg += np.sum(removeFirstColumn(Theta[i])**2)

    J += lambd * reg / (2.0 * m)
    return J
Example #14
def costFunction(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the cost function of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    J = 0

    # ================================ DONE ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = np.zeros((m, num_labels))
    for i in range(m):
        yv[i][y[i]] += 1

    # ================================ DONE ================================
    # At this point, calculate the cost of the neural network (feedforward)

    # Step 1: Initialization of useful variables

    # H will store the hidden states of the network, H is a list of matrices, of size num_layers
    H = [X]

    # Step 2: Feedforward

    for i in range(num_layers - 1):
        h = sigmoid(addColumnOne(H[i]).dot(Theta[i].T))
        H.append(h)

    # The output layer is H[num_layers - 1]
    yv_pred = H[num_layers - 1]

    # Step 3: Compute cost
    # We create the variable S, a matrix of size (m, K) which we will sum afterwards
    S = np.zeros((m, num_labels))
    temp = np.log(yv_pred)
    temp = yv * temp

    temp2 = np.log(1.0 - yv_pred)
    temp2 = (1.0 - yv) * temp2

    S += -temp - temp2

    J += np.sum(S)
    J = J / m

    reg = 0
    for i in range(num_layers - 1):
        # No regularization on the bias weights
        reg += np.sum(removeFirstColumn(Theta[i])**2)

    J += lambd * reg / (2.0 * m)
    return J
Example #15
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    """
    :param nn_weights: Neural network parameters (vector)
    :param layers: a list with the number of units per layer.
    :param X: a matrix where every row is a training example for a handwritten digit image
    :param y: a vector with the labels of each instance
    :param num_labels: the number of units in the output layer
    :param lambd: regularization factor
    :return: The gradient of the neural network (unrolled into a vector).
    """

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = np.zeros((num_labels, m))
    for i in range(len(y)):
        yv[int(y[i]), i] = 1
    yv = np.transpose(yv)

    a = []
    z = []
    x = np.copy(X)
    a.append(insertOne(x))
    z.append(x)

    # if you want to be able to follow the training accuracy:
    # pred = predict(Theta, X)
    # accuracy = np.mean(y == pred) * 100
    # print(accuracy)

    for i in range(num_layers - 1):

        s = np.shape(Theta[i])
        theta = Theta[i][:, 1:s[1]]
        x = np.dot(x, np.transpose(theta))
        x = x + Theta[i][:, 0]
        z.append(x)
        x = sigmoid(x)
        a.append(insertOne(x))

    delta = [np.zeros(w.shape) for w in z]
    delta[num_layers - 1] = (x - yv)

    for i in range(num_layers - 2, 0, -1):
        s = np.shape(Theta[i])
        theta = np.copy(Theta[i][:, 1:s[1]])
        temp = np.dot(np.transpose(theta), np.transpose(delta[i + 1]))
        delta[i] = np.transpose(temp) * sigmoidGradient(z[i])

    Delta = []
    for i in range(num_layers - 1):
        temp = np.dot(np.transpose(delta[i + 1]), a[i])
        Delta.append(temp)

    # if you want to follow the cost during the training:
    # cost = (yv * np.log(x) + (1 - yv) * np.log(1 - x)) / m
    # cost = -np.sum(cost)
    #
    # somme = 0
    #
    # for i in range(num_layers - 1):
    #     somme += lambd * np.sum(Theta[i] ** 2) / (2 * m)
    #
    # cost += somme

    Theta_grad = [(d / m) for d in Delta]

    i = 0
    for t in Theta:
        current = lambd * t / m
        # According to the handout, the bias column should be zeroed here,
        # but checkNNGradient gives better agreement without this line,
        # so it is left commented out:
        # current[:, 0] = current[:, 0]*0
        Theta_grad[i] += current
        i += 1

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
Example #16
checkNNGradients(lambd)
input('\nProgram paused. Press enter to continue!!!')


# ================================ Step 8: Implement Backpropagation with Regularization ================================

print("\nChecking Backpropagation with Regularization ...\n")

lambd = 3.0
checkNNGradients(lambd)

input('\nProgram paused. Press enter to continue!!!')


# ================================ Step 9: Training Neural Networks & Prediction ================================
print("\nTraining Neural Network... \n")

#  You should also try different values of the regularization factor
lambd = 3.0

res = fmin_l_bfgs_b(costFunction, nn_weights, fprime = backwards, args = (layers, images_training, labels_training, num_labels, lambd), maxfun = 50, factr = 1., disp = True)
Theta = roll_params(res[0], layers)

input('\nProgram paused. Press enter to continue!!!')

print("\nTesting Neural Network... \n")

pred  = predict(Theta, images_test)
print('\nAccuracy: ' + str(mean(labels_test==pred) * 100))

Example #17
checkNNGradients(lambd)
raw_input('\nProgram paused. Press enter to continue!!!')


# ================================ Step 8: Implement Backpropagation with Regularization ================================

print "\nChecking Backpropagation with Regularization ...\n"

lambd = 3.0
checkNNGradients(lambd)

raw_input('\nProgram paused. Press enter to continue!!!')


# ================================ Step 9: Training Neural Networks & Prediction ================================
print "\nTraining Neural Network... \n"

#  You should also try different values of the regularization factor
lambd = 3.0

res = fmin_l_bfgs_b(costFunction, nn_weights, fprime=backwards, args=(layers, images_training, labels_training, num_labels, lambd), maxfun=50, factr=1., disp=True)
Theta = roll_params(res[0], layers)

raw_input('\nProgram paused. Press enter to continue!!!')

print "\nTesting Neural Network... \n"

pred = predict(Theta, images_test)
print '\nAccuracy: ' + str(mean(labels_test==pred) * 100)

Example #18
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor
    
    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)
  
    # You need to return the following variables correctly 
    Theta_grad = [zeros(w.shape) for w in Theta]

    # ================================ DONE ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((m, num_labels))
    for i in range(m):
        yv[i][y[i]] += 1

    # ================================ DONE ================================
    # At this point, implement the backpropagation algorithm

    # First, a feedforward pass computes the activations of the network

    # Step 1: Initialization of useful variables

    # Z and A will store the hidden states of the network, as lists of matrices, of size num_layers
    A = [addColumnOne(X)]
    Z = [addColumnOne(X)]

    # delta will store the delta for each layer from the last to the second layer (in reverse order)
    delta = []

    # Step 2: Feedforward
    for i in range(num_layers-1):
        h = A[i].dot(Theta[i].T)
        Z.append(h)
        h = addColumnOne(sigmoid(h))
        A.append(h)


    # Step 3: Backpropagation
    d = removeFirstColumn(A[-1]) - yv
    delta.append(d)

    for i in range(num_layers-2, 0, -1):
        d = removeFirstColumn(d.dot(Theta[i])) * sigmoidGradient(Z[i])
        delta.append(d)

    delta.reverse()
    # delta is of size num_layers-1 (no delta for the input layer)

    for i in range(num_layers-1):
        Theta_grad[i] += delta[i].T.dot(A[i])
        # DONE: no regularization on the bias weights !!
        Theta_grad[i] += lambd * Theta[i]
        for j in range(Theta[i].shape[0]):
            Theta_grad[i][j, 0] -= lambd * Theta[i][j, 0]
        Theta_grad[i] /= m

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad