Example #1
# Assumes nnCostFunction, sigmoidGradient and a MATLAB-style sprintf helper
# are available from the exercise's own modules (a sprintf sketch follows
# after this example).
from numpy import arange, cos, hstack, reshape, sin


def output(partId):
    # Random test cases
    X = reshape(3 * sin(arange(1, 31, 1)), (3,10), order='F')
    Xm = reshape(sin(arange(1, 33)), (16,2), order='F') / 5
    ym = 1 + arange(1, 17) % 4
    t1 = sin(reshape(arange(1,25,2), (4,3), order='F'))
    t2 = cos(reshape(arange(1,41,2), (4,5), order='F'))
    t = hstack([t1.ravel('F'), t2.ravel('F')])
    if partId == '1':
        J, _ = nnCostFunction(t, 2, 4, 4, Xm, ym, 0)
        return sprintf('%0.5f ', J)
    elif partId == '2':
        J, _ = nnCostFunction(t, 2, 4, 4, Xm, ym, 1.5)
        return sprintf('%0.5f ', J)
    elif partId == '3':
        return sprintf('%0.5f ', sigmoidGradient(X))
    elif partId == '4':
        J, grad = nnCostFunction(t, 2, 4, 4, Xm, ym, 0)
        out = sprintf('%0.5f ', J)
        return out + sprintf('%0.5f ', grad)
    elif partId == '5':
        J, grad = nnCostFunction(t, 2, 4, 4, Xm, ym, 1.5)
        out = sprintf('%0.5f ', J)
        return out + sprintf('%0.5f ', grad)
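The grader-style output() above relies on a MATLAB-like sprintf helper that is not a Python builtin; a minimal sketch of such a helper, assuming it should format a scalar or every element of an array with the given printf-style format:

import numpy as np

def sprintf(fmt, arg):
    # Hypothetical helper: apply a printf-style format to each element of a
    # scalar or array (column-major, as MATLAB's sprintf would) and concatenate.
    return ''.join(fmt % v for v in np.asarray(arg, dtype=float).ravel(order='F'))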
Example #2
import numpy as np
import pandas as pd
# Assumed helper imports (module names follow the exercise's file layout):
from sigmoid import sigmoid
from sigmoidGradient import sigmoidGradient


def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels,
                   X, y, lambda_):
    """ computes the cost and gradient of the neural network. The
        parameters for the neural network are "unrolled" into the vector
        nn_params and need to be converted back into the weight matrices.

        The returned parameter grad should be a "unrolled" vector of the
        partial derivatives of the neural network.
    """

    # Reshape nn_params back into the parameters Theta1 and Theta2,
    # the weight matrices for our 2 layer neural network
    # Obtain Theta1 and Theta2 back from nn_params
    Theta1 = nn_params[:hidden_layer_size * (input_layer_size + 1)].reshape(
        hidden_layer_size, input_layer_size + 1, order='F')  # (25, 401)
    Theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):].reshape(
        num_labels, hidden_layer_size + 1, order='F')  # (10, 26)

    # Setup some useful variables
    m = len(X)
    y = pd.get_dummies(y).to_numpy()  # .as_matrix() was removed in newer pandas

    # ====================== YOUR CODE HERE ======================
    # Instructions: You should complete the code by working through the
    #               following parts.
    #
    # Part 1: Feedforward the neural network and return the cost in the
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost
    #         computed in ex4.m
    #
    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial
    #         derivatives of the cost function with respect to Theta1 and
    #         Theta2 in Theta1_grad and Theta2_grad, respectively.
    #         After implementing Part 2, you can check that
    #         your implementation is correct by running checkNNGradients
    #
    #         Note: The vector y passed into the function is a vector of labels
    #               containing values from 1..K. You need to map this vector
    #               into a binary vector of 1's and 0's to be used with
    #               the neural network cost function.
    #
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it
    #               for the first time.
    #
    # Part 3: Implement regularization with the cost function and gradients.
    #
    #         Hint: You can implement this around the code for backpropagation.
    #               That is, you can compute the gradients
    #               for the regularization separately and then add them
    #               to Theta1_grad and Theta2_grad from Part 2.
    #

    # Feedforward the neural network...
    a1 = np.c_[np.ones(m), X]  # (5000, 401)

    z2 = a1 @ Theta1.T  # (5000, 401) @ (401, 25) = (5000, 25)
    a2 = np.c_[np.ones(len(z2)), sigmoid(z2)]  # (5000, 26)

    z3 = a2 @ Theta2.T  # (5000, 26) @ (26, 10) = (5000, 10)
    a3 = sigmoid(z3)  # (5000, 10)

    # Computing cost...
    J = -np.mean(np.sum(y * np.log(a3) + (1 - y) * np.log(1 - a3), axis=1))

    # Computing regularized cost...
    J += lambda_ * (sum(np.sum(np.square(Theta1[:, 1:]), axis=1)) +
                    sum(np.sum(np.square(Theta2[:, 1:]), axis=1))) / (2 * m)

    # Computing δ(del) and ∆(delta)...
    del3 = a3 - y  # (5000, 10)
    delta2 = del3.T @ a2  # (10, 26)

    del2 = del3 @ Theta2 * sigmoidGradient(np.c_[np.ones(len(z2)), z2])
    delta1 = del2[:, 1:].T @ a1  # (25, 401)

    # Computing gradient...
    Theta1_grad = delta1 / m
    Theta2_grad = delta2 / m

    # Computing regularized gradient...
    Theta1_grad += lambda_ * np.c_[np.zeros(len(Theta1)), Theta1[:, 1:]] / m
    Theta2_grad += lambda_ * np.c_[np.zeros(len(Theta2)), Theta2[:, 1:]] / m
    # -------------------------------------------------------------

    # =========================================================================

    # Unroll gradient
    grad = np.r_[Theta1_grad.flatten(order='F'),
                 Theta2_grad.flatten(order='F')]

    return J, grad
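For context, a hedged sketch of how a (J, grad) cost function like the one above is usually handed to an optimizer; nnCostFunction here is the function defined above, while initial_params, X, y and the layer sizes are placeholders for whatever the surrounding script provides:

from scipy.optimize import minimize

def train_nn(initial_params, input_layer_size, hidden_layer_size,
             num_labels, X, y, lambda_, maxiter=100):
    # jac=True tells scipy that the objective returns (cost, gradient).
    res = minimize(nnCostFunction, initial_params,
                   args=(input_layer_size, hidden_layer_size, num_labels,
                         X, y, lambda_),
                   jac=True, method='TNC', options={'maxiter': maxiter})
    return res.x  # trained parameters, still unrolled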
Example #3
import numpy as np
# Assumed helper modules (module names follow the exercise's file layout):
import sigmoid as s
import sigmoidGradient as sg


def nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lambda_reg):
    #NNCOSTFUNCTION Implements the neural network cost function for a two layer
    #neural network which performs classification
    #   [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ...
    #   X, y, lambda) computes the cost and gradient of the neural network. The
    #   parameters for the neural network are "unrolled" into the vector
    #   nn_params and need to be converted back into the weight matrices.
    #
    #   The returned parameter grad should be a "unrolled" vector of the
    #   partial derivatives of the neural network.

    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)], \
                     (hidden_layer_size, input_layer_size + 1), order='F')

    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):], \
                     (num_labels, hidden_layer_size + 1), order='F')

    # Setup some useful variables
    m = len(X)

    # # You need to return the following variables correctly
    J = 0
    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)

    # ====================== YOUR CODE HERE ======================
    # Instructions: You should complete the code by working through the
    #               following parts.
    #
    # Part 1: Feedforward the neural network and return the cost in the
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost
    #         computed in ex4.m
    #
    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial derivatives of
    #         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    #         Theta2_grad, respectively. After implementing Part 2, you can check
    #         that your implementation is correct by running checkNNGradients
    #
    #         Note: The vector y passed into the function is a vector of labels
    #               containing values from 1..K. You need to map this vector into a
    #               binary vector of 1's and 0's to be used with the neural network
    #               cost function.
    #
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it for the
    #               first time.
    #
    # Part 3: Implement regularization with the cost function and gradients.
    #
    #         Hint: You can implement this around the code for
    #               backpropagation. That is, you can compute the gradients for
    #               the regularization separately and then add them to Theta1_grad
    #               and Theta2_grad from Part 2.
    #

    # add column of ones as bias unit from input layer to second layer
    X = np.column_stack((np.ones((m, 1)), X))  # = a1

    # calculate second layer as sigmoid( z2 ) where z2 = Theta1 * a1
    a2 = s.sigmoid(np.dot(X, Theta1.T))

    # add column of ones as bias unit from second layer to third layer
    a2 = np.column_stack((np.ones((a2.shape[0], 1)), a2))

    # calculate third layer as sigmoid ( z3 ) where z3 = Theta2 * a2
    a3 = s.sigmoid(np.dot(a2, Theta2.T))

    #%% COST FUNCTION CALCULATION

    #% NONREGULARIZED COST FUNCTION

    # recode labels as vectors containing only values 0 or 1
    labels = y
    # set y to be matrix of size m x k
    y = np.zeros((m, num_labels))
    # for every label, convert it into vector of 0s and a 1 in the appropriate position
    for i in range(m):
        y[i, labels[i] - 1] = 1

    # at this point, both a3 and y are m x k matrices, where m is the number of inputs
    # and k is the number of hypotheses. Given that the cost function is a sum
    # over m and k, loop over m and in each loop, sum over k by doing a sum over the row

    cost = 0
    for i in range(m):
        cost += np.sum(y[i] * np.log(a3[i]) + (1 - y[i]) * np.log(1 - a3[i]))

    J = -(1.0 / m) * cost

    #% REGULARIZED COST FUNCTION
    # note that Theta1[:,1:] is necessary given that the first column corresponds to transitions
    # from the bias terms, and we are not regularizing those parameters. Thus, we get rid
    # of the first column.

    sumOfTheta1 = np.sum(np.sum(Theta1[:, 1:]**2))
    sumOfTheta2 = np.sum(np.sum(Theta2[:, 1:]**2))

    J = J + ((lambda_reg / (2.0 * m)) * (sumOfTheta1 + sumOfTheta2))

    #%% BACKPROPAGATION

    bigDelta1 = 0
    bigDelta2 = 0

    # for each training example
    for t in range(m):

        ## step 1: perform forward pass
        # set lowercase x to the t-th row of X
        x = X[t]
        a2 = s.sigmoid(np.dot(x, Theta1.T))
        a2 = np.concatenate((np.array([1]), a2))
        a3 = s.sigmoid(np.dot(a2, Theta2.T))

        delta3 = np.zeros((num_labels))

        for k in range(num_labels):
            y_k = y[t, k]
            delta3[k] = a3[k] - y_k

        delta2 = (np.dot(Theta2[:, 1:].T, delta3).T) * sg.sigmoidGradient(
            np.dot(x, Theta1.T))

        ## step 4: accumulate gradient from this example
        # accumulation
        # note that
        #   delta2.shape = (hidden_layer_size,)
        #   x.shape      = (input_layer_size + 1,)
        #   delta3.shape = (num_labels,)
        #   a2.shape     = (hidden_layer_size + 1,)
        # np.dot(delta2,x) and np.dot(delta3,a2) don't do outer product
        # could do e.g. np.dot(delta2[:,None], x[None,:])
        # seems faster to do np.outer(delta2, x)
        # solution from http://stackoverflow.com/a/22950320/583834
        bigDelta1 += np.outer(delta2, x)
        bigDelta2 += np.outer(delta3, a2)

    # step 5: obtain gradient for neural net cost function by dividing the accumulated gradients by m
    Theta1_grad = bigDelta1 / m
    Theta2_grad = bigDelta2 / m

    #% REGULARIZATION FOR GRADIENT
    # only regularize for j >= 1, so skip the first column
    Theta1_grad_unregularized = np.copy(Theta1_grad)
    Theta2_grad_unregularized = np.copy(Theta2_grad)
    Theta1_grad += (float(lambda_reg) / m) * Theta1
    Theta2_grad += (float(lambda_reg) / m) * Theta2
    Theta1_grad[:, 0] = Theta1_grad_unregularized[:, 0]
    Theta2_grad[:, 0] = Theta2_grad_unregularized[:, 0]

    # # -------------------------------------------------------------

    # # =========================================================================

    # Unroll gradients
    grad = np.concatenate((Theta1_grad.reshape(Theta1_grad.size, order='F'),
                           Theta2_grad.reshape(Theta2_grad.size, order='F')))

    return J, grad
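The comments above point to checkNNGradients; what that check does is compare the analytic gradient with two-sided finite differences. A minimal sketch, assuming cost_fn(params) returns (J, grad):

import numpy as np

def numerical_gradient(cost_fn, params, eps=1e-4):
    # Two-sided finite-difference approximation of the gradient.
    num_grad = np.zeros_like(params)
    perturb = np.zeros_like(params)
    for i in range(params.size):
        perturb[i] = eps
        J_plus, _ = cost_fn(params + perturb)
        J_minus, _ = cost_fn(params - perturb)
        num_grad[i] = (J_plus - J_minus) / (2 * eps)
        perturb[i] = 0.0
    return num_grad

# Usage idea: np.linalg.norm(num_grad - grad) / np.linalg.norm(num_grad + grad)
# should be very small (around 1e-9) for a correct backpropagation.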
Example #4
import numpy as np
# Assumed helper imports (module names follow the exercise's file layout):
from sigmoid import sigmoid
from sigmoidGradient import sigmoidGradient


def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda):
    """computes the cost and gradient of the neural network. The
  parameters for the neural network are "unrolled" into the vector
  nn_params and need to be converted back into the weight matrices.

  The returned parameter grad should be a "unrolled" vector of the
  partial derivatives of the neural network.
    """

    # Reshape nn_params back into the parameters theta1 and theta2, the weight matrices
    # for our 2 layer neural network
    # Obtain theta1 and theta2 back from nn_params

    theta1 = nn_params[0:(hidden_layer_size * (input_layer_size + 1))].reshape((input_layer_size + 1),
                                                                               hidden_layer_size).T
    theta2 = nn_params[(hidden_layer_size * (input_layer_size + 1)):].reshape((hidden_layer_size + 1), num_labels).T

    # Setup some useful variables
    m, _ = X.shape

    # You need to return the following variables correctly
    J = 0
    theta1_grad = np.zeros(theta1.shape)
    theta2_grad = np.zeros(theta2.shape)

    # ====================== YOUR CODE HERE ======================
    # Instructions: You should complete the code by working through the
    #               following parts.
    #
    # Part 1: Feedforward the neural network and return the cost in the
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost
    #         computed in ex4.py
    #
    #	NOTE from GLC: I have included the solutions to Parts 2 and 3 later in this code
    #   
    #
    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         theta1_grad and theta2_grad. You should return the partial derivatives of
    #         the cost function with respect to theta1 and theta2 in theta1_grad and
    #         theta2_grad, respectively. After implementing Part 2, you can check
    #         that your implementation is correct by running checkNNGradients
    #
    #         Note: The vector y passed into the function is a vector of labels
    #               containing values from 1...K. You need to map this vector into a 
    #               binary vector of 1's and 0's to be used with the neural network
    #               cost function.
    #
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it for the 
    #               first time.
    #
    # Part 3: Implement regularization with the cost function and gradients.
    #
    #         Hint: You can implement this around the code for
    #               backpropagation. That is, you can compute the gradients for
    #               the regularization separately and then add them to theta1_grad
    #               and theta2_grad from Part 2.
    #
    # =========================================================================

    # Add ones to the X data matrix

    # Construct a 10xm "y" matrix with all zeros and only one "1" entry
    # note here if the hand-written digit is "0", then that corresponds
    # to a y- vector with 1 in the 10th spot (different from what the
    # homework suggests)
    y_matrix = np.zeros((num_labels, m))  # to be completed
    for i in range(m):
        y_matrix[y[i]-1, i] = 1
    a1 = np.ones((X.shape[0], X.shape[1] + 1))
    a1[:, 1:] = X
    z2 = np.dot(theta1, a1.T)
    a2 = np.ones((z2.shape[0] + 1, a1.shape[0]))
    a2[1:, :] = sigmoid(z2)

    z3 = np.dot(theta2, a2)
    a3 = sigmoid(z3).T
    inner1 = (y_matrix.T * np.log(a3))
    inner2 = (1 - y_matrix.T) * np.log(1 - a3)
    J = (1 / m) * np.sum(-inner1 - inner2)

    # Compute Cost

    # =========================================================================

    # Cost regularisation
    reg = (Lambda / (2 * m)) * (np.sum(np.square(theta1[:, 1:])) + np.sum(np.square(theta2[:, 1:])))
    J = J + reg

    # Gradients
    d3 = a3 - y_matrix.T  # 10x5000
    d2 = theta2[:, 1:].T.dot(d3.T) * sigmoidGradient(z2)  # 25x10 *10x5000 * 25x5000 = 25x5000

    delta1 = d2.dot(a1)  # 25x5000 * 5000x401 = 25x401
    delta2 = d3.T.dot(a2.T)  # 10x5000 *5000x26 = 10x26

    # Gradient regularisation
    theta1_grad = delta1 / m
    reg = (theta1[:, 1:] * Lambda) / m
    theta1_grad[:, 1:] = theta1_grad[:, 1:] + reg

    theta2_grad = delta2 / m
    reg = (theta2[:, 1:] * Lambda) / m
    theta2_grad[:, 1:] = theta2_grad[:, 1:] + reg

    # Unroll gradient
    grad = np.hstack((theta1_grad.T.ravel(), theta2_grad.T.ravel()))

    return J, grad
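The per-example loop that fills y_matrix above can also be written as a single vectorized indexing step; a sketch assuming labels y take values 1..num_labels:

import numpy as np

def one_hot_columns(y, num_labels):
    # (num_labels, m) matrix with a single 1 per column, matching the
    # y_matrix built in the loop above.
    y = np.asarray(y).ravel().astype(int)
    y_matrix = np.zeros((num_labels, y.size))
    y_matrix[y - 1, np.arange(y.size)] = 1
    return y_matrix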
Example #5
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lamda):
    import numpy as np
    import sigmoid as sg
    import sigmoidGradient as sG
    # Restructure nn_params back into Theta1 and Theta2. Python uses 0-based
    # indexing, unlike MATLAB, which is 1-based.
    Theta1 = np.reshape(nn_params[0:hidden_layer_size * (input_layer_size + 1)],
                        (hidden_layer_size, input_layer_size + 1))
    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):],
                        (num_labels, hidden_layer_size + 1))
    # Setup some useful variables
    m = np.size(X, 0)
             
    # You need to return the following variables correctly
    J = 0
    Theta1_grad = np.zeros(np.shape(Theta1), dtype=float)
    Theta2_grad = np.zeros(np.shape(Theta2), dtype=float)
    """
    % Part 1: Feedforward the neural network and return the cost in the
    %         variable J. After implementing Part 1, you can verify that your
    %         cost function computation is correct by verifying the cost
    %         computed in ex4.m
    %
    % Part 2: Implement the backpropagation algorithm to compute the gradients
    %         Theta1_grad and Theta2_grad. You should return the partial derivatives of
    %         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    %         Theta2_grad, respectively. After implementing Part 2, you can check
    %         that your implementation is correct by running checkNNGradients
    %
    %         Note: The vector y passed into the function is a vector of labels
    %               containing values from 1..K. You need to map this vector into a 
    %               binary vector of 1's and 0's to be used with the neural network
    %               cost function.
    %
    %         Hint: We recommend implementing backpropagation using a for-loop
    %               over the training examples if you are implementing it for the 
    %               first time.
    %
    % Part 3: Implement regularization with the cost function and gradients.
    %
    %         Hint: You can implement this around the code for
    %               backpropagation. That is, you can compute the gradients for
    %               the regularization separately and then add them to Theta1_grad
    %               and Theta2_grad from Part 2.
    %
    """
    num_labels=np.size(Theta2,0)

    a1 = np.r_[np.ones((1, m), dtype=float), X.conj().T]
    z2 = np.dot(Theta1, a1)
    a2 = np.r_[np.ones((1, m), dtype=float), sg.sigmoid(z2)]  # 26 x m
    a3 = sg.sigmoid(np.dot(Theta2, a2))  # 10 x m
    # Explode y into num_labels values with Y[k-1, i] = 1 when y[i] == k.
    Y = np.zeros((num_labels, m), dtype=float)
    Y[np.asarray(y).ravel().astype(int) - 1, np.arange(m)] = 1
    
    
    J = (1.0 / m) * np.sum(np.sum((-Y * np.log(a3)) - ((1 - Y) * np.log(1 - a3))))
    # Add regularized error. Drop the bias terms in the 1st columns.
    J = J + (lamda / (2.0 * m)) * np.sum(np.sum(Theta1[:, 1:] ** 2))

    J = J + (lamda / (2.0 * m)) * np.sum(np.sum(Theta2[:, 1:] ** 2))
    # 2. Backpropagate to get gradient information.
    
    d3 = a3 - Y  # 10 x m
    d2 = np.dot(Theta2.conj().T, d3) * np.r_[np.ones((1, m), dtype=float),
                                             sG.sigmoidGradient(z2)]  # 26 x m
    # Vectorized ftw:
    Theta2_grad = (1.0 / m) * np.dot(d3, a2.conj().T)
    Theta1_grad = (1.0 / m) * np.dot(d2[1:, :], a1.conj().T)
    # Add gradient regularization.
    Theta2_grad = Theta2_grad + (lamda / m) * np.c_[np.zeros((np.size(Theta2, 0), 1), dtype=float), Theta2[:, 1:]]
    Theta1_grad = Theta1_grad + (lamda / m) * np.c_[np.zeros((np.size(Theta1, 0), 1), dtype=float), Theta1[:, 1:]]
    grad = np.r_[Theta1_grad.flatten(order='F'), Theta2_grad.flatten(order='F')]

    return [J,grad]
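Several of these snippets reshape and flatten with order='F' so the unrolled vector matches Octave/MATLAB's column-major layout; a tiny demonstration of why packing and unpacking must use the same order:

import numpy as np

Theta = np.arange(6).reshape(2, 3)   # [[0, 1, 2], [3, 4, 5]]
flat_c = Theta.flatten(order='C')    # [0, 1, 2, 3, 4, 5]
flat_f = Theta.flatten(order='F')    # [0, 3, 1, 4, 2, 5]

# Unpacking must use the same order that was used for packing:
assert np.array_equal(flat_f.reshape(2, 3, order='F'), Theta)
assert not np.array_equal(flat_f.reshape(2, 3, order='C'), Theta)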
Example #6
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels,
                   X, y, lambda_param):
    '''
    [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels,
    X, y, lambda) computes the cost and gradient of the neural network. 
    The parameters for the neural network are "unrolled" into the vector
    nn_params and need to be converted back into the weight matrices. 

    The returned parameter grad should be a "unrolled" vector of the
    partial derivatives of the neural network.
    '''

    import numpy as np
    from sigmoid import sigmoid
    from sigmoidGradient import sigmoidGradient

    # Reshape nn_params back into the parameters Theta1 and Theta2
    # the weight matrices for our 2 layer neural network

    Theta1 = np.reshape(
        nn_params[0:hidden_layer_size * (input_layer_size + 1)],
        (hidden_layer_size, (input_layer_size + 1)))

    Theta2 = np.reshape(
        nn_params[(hidden_layer_size * (input_layer_size + 1))::],
        (num_labels, (hidden_layer_size + 1)))

    # Setup some useful variables
    m = X.shape[0]

    # Return the following variables correctly
    J = []
    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)

    # Part 1:
    # Feedforward the neural network and return the cost in the variable J.

    for i in range(m):
        act_1 = X[i]
        act_1 = np.append(1, act_1)  # add 1
        z_2 = np.dot(Theta1, act_1)
        act_2 = sigmoid(z_2)
        act_2 = np.append(1, act_2)  # add 1
        z_3 = np.dot(Theta2, act_2)
        h = sigmoid(z_3)

        # Logical arrays (binary vector of 1's and 0's)
        y_vect = np.zeros(num_labels)
        y_vect[y[i] - 1] = 1

        cost = -1 / m * (
            np.dot(np.transpose(np.vstack(y_vect)), np.log(h)) +
            np.dot(np.transpose(np.vstack(1 - y_vect)), np.log(1 - h)))

        J.append(cost)

        # Part 2: Implement the backpropagation algorithm to compute the gradients
        # Theta1_grad and Theta2_grad.
        # You should return the partial derivatives of the cost function with respect
        # to Theta1 and Theta2 in Theta1_grad and Theta2_grad, respectively.

        # delta at the output layer
        delta_3 = (h - y_vect)
        # delta for the hidden layer
        # remove delta_2_0 (gradients of bias units) by doing Theta2[:,1:]
        delta_2 = np.dot(np.transpose(Theta2[:, 1:]),
                         delta_3) * sigmoidGradient(z_2)
        # Accumulate the gradients (DELTA)
        Theta1_grad = Theta1_grad + \
            np.dot(np.vstack(delta_2), np.transpose(np.vstack(act_1)))

        Theta2_grad = Theta2_grad + \
            np.dot(np.vstack(delta_3), np.transpose(np.vstack(act_2)))

    # Part 3: Implement regularization with the cost function and gradients.
    # Regularized gradient for all Theta
    capital_delta1 = 1 / m * Theta1_grad + (lambda_param / m) * Theta1
    capital_delta2 = 1 / m * Theta2_grad + (lambda_param / m) * Theta2

    # Adjust for the first column of Theta. Not regularization for j=0
    capital_delta1[:, 0] = 1 / m * Theta1_grad[:, 0]
    capital_delta2[:, 0] = 1 / m * Theta2_grad[:, 0]

    # Regularized term
    # Take out the bias term in the first column
    regul_term = lambda_param / (2 * m) * (np.sum(np.power(Theta1[:, 1:], 2)) +
                                           np.sum(np.power(Theta2[:, 1:], 2)))

    J = sum(J) + regul_term

    # Unroll gradients
    grad = []
    grad.extend(
        (list(capital_delta1.flatten()) + list(capital_delta2.flatten())))

    grad = np.array(grad)

    return J, grad
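Example #6 accumulates the cost one example at a time; the same non-regularized cross-entropy can be written in one vectorized expression. A sketch, assuming H and Y are (m, num_labels) arrays of predictions and one-hot labels:

import numpy as np

def cross_entropy_cost(H, Y):
    # Vectorized version of the per-example accumulation in the loop above.
    m = Y.shape[0]
    return -np.sum(Y * np.log(H) + (1 - Y) * np.log(1 - H)) / m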
Example #7
# (Script fragment; array, sigmoid, sigmoidGradient and checkNNCost are
# assumed to be imported/defined earlier in the script.)
g = sigmoid(array([-1, -0.5, 0, 0.5, 1]))
print("Sigmoid evaluated at [-1 -0.5 0 0.5 1]:")
print(g)

input('\nProgram paused. Press enter to continue!!!')

# ================================ Step 4: Sigmoid Gradient ================================
#  Before you start implementing the neural network, you will first
#  implement the gradient for the sigmoid function. You should complete the
#  code in the sigmoidGradient.m file.
#

print("\nEvaluating Sigmoid Gradient function ...\n")

g = sigmoidGradient(array([-1, -0.5, 0, 0.5, 1]))
print("Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:")
print(g)

input('\nProgram paused. Press enter to continue!!!')

# ================================ Step 5: Implement Feedforward (Cost Function) ================================

print("\nChecking Cost Function without Regularization (Feedforward) ...\n")

lambd = 0.0
checkNNCost(lambd)

print('This value should be about 2.09680198349')

input('\nProgram paused. Press enter to continue!!!')
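The scripts above call sigmoid and sigmoidGradient from the exercise's helper files; as a reminder of what those helpers compute (the gradient is g(z) * (1 - g(z))), a minimal sketch:

import numpy as np

def sigmoid(z):
    # Elementwise logistic function.
    return 1.0 / (1.0 + np.exp(-z))

def sigmoidGradient(z):
    # Derivative of the sigmoid, evaluated elementwise at z.
    g = sigmoid(z)
    return g * (1 - g)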
Example #8
from numpy import dot, ones, vstack, zeros

# sigmoid, sigmoidGradient, roll_params and unroll_params are assumed to come
# from the exercise's helper modules.


def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor
    
    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)
  
    # You need to return the following variables correctly 
    Theta_grad = [zeros(w.shape) for w in Theta]

    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a 
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((num_labels, m))
    for i in range(m):
        yv[y[i], i] = 1

    # At this point implement the backpropagation algorithm
    A = []
    a = ones(X.shape[0])
    a = vstack((a, X.transpose()))
    Z = []
    Z.append(a)
    for i in range(num_layers - 1):
        A.append(a.transpose())
        z = dot(Theta[i], a)
        Z.append(z)
        a = sigmoid(z)
        if i != num_layers - 2:
            a = vstack((ones(a.shape[1]), a))
  
    # A: list of result after each layer
    A.append(a.transpose())
    h = a.transpose()

    # delta for the last layer
    delta = h - yv.transpose()
    # calculation of the gradients
    for j in range(num_layers - 2, 0, -1):
        Theta_grad[j] = Theta_grad[j] + dot(delta.transpose(), A[j])
        # calculate delta for the current layer (remove the first column of Theta)
        tmp = dot(Theta[j][:, 1:].transpose(), delta.transpose())
        tmp = tmp.transpose()
        tmp_matrix = zeros(tmp.shape)
        for i in range(m):
            tmp_matrix[i] = sigmoidGradient(Z[j].transpose()[i])
        delta = tmp_matrix * tmp
    Theta_grad[0] = Theta_grad[0] + dot(delta.transpose(), A[0])

    # regularization (divide by m; skip the bias column k == 0)
    for i in range(num_layers - 1):
        for j in range(Theta_grad[i].shape[0]):
            for k in range(Theta_grad[i].shape[1]):
                Theta_grad[i][j, k] = Theta_grad[i][j, k] / m
                if k >= 1:
                    Theta_grad[i][j, k] = Theta_grad[i][j, k] + lambd / m * Theta[i][j, k]
    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
import numpy as np
import numpy.matlib
# Assumed helper imports (module names follow the exercise's file layout):
from sigmoid import sigmoid
from sigmoidGradient import sigmoidGradient


def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels,
                   X, y, l):
    #NNCOSTFUNCTION Implements the neural network cost function for a two layer
    #neural network which performs classification
    #   [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ...
    #   X, y, lambda) computes the cost and gradient of the neural network. The
    #   parameters for the neural network are "unrolled" into the vector
    #   nn_params and need to be converted back into the weight matrices.
    #
    #   The returned parameter grad should be a "unrolled" vector of the
    #   partial derivatives of the neural network.
    #

    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    Theta1 = np.reshape(
        nn_params[0:(hidden_layer_size * (input_layer_size + 1)), ],
        (hidden_layer_size, input_layer_size + 1))
    Theta2 = np.reshape(
        nn_params[(hidden_layer_size * (input_layer_size + 1)):, ],
        (num_labels, hidden_layer_size + 1))

    # ====================== YOUR CODE HERE ======================
    # Instructions: You should complete the code by working through the
    #               following parts.
    #
    # Part 1: Feedforward the neural network and return the cost in the
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost
    #         computed in ex4.m
    #
    m, n = X.shape
    a1 = np.hstack((np.ones((m, 1)), X))
    a2 = np.hstack((np.ones((m, 1)), sigmoid(a1.dot(Theta1.T))))
    h = sigmoid(a2.dot(Theta2.T))

    # Constructing a vector of result ex: for 5 of 10 the 1 should be at
    # fifth position [0 0 0 0 1 0 0 0 0 0] where rows are training set samples
    yVec = np.equal(np.matlib.repmat(list(range(1, num_labels + 1)), m, 1),
                    np.matlib.repmat(y, num_labels, 1).T).astype(int)

    # Cost Function
    cost = -yVec * np.log(h) - (1 - yVec) * np.log(1 - h)
    J = (1 / m) * sum(sum(cost))

    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial derivatives of
    #         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    #         Theta2_grad, respectively. After implementing Part 2, you can check
    #         that your implementation is correct by running checkNNGradients
    #
    #         Note: The vector y passed into the function is a vector of labels
    #               containing values from 1..K. You need to map this vector into a
    #               binary vector of 1's and 0's to be used with the neural network
    #               cost function.
    #
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it for the
    #               first time.
    theta1ExcludingBias = Theta1[:, 1:]
    theta2ExcludingBias = Theta2[:, 1:]
    reg = 1.0 * l / (2 * m) * (sum(sum(np.square(theta1ExcludingBias))) +
                               sum(sum(np.square(theta2ExcludingBias))))

    J = J + reg

    d3 = h - yVec
    D2 = d3.T.dot(a2)

    Z2 = np.hstack((np.ones((m, 1)), a1.dot(Theta1.T)))
    d2 = d3.dot(Theta2) * sigmoidGradient(Z2)
    d2 = d2[:, 1:]
    D1 = d2.T.dot(a1)

    Theta_1_grad = 1.0 * D1 / m

    Theta_2_grad = 1.0 * D2 / m

    # Part 3: Implement regularization with the cost function and gradients.
    #
    #         Hint: You can implement this around the code for
    #               backpropagation. That is, you can compute the gradients for
    #               the regularization separately and then add them to Theta1_grad
    #               and Theta2_grad from Part 2.
    Theta_1_grad[:, 1:] = Theta_1_grad[:, 1:] + 1.0 * l / m * Theta1[:, 1:]
    Theta_2_grad[:, 1:] = Theta_2_grad[:, 1:] + 1.0 * l / m * Theta2[:, 1:]

    # Unroll gradients
    grad = np.hstack((Theta_1_grad.ravel(), Theta_2_grad.ravel()))

    return J, grad
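A hedged sketch of checking the nnCostFunction above numerically on a tiny network; the sin()-based debug data mirrors Example #1's test harness, and numerical_gradient refers to the sketch given after Example #3:

import numpy as np

# Tiny debug setup: 3 inputs, 5 hidden units, 3 labels, 5 examples.
input_layer_size, hidden_layer_size, num_labels, m = 3, 5, 3, 5
n_params = hidden_layer_size * (input_layer_size + 1) + num_labels * (hidden_layer_size + 1)
params = np.sin(np.arange(1, n_params + 1)) / 10.0
X = np.sin(np.arange(1, m * input_layer_size + 1)).reshape(m, input_layer_size) / 5.0
y = 1 + np.arange(1, m + 1) % num_labels

cost_fn = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size,
                                   num_labels, X, y, 0.0)
J, grad = cost_fn(params)
# num_grad = numerical_gradient(cost_fn, params)
# np.max(np.abs(num_grad - grad)) should come out tiny (around 1e-9).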
Example #10
# (Script fragment; np, computeCost, sigmoidGradient and randInitializeWeights
# are assumed to be imported/defined earlier in the script.)

# Compute the value of the cost function
J = computeCost(X, y, num_labels, Theta1, Theta2, lam)

print(
    'Regularized cost function value for the loaded model parameters: {:.4f}'
    .format(J))

input('Program paused. Press Enter to continue ... \n')

# ====== Part 5. Computing the derivative of the sigmoid function ======

print('Part 5. Computing the derivative of the sigmoid function')

z = np.array([1, -0.5, 0, 0.5, 1])
g = sigmoidGradient(z)

print('Sigmoid gradient values for [1, -0.5, 0, 0.5, 1]:')
print('{:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.format(g[0], g[1], g[2], g[3],
                                                  g[4]))

input('Program paused. Press Enter to continue ... \n')

# ============== Part 6. Initializing parameters ===============

print('Part 6. Initializing parameters')

initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)

input('Program paused. Press Enter to continue ... \n')
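Part 6 of the script above calls randInitializeWeights; a minimal sketch of such an initializer (the epsilon_init default is an assumption, not taken from the script):

import numpy as np

def randInitializeWeights(L_in, L_out, epsilon_init=0.12):
    # Break symmetry: uniform weights in [-epsilon_init, epsilon_init],
    # including the bias column (shape: L_out x (1 + L_in)).
    return np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init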
Example #11
import numpy as np
# Assumed helper imports (module names follow the exercise's file layout):
from sigmoid import sigmoid
from sigmoidGradient import sigmoidGradient


def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels,
                   X, y, lambda_value):
    #NNCOSTFUNCTION Implements the neural network cost function for a two layer
    #neural network which performs classification
    #   [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ...
    #   X, y, lambda_value) computes the cost and gradient of the neural network. The
    #   parameters for the neural network are "unrolled" into the vector
    #   nn_params and need to be converted back into the weight matrices.
    #
    #   The returned parameter grad should be a "unrolled" vector of the
    #   partial derivatives of the neural network.
    #

    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    tmp = nn_params.copy()
    Theta1 = np.reshape(tmp[0:hidden_layer_size * (input_layer_size + 1)],
                        (hidden_layer_size, (input_layer_size + 1)),
                        order='F')
    Theta2 = np.reshape(tmp[(hidden_layer_size *
                             (input_layer_size + 1)):len(tmp)],
                        (num_labels, (hidden_layer_size + 1)),
                        order='F')

    # Setup some useful variables
    m = np.shape(X)[0]

    # Computation of the Cost function including regularisation
    # Feedforward
    a2 = sigmoid(np.dot(np.hstack((np.ones((m, 1)), X)), np.transpose(Theta1)))
    a3 = sigmoid(np.dot(np.hstack((np.ones((m, 1)), a2)),
                        np.transpose(Theta2)))

    # Cost function for Logistic Regression summed over all output nodes
    Cost = np.empty((num_labels, 1))
    for k in range(num_labels):
        # which examples fit this label
        y_binary = (y == k + 1)
        # select all predictions for label k
        hk = a3[:, k]
        # compute two parts of cost function for all examples for node k
        Cost[k][0] = np.sum(np.transpose(y_binary) * np.log(hk)) + np.sum(
            ((1 - np.transpose(y_binary)) * np.log(1 - hk)))

    # Sum over all labels and average over examples
    J_no_regularisation = -1. / m * sum(Cost)
    # No regularization over intercept
    Theta1_no_intercept = Theta1[:, 1:]
    Theta2_no_intercept = Theta2[:, 1:]

    # Sum all parameters squared
    RegSum1 = np.sum(np.sum(np.power(Theta1_no_intercept, 2)))
    RegSum2 = np.sum(np.sum(np.power(Theta2_no_intercept, 2)))
    # Add regularisation term to final cost
    J = J_no_regularisation + (lambda_value / (2 * m)) * (RegSum1 + RegSum2)

    # You need to return the following variables correctly
    Theta1_grad = np.zeros(np.shape(Theta1))
    Theta2_grad = np.zeros(np.shape(Theta2))

    # ====================== YOUR CODE HERE ======================
    # Implement the backpropagation algorithm to compute the gradients
    # Theta1_grad and Theta2_grad. You should return the partial derivatives of
    # the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    # Theta2_grad, respectively. After implementing Part 2, you can check
    # that your implementation is correct by running checkNNGradients
    #
    # Note: The vector y passed into the function is a vector of labels
    #       containing values from 1..K. You need to map this vector into a
    #       binary vector of 1's and 0's to be used with the neural network
    #       cost function.
    #
    # Hint: It is recommended to implement backpropagation using a for-loop
    #       over the training examples if you are implementing it for the
    #       first time.
    #
    I = np.eye(num_labels)
    Y = np.zeros((m, num_labels))
    for i in range(m):
        Y[i, :] = I[y[i] - 1, :]

    for t in range(m):
        a1 = X[t, :]
        a1 = np.append([1], a1)
        z2 = np.dot(Theta1, a1)
        a2 = sigmoid(z2)
        a2 = np.append([1], a2)
        z3 = np.dot(Theta2, a2)
        a3 = sigmoid(z3)

        # sigma3 shape is 10 by 1
        sigma3 = a3 - Y[t, :]
        # sigma2 shape is 25 by 1 (eliminate bias)
        sigma2 = np.multiply(
            np.dot(np.transpose(Theta2), sigma3)[1:], sigmoidGradient(z2))
        # combine the forward pass and backward pass; these give the partial derivatives dJ/dTheta
        delta2 = np.multiply(sigma3[np.newaxis].T, a2[np.newaxis])
        delta1 = np.multiply(sigma2[np.newaxis].T, a1[np.newaxis])

        Theta1_grad = Theta1_grad + delta1
        Theta2_grad = Theta2_grad + delta2

    # average on the Theta gradient
    Theta1_grad = Theta1_grad / m + (lambda_value / m) * np.hstack((np.zeros(
        (Theta1.shape[0], 1)), Theta1[:, 1:]))
    Theta2_grad = Theta2_grad / m + (lambda_value / m) * np.hstack((np.zeros(
        (Theta2.shape[0], 1)), Theta2[:, 1:]))

    # -------------------------------------------------------------

    # =========================================================================

    # Unroll gradients
    Theta1_grad = np.reshape(Theta1_grad, Theta1_grad.size, order='F')
    Theta2_grad = np.reshape(Theta2_grad, Theta2_grad.size, order='F')
    grad = np.expand_dims(np.hstack((Theta1_grad, Theta2_grad)), axis=1)

    return J, grad
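Once Theta1 and Theta2 are trained, classification is one more feedforward pass followed by an argmax; a minimal prediction sketch, assuming labels 1..num_labels, one example per row of X, and a sigmoid helper:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def predict(Theta1, Theta2, X):
    # Forward pass; returns labels 1..num_labels (argmax over the output layer).
    m = X.shape[0]
    a1 = np.column_stack((np.ones(m), X))
    a2 = np.column_stack((np.ones(m), sigmoid(a1 @ Theta1.T)))
    a3 = sigmoid(a2 @ Theta2.T)
    return np.argmax(a3, axis=1) + 1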
Example #12
def nnCostFunctionVec(nn_params,
                      input_layer_size,
                      hidden_layer_size,
                      num_labels,
                      X,
                      y,
                      lam,
                      returnType=''):

    import numpy as np
    from sigmoid import sigmoid
    from sigmoidGradient import sigmoidGradient

    Theta1 = nn_params[0:hidden_layer_size * (input_layer_size + 1)].reshape(
        hidden_layer_size, input_layer_size + 1, order='F')
    Theta2 = nn_params[(hidden_layer_size * (input_layer_size + 1)):].reshape(
        num_labels, (hidden_layer_size + 1), order='F')

    (m, n) = X.shape

    J = 0
    Theta1_grad = np.zeros((Theta1.shape))
    Theta2_grad = np.zeros((Theta2.shape))
    grad = 0

    ident = np.eye(Theta2.shape[0])

    yNodes = ident[y.flatten()].T
    yNodes = np.append(yNodes[1:, ], yNodes[0:1, ], axis=0)

    X = np.append(np.ones((m, 1)), X, axis=1)

    if returnType == '' or returnType == 'J':
        h = sigmoid(
            np.dot(Theta2, (np.append(
                np.ones((1, m)), sigmoid(np.dot(Theta1, X.T)), axis=0))))
        J = np.sum(-yNodes * np.log(h) -
                   ((1 - yNodes) * np.log(1 - h))) / m + lam * (
                       np.sum(np.square(Theta2[:, 1:])) +
                       np.sum(np.square(Theta1[:, 1:]))) / (2 * m)

    if returnType == '' or returnType == 'grad':
        delta3 = sigmoid(
            np.dot(
                Theta2,
                np.append(np.ones((1, m)),
                          sigmoid(np.dot(Theta1, X.T)),
                          axis=0))) - yNodes
        delta2 = (np.dot(Theta2.T, delta3) * sigmoidGradient(
            np.append(np.ones((1, m)), np.dot(Theta1, X.T), axis=0)))[1:, ]

        Theta1_grad = np.dot(delta2, X)
        Theta2_grad = np.dot(
            delta3,
            np.append(np.ones((1, m)), sigmoid(np.dot(Theta1, X.T)), axis=0).T)

        Theta1_grad = Theta1_grad / m + (lam * np.append(
            np.zeros((Theta1.shape[0], 1)), Theta1[:, 1:], axis=1)) / m
        Theta2_grad = Theta2_grad / m + (lam * np.append(
            np.zeros((Theta2.shape[0], 1)), Theta2[:, 1:], axis=1)) / m

        grad = np.append(Theta1_grad.flatten('F'), Theta2_grad.flatten('F'))

    if returnType == '':
        return [J, grad]
    elif returnType == 'J':
        return J
    elif returnType == 'grad':
        return grad
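nnCostFunctionVec's returnType switch lets callers ask for the cost or the gradient alone; the same effect can be had by wrapping any (J, grad) function, as in this sketch (handy for optimizers such as scipy.optimize.fmin_cg that expect separate f and fprime callables):

def split_cost_and_grad(cost_fn, *args):
    # Wrap a (J, grad)-returning function into two callables.
    def cost_only(p):
        return cost_fn(p, *args)[0]

    def grad_only(p):
        return cost_fn(p, *args)[1]

    return cost_only, grad_only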
import numpy as np

# sigmoid, sigmoidGradient, roll_params, unroll_params and insertOne are
# assumed to come from the exercise's helper modules (see the sketch after
# this function).


def backwards(nn_weights, layers, X, y, num_labels, lambd):
    """
    :param nn_weights: Neural network parameters (vector)
    :param layers: a list with the number of units per layer.
    :param X: a matrix where every row is a training example for a handwritten digit image
    :param y: a vector with the labels of each instance
    :param num_labels: the number of units in the output layer
    :param lambd: regularization factor
    :return: Computes the gradient fo the neural network.
    """

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = np.zeros((num_labels, m))
    for i in range(len(y)):
        yv[int(y[i]), i] = 1
    yv = np.transpose(yv)

    a = []
    z = []
    x = np.copy(X)
    a.append(insertOne(x))
    z.append(x)

    # if you want to be able to follow the training accuracy:
    # pred = predict(Theta, X)
    # accuracy = np.mean(y == pred) * 100
    # print(accuracy)

    for i in range(num_layers - 1):

        s = np.shape(Theta[i])
        theta = Theta[i][:, 1:s[1]]
        x = np.dot(x, np.transpose(theta))
        x = x + Theta[i][:, 0]
        z.append(x)
        x = sigmoid(x)
        a.append(insertOne(x))

    delta = [np.zeros(w.shape) for w in z]
    delta[num_layers - 1] = (x - yv)

    for i in range(num_layers - 2, 0, -1):
        s = np.shape(Theta[i])
        theta = np.copy(Theta[i][:, 1:s[1]])
        temp = np.dot(np.transpose(theta), np.transpose(delta[i + 1]))
        delta[i] = np.transpose(temp) * sigmoidGradient(z[i])

    Delta = []
    for i in range(num_layers - 1):
        temp = np.dot(np.transpose(delta[i + 1]), a[i])
        Delta.append(temp)

    # if you want to follow the cost during the training:
    # cost = (yv * np.log(x) + (1 - yv) * np.log(1 - x)) / m
    # cost = -np.sum(cost)
    #
    # somme = 0
    #
    # for i in range(num_layers - 1):
    #     somme += lambd * np.sum(Theta[i] ** 2) / (2 * m)
    #
    # cost += somme

    Theta_grad = [(d / m) for d in Delta]

    i = 0
    for t in Theta:
        current = lambd * t / m
        # according to the handout this line ought to be here, but when you
        # run checkNNGradient it seems better to leave it out, so I am not
        # sure ...:
        # current[:, 0] = current[:, 0]*0
        Theta_grad[i] += current
        i += 1

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
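The backwards() implementations in Examples #8 and #12 assume roll_params, unroll_params and (in #12) an insertOne helper that are not shown; a minimal sketch, under the assumption that Theta[i] has shape (layers[i+1], layers[i] + 1):

import numpy as np

def roll_params(nn_weights, layers):
    # Cut one flat vector back into a list of weight matrices; a hypothetical
    # reconstruction of the helper assumed above (the original may differ,
    # e.g. in memory order).
    Theta, offset = [], 0
    for i in range(len(layers) - 1):
        rows, cols = layers[i + 1], layers[i] + 1
        Theta.append(np.reshape(nn_weights[offset:offset + rows * cols], (rows, cols)))
        offset += rows * cols
    return Theta

def unroll_params(Theta):
    # Inverse of roll_params: concatenate all matrices into one flat vector.
    return np.concatenate([t.ravel() for t in Theta])

def insertOne(X):
    # Prepend a column of ones (bias units) to a 2-D array.
    return np.column_stack((np.ones(X.shape[0]), X))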
Example #14
import numpy as np
# Assumed helper modules (module names follow the exercise's file layout):
import sigmoid as s
import sigmoidGradient as sg


def nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lambda_reg):


    Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],\
    (hidden_layer_size,input_layer_size+1),order='F')

    Theta2 = np.reshape(nn_params[hidden_layer_size*(input_layer_size+1):],\
    (num_labels,hidden_layer_size+1),order='F')

    m = len(X)
    J = 0

    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)

    X = np.column_stack((np.ones((m, 1)), X))

    a2 = s.sigmoid(np.dot(X, Theta1.T))

    a2 = np.column_stack((np.ones((a2.shape[0], 1)), a2))

    a3 = s.sigmoid(np.dot(a2, Theta2.T))

    labels = y

    y = np.zeros((m, num_labels))

    for i in range(m):
        y[i, labels[i] - 1] = 1

    cost = 0

    for i in range(m):
        cost += np.sum(y[i] * np.log(a3[i]) + (1 - y[i]) * np.log(1 - a3[i]))

    J = -(1.0 / m) * cost

    sumOfTheta1 = np.sum(np.sum(Theta1[:, 1:]**2))

    sumOfTheta2 = np.sum(np.sum(Theta2[:, 1:]**2))

    J = J + ((lambda_reg / (2.0 * m)) * (sumOfTheta1 + sumOfTheta2))

    bigDelta1 = 0
    bigDelta2 = 0

    for t in range(m):
        x = X[t]

        a2 = s.sigmoid(np.dot(x, Theta1.T))

        a2 = np.concatenate((np.array([1]), a2))

        a3 = s.sigmoid(np.dot(a2, Theta2.T))

        delta3 = np.zeros((num_labels))

        for k in range(num_labels):
            y_k = y[t, k]
            delta3[k] = a3[k] - y_k

        delta2 = (np.dot(Theta2[:, 1:].T, delta3).T) * sg.sigmoidGradient(
            np.dot(x, Theta1.T))

        bigDelta1 += np.outer(delta2, x)
        bigDelta2 += np.outer(delta3, a2)

    Theta1_grad = bigDelta1 / m
    Theta2_grad = bigDelta2 / m

    Theta1_grad_unregularized = np.copy(Theta1_grad)
    Theta2_grad_unregularized = np.copy(Theta2_grad)

    Theta1_grad += (float(lambda_reg) / m) * Theta1
    Theta2_grad += (float(lambda_reg) / m) * Theta2

    print(Theta1_grad.shape)
    print(Theta2_grad.shape)
    print(Theta1_grad)

    Theta1_grad[:, 0] = Theta1_grad_unregularized[:, 0]
    Theta2_grad[:, 0] = Theta2_grad_unregularized[:, 0]

    print(Theta1_grad)

    grad = np.concatenate((Theta1_grad.reshape(Theta1_grad.size, order='F'),
                           Theta2_grad.reshape(Theta2_grad.size, order='F')))

    return J, grad
import numpy as np
# Assumed helper imports (module names follow the exercise's file layout):
from sigmoid import sigmoid
from sigmoidGradient import sigmoidGradient


def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels,
                   X, y, Lambda):
    """computes the cost and gradient of the neural network. The
  parameters for the neural network are "unrolled" into the vector
  nn_params and need to be converted back into the weight matrices.

  The returned parameter grad should be a "unrolled" vector of the
  partial derivatives of the neural network.
    """

    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    # Obtain Theta1 and Theta2 back from nn_params
    input_layer_size = int(input_layer_size)
    hidden_layer_size = int(hidden_layer_size)
    num_labels = int(num_labels)
    Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                        (hidden_layer_size, input_layer_size + 1),
                        order='F').copy()

    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):],
                        (num_labels, (hidden_layer_size + 1)),
                        order='F').copy()

    # Setup some useful variables
    m, _ = X.shape

    # ====================== YOUR CODE HERE ======================
    # Instructions: You should complete the code by working through the
    #               following parts.
    #
    # Part 1: Feedforward the neural network and return the cost in the
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost
    #         computed in ex4.m
    #
    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial derivatives of
    #         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    #         Theta2_grad, respectively. After implementing Part 2, you can check
    #         that your implementation is correct by running checkNNGradients
    #
    #         Note: The vector y passed into the function is a vector of labels
    #               containing values from 1..K. You need to map this vector into a
    #               binary vector of 1's and 0's to be used with the neural network
    #               cost function.
    #
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it for the
    #               first time.
    #
    # Part 3: Implement regularization with the cost function and gradients.
    #
    #         Hint: You can implement this around the code for
    #               backpropagation. That is, you can compute the gradients for
    #               the regularization separately and then add them to Theta1_grad
    #               and Theta2_grad from Part 2.
    #
    import pandas as pd
    y_categorical = pd.get_dummies(y.ravel()).to_numpy()  # .as_matrix() was removed in newer pandas

    a1 = np.column_stack((np.ones((m, 1)), X))
    z2 = a1.dot(Theta1.T)
    a2 = np.column_stack((np.ones((z2.shape[0], 1)), sigmoid(z2)))
    a3 = sigmoid(a2.dot(Theta2.T))
    J = np.sum(np.log(a3) * y_categorical + np.log(1 - a3) * (1 - y_categorical)) / float(-m) \
        + Lambda * (np.sum(np.square(Theta1[:, 1:])) + np.sum(np.square(Theta2[:, 1:]))) / (2 * m)

    a3_grad = a3 - y_categorical
    Theta2_grad = a3_grad.T.dot(a2) / m + Lambda * np.column_stack((np.zeros(
        (Theta2.shape[0], 1)), Theta2[:, 1:])) / m
    a2_grad = (a3_grad).dot(Theta2[:, 1:]) * sigmoidGradient(z2)
    Theta1_grad = a2_grad.T.dot(a1) / m + Lambda * np.column_stack((np.zeros(
        (Theta1.shape[0], 1)), Theta1[:, 1:])) / m

    # =========================================================================

    # Unroll gradient
    grad = np.hstack((Theta1_grad.T.ravel(), Theta2_grad.T.ravel()))

    return J, grad
Example #16
from numpy import zeros

# sigmoid, sigmoidGradient, roll_params, unroll_params, addColumnOne and
# removeFirstColumn are assumed to come from the exercise's helper modules
# (a sketch of the last two follows after this function).


def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient fo the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    # You need to return the following variables correctly
    Theta_grad = [zeros(w.shape) for w in Theta]

    # ================================ DONE ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((m, num_labels))
    for i in range(m):
        yv[i][y[i]] += 1

    # ================================ DONE ================================
    # At this point implement the backpropagation algorithm

    # At this point calculate the cost of the neural network (feedforward)

    # Step 1: Initialization of useful variables

    # Z and A will store the hidden states of the network, as lists of matrices, of size num_layers
    A = [addColumnOne(X)]
    Z = [addColumnOne(X)]

    # delta will store the delta for each layer from the last to the second layer (in reverse order)
    delta = []

    # Step 2: Feedforward
    for i in range(num_layers - 1):
        h = A[i].dot(Theta[i].T)
        Z.append(h)
        h = addColumnOne(sigmoid(h))
        A.append(h)

    # Step 3: Backpropagation
    d = removeFirstColumn(A[-1]) - yv
    delta.append(d)

    for i in range(num_layers - 2, 0, -1):
        d = removeFirstColumn(d.dot(Theta[i])) * sigmoidGradient(Z[i])
        delta.append(d)

    delta.reverse()
    # delta is of size num_layers-1 (no delta for the input layer)

    for i in range(num_layers - 1):
        Theta_grad[i] += delta[i].T.dot(A[i])
        # DONE: no regularization on the bias weights !!
        Theta_grad[i] += lambd * Theta[i]
        for j in range(Theta[i].shape[0]):
            Theta_grad[i][j, 0] -= lambd * Theta[i][j, 0]
        Theta_grad[i] /= m

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
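The backwards() above assumes addColumnOne and removeFirstColumn helpers that are not shown; a minimal sketch of what they would do:

import numpy as np

def addColumnOne(X):
    # Prepend a column of ones (bias units); a hypothetical reconstruction of
    # the helper assumed above.
    return np.column_stack((np.ones(X.shape[0]), X))

def removeFirstColumn(X):
    # Drop the bias column again.
    return X[:, 1:]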
print('Program paused. Press enter to continue.\n')
pause()



"""## Part 5: Sigmoid Gradient  ================
  Before you start implementing the neural network, you will first
  implement the gradient for the sigmoid function. You should complete the
  code in the sigmoidGradient.py file.
"""

print('\nEvaluating sigmoid gradient...\n')

test_array = np.array([[1, -0.5, 0, 0.5, 1]])
g = sigmoidGradient(test_array)
print('Sigmoid gradient evaluated at [1 -0.5 0 0.5 1]:\n ')
print(g)
print('\n\n')

print('Program paused. Press enter to continue.\n')
pause()


"""## Part 6: Initializing Pameters ================
  In this part of the exercise, you will be starting to implment a two
  layer neural network that classifies digits. You will start by
  implementing a function to initialize the weights of the neural network
  (randInitializeWeights.m)"""

print('\nInitializing Neural Network Parameters ...\n')
import numpy as np
# Assumed helper modules (module names follow the exercise's file layout):
import sigmoid as s
import sigmoidGradient as sg


def nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lambda_val):
    # NNCOSTFUNCTION Implements the neural network cost function for a two layer
    # neural network which performs classification
    #   [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ...
    #   X, y, lambda) computes the cost and gradient of the neural network. The
    #   parameters for the neural network are "unrolled" into the vector
    #   nn_params and need to be converted back into the weight matrices.
    #
    #   The returned parameter grad should be a "unrolled" vector of the
    #   partial derivatives of the neural network.

    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)], \
                        (hidden_layer_size, input_layer_size + 1), order='F')

    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):], \
                        (num_labels, hidden_layer_size + 1), order='F')

    # Setup some useful variables
    m = len(X)

    # # You need to return the following variables correctly
    J = 0
    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)

    # ====================== YOUR CODE HERE ======================
    #         Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial derivatives of
    #         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    #         Theta2_grad, respectively. After implementing Part 2, you can check
    #         that your implementation is correct by running checkNNGradients
    #
    #         Note: The vector y passed into the function is a vector of labels
    #               containing values from 1..K. You need to map this vector into a
    #               binary vector of 1's and 0's to be used with the neural network
    #               cost function.
    #
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it for the
    #               first time.

    # the algorithm is described in our given assignment pdf

    # one bias column form input to second layer (only ones)
    # X is a1
    X = np.column_stack((np.ones((m, 1)), X))

    # hidden layer + adding bias column
    a2 = s.sigmoid(np.dot(X, Theta1.T))
    a2 = np.column_stack((np.ones((a2.shape[0], 1)), a2))

    # outer layer
    a3 = s.sigmoid(np.dot(a2, Theta2.T))

    # calculating the cost function (non-regularized)
    # only values 0 or 1
    cost = 0
    y_ = y
    # setting y to a matrix with m (as the number of inputs) and num_labels
    y = np.zeros((m, num_labels))
    # for every label, convert it into vector of 0s and a 1 in the appropriate position
    for i in range(m):
        y[i, y_[i] - 1] = 1

    # calculate cost by summing
    for i in range(m):
        cost += np.sum(y[i] * np.log(a3[i]) + (1 - y[i]) * np.log(1 - a3[i]))

    J = -(1.0 / m) * cost

    # note first column are bias units - this is why we start from the second column
    sumOfTheta1 = np.sum(np.sum(Theta1[:, 1:]**2))
    sumOfTheta2 = np.sum(np.sum(Theta2[:, 1:]**2))

    J = J + ((lambda_val / (2.0 * m)) * (sumOfTheta1 + sumOfTheta2))

    # forward propagation + backpropagation step
    # iterate over training examples
    for t in range(m):

        x = X[t]

        # hidden layer
        # z2 = Theta1 * a1, sigmoid(z)
        # returns vector
        a2 = s.sigmoid(np.dot(x, Theta1.T))

        # append bias values - to third layer
        a2 = np.append([1], a2)

        # outer layer
        # z3 = Theta2 * a2, sigmoid(z)
        # returns vector
        a3 = s.sigmoid(np.dot(a2, Theta2.T))

        # create delta with only zeros
        delta3 = np.zeros((num_labels))

        # num_labels output units (10 here) are used to compute delta3;
        # y[t, k] indicates whether the current training example belongs to
        # class k (y[t, k] = 1) or to a different class (y[t, k] = 0)
        for k in range(num_labels):

            delta3[k] = a3[k] - y[t, k]

        # compute delta2 = Theta2(:, 2:end)' * delta3 .* sigmoidGradient(z2), skipping the bias column of Theta2
        delta2 = (np.dot(Theta2[:, 1:].T, delta3).T) * sg.sigmoidGradient(
            np.dot(x, Theta1.T))

        Theta1_grad += np.outer(delta2, x)
        Theta2_grad += np.outer(delta3, a2)

    ## UNREGULARIZED

    # divide the accumulated gradients by m
    Theta1_grad = Theta1_grad / m
    Theta2_grad = Theta2_grad / m

    ## REGULARIZATION

    # keep unregularized copies so the bias-column gradients can be restored below
    tmp1 = np.copy(Theta1_grad)
    tmp2 = np.copy(Theta2_grad)

    # regularize using lambda
    Theta1_grad += (float(lambda_val) / m) * Theta1
    Theta2_grad += (float(lambda_val) / m) * Theta2

    Theta1_grad[:, 0] = tmp1[:, 0]
    Theta2_grad[:, 0] = tmp2[:, 0]

    # # =========================================================================

    # Unroll gradients
    Theta1_grad = np.reshape(Theta1_grad, Theta1_grad.size, order='F')
    Theta2_grad = np.reshape(Theta2_grad, Theta2_grad.size, order='F')
    grad = np.expand_dims(np.hstack((Theta1_grad, Theta2_grad)), axis=1)

    return J, grad
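
# A minimal illustrative sketch of the label-to-one-hot mapping described in
# the "Note" above (labels 1..K mapped to binary row vectors). The variable
# names and the tiny label array below are hypothetical, not taken from the
# example; the loop inside the function above does the same thing element by element.
import numpy as np

labels = np.array([3, 1, 2, 3])             # m labels in the range 1..num_labels
num_labels = 3
Y_onehot = np.eye(num_labels)[labels - 1]   # shape (m, num_labels); row i has a 1 in column labels[i]-1
# Y_onehot == [[0, 0, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]]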
Beispiel #19
0
def nnCostFunction(thetas, X, y, struc, lambd=1.0, bias=1):
    j = 0.0
    grad = {}
    grad_final = np.empty_like([]) 
    m,n = X.shape
    hidden = []
    t1 = 0
    t2 = 0
    
#     try:
#         my2, ny2 = y2.shape
#     except:
#         ny2 = 1
#     
#     if ny2 < 2:
#         y = np.zeros((len(y2),y2.max()+1))
#         for i in range(0,len(y2)):
#             for ii in range(0,len(y[i])):
#                 if y2[i] == ii:
#                     y[i][ii] = 1
#     else:
#         y = y2
        
    for i in range(0,len(struc)):
        m2 = struc[i][0]
        n2 = struc[i][1]
        t2 += m2 * n2
        hidden.append({'layer': i,'theta': thetas[t1:t2].reshape(n2,m2).transpose()})
        t1 = t2
    local = {'a1': X,'t': 0.0}
    c = 1
    last = ''
    if bias == 1:
        for layer in hidden:
            theta = layer['theta']
            local['Theta' + str(c)] = theta 
            local['theta' + str(c)] = theta.copy()
            local['theta' + str(c)][:,0] = 0.0
            local['t'] += (local['theta' + str(c)][:]**2).sum()
            local['a'+ str(c)] = np.hstack((np.ones((m,1)),local['a'+ str(c)]))
            c += 1
            local['z'+ str(c)] = local['a'+ str(c - 1)].dot(theta.conj().transpose())
            local['a'+ str(c)] = s.sigmoid(local['z'+ str(c)])
            last = 'a' + str(c)
            
        cost = y * np.log(local[last]) + (1 - y) * np.log(1 - local[last])
        r = (lambd / (2.0 * m)) * local['t']
        j = -(1.0 / m) * cost.sum() + r
        

        local['s' + str(c)] = local['a'+ str(c)] - y
        for i in range(1,(c)):
            local['s' + str(c-i)] = ((local['s' + str(c)]).dot(local['Theta' + str(c-1)][:,1:])) * sigg.sigmoidGradient(local['z'+ str(c-1)])
        for i in range(0,c-1):
            delta = (local['s' + str(c-i)].conj().transpose()).dot(local['a'+ str(c-(i+1))])
            r = (lambd / m) * local['theta' + str(c-(i+1))]
            grad['Theta' + str(c-(i+1))] = (1.0 / m) * delta + r
        for i in range(1,c):
            grad_final =  np.hstack((grad_final.T.ravel(), grad['Theta' + str(i)].T.ravel()))
    return (j, grad_final)
def main():
    ''' Main function  '''

    ## %% =========== Part 1: Loading and Visualizing Data =============
    #%  We start the exercise by first loading and visualizing the dataset. 
    #%  You will be working with a dataset that contains handwritten digits.
    #%


    # Read the Matlab data
    m, n, X, y = getMatlabTrainingData()

    # number of features
    input_layer_size = n    
 

    # Select some random images from X
    print('Selecting random examples of the data to display.\n')
    sel = np.random.permutation(m)
    sel = sel[0:100]
    
    #  Re-work the data orientation of each training example
    image_size = 20
    XMatlab = np.copy(X) # Need a deep copy, not just the reference
    for i in range(m): 
        XMatlab[i, :] = XMatlab[i, :].reshape(image_size, image_size).transpose().reshape(1, image_size*image_size)

    # display the sample images
    displayData(XMatlab[sel, :])

    # Print Out the labels for what is being seen. 
    print('These are the labels for the data ...\n')
    print(y[sel, :].reshape(10, 10))

    # Pause program
    print("Program paused. Press Ctrl-D to continue.\n")
    code.interact(local=dict(globals(), **locals()))
    print(" ... continuing\n ")  


#%% ================ Part 2: Loading Parameters ================
#% In this part of the exercise, we load some pre-initialized 
# % neural network parameters.

    print('\nLoading Saved Neural Network Parameters ...\n')

    # Load the weights into variables Theta1 and Theta2
    import scipy.io as sio
    fnWeights = '/home/jennym/Kaggle/DigitRecognizer/ex4/ex4weights.mat'
    weights = sio.loadmat(fnWeights)
    Theta1 = weights['Theta1']
    Theta2 = weights['Theta2']

    #% Unroll parameters 
    nn_params = np.hstack((Theta1.ravel(order='F'), Theta2.ravel(order='F')))

#%% ================ Part 3: Compute Cost (Feedforward) ================
#%  For the neural network, you should first start by implementing the
#%  feedforward part of the neural network that returns the cost only. You
#%  should complete the code in nnCostFunction.m to return cost. After
#%  implementing the feedforward to compute the cost, you can verify that
#%  your implementation is correct by verifying that you get the same cost
#%  as us for the fixed debugging parameters.
#%
#%  We suggest implementing the feedforward cost *without* regularization
#%  first so that it will be easier for you to debug. Later, in part 4, you
#%  will get to implement the regularized cost.
#%
    print('\nFeedforward Using Neural Network ...\n')

    #% Weight regularization parameter (we set this to 0 here).
    MLlambda = 0.0

    # Kludge: put y back to the MATLAB convention, then adjust for Python's
    # 0-based indexing into y_matrix later
    y[(y == 0)] = 10
    y = y - 1
    J, _ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, MLlambda)

    print('Cost at parameters (loaded from ex4weights): ' + str(J) + 
          '\n (this value should be about 0.287629)\n')

    # Pause
    print("Program paused. Press Ctrl-D to continue.\n")
    code.interact(local=dict(globals(), **locals()))
    print(" ... continuing\n ")  

#%% =============== Part 4: Implement Regularization ===============
#%  Once your cost function implementation is correct, you should now
#%  continue to implement the regularization with the cost.
#%

    print('\nChecking Cost Function (with Regularization) ... \n')

    # % Weight regularization parameter (we set this to 1 here).
    MLlambda = 1.0

    J, _ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, MLlambda)

    print('Cost at parameters (loaded from ex4weights): ' + str(J) +
         '\n(this value should be about 0.383770)\n');

    # Pause
    print("Program paused. Press Ctrl-D to continue.\n")
    code.interact(local=dict(globals(), **locals()))
    print(" ... continuing\n ")  


#%% ================ Part 5: Sigmoid Gradient  ================
#%  Before you start implementing the neural network, you will first
#%  implement the gradient for the sigmoid function. You should complete the
#%  code in the sigmoidGradient.m file.
#%

    print('\nEvaluating sigmoid gradient...\n')
    g = sigmoidGradient(np.array([1, -0.5,  0,  0.5, 1]))
    print('Sigmoid gradient evaluated at [1 -0.5 0 0.5 1]:\n  ')
    print(g)
    print('\n\n')

    # Pause
    print("Program paused. Press Ctrl-D to continue.\n")
    code.interact(local=dict(globals(), **locals()))
    print(" ... continuing\n ")  

 
#%% ================ Part 6: Initializing Parameters ================
#%  In this part of the exercise, you will be starting to implement a two
#%  layer neural network that classifies digits. You will start by
#%  implementing a function to initialize the weights of the neural network
#%  (randInitializeWeights.m)

    print('\nInitializing Neural Network Parameters ...\n')

    initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
    initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)

    #% Unroll parameters
    initial_nn_params = np.hstack(( initial_Theta1.ravel(order = 'F'),
                                   initial_Theta2.ravel(order = 'F')))
    # Pause
    print("Program paused. Press Ctrl-D to continue.\n")
    code.interact(local=dict(globals(), **locals()))
    print(" ... continuing\n ")  


#%% =============== Part 7: Implement Backpropagation ===============
#%  Once your cost matches up with ours, you should proceed to implement the
#%  backpropagation algorithm for the neural network. You should add to the
#%  code you've written in nnCostFunction.m to return the partial
#%  derivatives of the parameters.
#%
    print('\nChecking Backpropagation... \n')

    #%  Check gradients by running checkNNGradients
    checkNNGradients()

    # Pause
    print("Program paused. Press Ctrl-D to continue.\n")
    code.interact(local=dict(globals(), **locals()))
    print(" ... continuing\n ")  

#%% =============== Part 8: Implement Regularization ===============
#%  Once your backpropagation implementation is correct, you should now
#%  continue to implement the regularization with the cost and gradient.
#%

    print('\nChecking Backpropagation (w/ Regularization) ... \n')

    #%  Check gradients by running checkNNGradients
    MLlambda = 3
    checkNNGradients(MLlambda)

    # Pause
    print("Program paused. Press Ctrl-D to continue.\n")
    code.interact(local=dict(globals(), **locals()))
    print(" ... continuing\n ")  

    #% Also output the costFunction debugging values
    debug_J, _  = nnCostFunction(nn_params, input_layer_size,
                          hidden_layer_size, num_labels, X, y, MLlambda)

    print('\n\n Cost at (fixed) debugging parameters (w/ lambda = ' + 
          '{0}): {1}'.format(MLlambda, debug_J))
    print('\n  (this value should be about 0.576051)\n\n')

    # Pause
    print("Program paused. Press Ctrl-D to continue.\n")
    code.interact(local=dict(globals(), **locals()))
    print(" ... continuing\n ")

#%% =================== Part 8b: Training NN ===================
#%  You have now implemented all the code necessary to train a neural 
#%  network. To train your neural network, we will now use "fmincg", which
#%  is a function which works similarly to "fminunc". Recall that these
#%  advanced optimizers are able to train our cost functions efficiently as
#%  long as we provide them with the gradient computations.
#%
    print ('\nTraining Neural Network... \n')

    #%  After you have completed the assignment, change the MaxIter to a larger
    #%  value to see how more training helps.
    #% jkm change maxIter from 50-> 400
    options = {'maxiter': MAXITER}

    #%  You should also try different values of lambda
    MLlambda = 1

    #% Create "short hand" for the cost function to be minimized
    costFunc = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size,
                               num_labels, X, y, MLlambda)

    #% Now, costFunction is a function that takes in only one argument (the
    #% neural network parameters)

    '''
    NOTES: Call scipy optimize minimize function
        method : str or callable, optional Type of solver. 
           CG -> Minimization of scalar function of one or more variables 
                 using the conjugate gradient algorithm.

        jac : bool or callable, optional Jacobian (gradient) of objective function. 
              Only for CG, BFGS, Newton-CG, L-BFGS-B, TNC, SLSQP, dogleg, trust-ncg. 
              If jac is a Boolean and is True, fun is assumed to return the gradient 
              along with the objective function. If False, the gradient will be 
              estimated numerically. jac can also be a callable returning the 
              gradient of the objective. In this case, it must accept the same 
              arguments as fun.
        callback : callable, optional. Called after each iteration, as callback(xk), 
              where xk is the current parameter vector.
'''
    # Setup a callback for displaying the cost at the end of each iteration 
    class Callback(object): 
        def __init__(self): 
            self.it = 0 
        def __call__(self, p): 
            self.it += 1 
            print "Iteration %5d | Cost: %e" % (self.it, costFunc(p)[0]) 
 
   
    result = sci.minimize(costFunc, initial_nn_params, method='CG', 
                   jac=True, options=options, callback=Callback()) 
    nn_params = result.x 
    cost = result.fun 
 
    # matlab: [nn_params, cost] = fmincg(costFunction, initial_nn_params, options);

    #% Obtain Theta1 and Theta2 back from nn_params
    Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
               (hidden_layer_size, (input_layer_size + 1)), 
                order = 'F')

    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):], 
               (num_labels, (hidden_layer_size + 1)), 
               order = 'F')  


    # Pause
    print("Program paused. Press Ctrl-D to continue.\n")
    code.interact(local=dict(globals(), **locals()))
    print(" ... continuing\n ")


#%% ================= Part 9: Visualize Weights =================
#%  You can now "visualize" what the neural network is learning by 
#%  displaying the hidden units to see what features they are capturing in 
#%  the data.#

    print('\nVisualizing Neural Network... \n')

    displayData(Theta1[:, 1:])

    # Pause
    print("Program paused. Press Ctrl-D to continue.\n")
    code.interact(local=dict(globals(), **locals()))
    print(" ... continuing\n ")


#%% ================= Part 10: Implement Predict =================
#%  After training the neural network, we would like to use it to predict
#%  the labels. You will now implement the "predict" function to use the
#%  neural network to predict the labels of the training set. This lets
#%  you compute the training set accuracy.

    pred = predict(Theta1, Theta2, X)

    # JKM - my array was column stacked - don't understand why this works
    pp = np.row_stack(pred)
    accuracy = np.mean(np.double(pp == y)) * 100

    print('\nTraining Set Accuracy: {0} \n'.format(accuracy))

    # Pause
    print("Program paused. Press Ctrl-D to continue.\n")
    code.interact(local=dict(globals(), **locals()))
    print(" ... continuing\n ")

  
# ========================================

    # All Done!
    return
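
# Part 10 above calls a predict(Theta1, Theta2, X) helper that is imported
# elsewhere. A minimal sketch of what such a helper typically does (forward
# propagation followed by an argmax over the output layer) is given below;
# predict_sketch and its local sigmoid are hypothetical names, and the
# commented-out usage uses made-up shapes.
import numpy as np

def predict_sketch(Theta1, Theta2, X):
    """Return 1-based label predictions for each row of X."""
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    m = X.shape[0]
    a1 = np.column_stack((np.ones(m), X))                          # add bias column
    a2 = np.column_stack((np.ones(m), sigmoid(a1.dot(Theta1.T))))  # hidden layer + bias
    a3 = sigmoid(a2.dot(Theta2.T))                                 # (m, num_labels)
    return np.argmax(a3, axis=1) + 1                               # labels are 1-based

# Usage sketch with tiny random weights:
# rng = np.random.default_rng(0)
# p = predict_sketch(rng.normal(size=(5, 4)), rng.normal(size=(3, 6)), rng.normal(size=(10, 3)))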
Beispiel #21
0
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor

    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)

    Theta_grad = [np.zeros(w.shape) for w in Theta]

    yv = np.zeros((num_labels, m))
    for i in range(m):
        yv[y[i]][i] = 1

    # Implementation of the backpropagation algorithm

    for i in range(m):

        a_values, z_values = [], []  # lists storing the activations and pre-activations of each layer

        a = np.append([1], X[i, :])
        a_values.append(a)

        # Loop of the feedforward algorithm
        for k in range(num_layers - 1):
            z = np.dot(Theta[k], a)
            z_values.append(z)
            a = np.append([1], sigmoid(z))
            a_values.append(a)

        delta_layer = a[1:] - yv[:, i]  # error array of the outer layer
        # np.outer to calculate the matrix product of delta_layer.T and a_values[-2]
        Theta_grad[-1] += np.outer(delta_layer, a_values[-2]) / m

        # Descending loop
        for h in range(num_layers - 2):
            # Error of the (num_layers - 2 - h)-th hidden layer
            # The error that corresponds to the bias factors is not taken into account
            delta_layer = np.dot(Theta[-1 - h].T,
                                 delta_layer)[1:] * sigmoidGradient(
                                     z_values[-2 - h])
            # Calculation of the gradient
            Theta_grad[-2 - h] += np.outer(delta_layer, a_values[-3 - h]) / m

    #Regularization
    for h in range(num_layers - 1):
        # The terms corresponding to the bias factors are not regularized
        Theta_grad[h][:, 1:] += lambd * Theta[h][:, 1:] / m

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad
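
# backwards() above relies on roll_params / unroll_params helpers that are not
# shown. A self-contained sketch with the layout the code implies (a flat
# weight vector holding one (layers[k+1], layers[k]+1) matrix per connection)
# follows; the *_sketch names are hypothetical stand-ins, not the original helpers.
import numpy as np

def unroll_params_sketch(Theta_list):
    """Flatten a list of weight matrices into a single vector."""
    return np.concatenate([T.ravel() for T in Theta_list])

def roll_params_sketch(nn_weights, layers):
    """Rebuild the per-layer weight matrices from the flat vector."""
    Theta, start = [], 0
    for k in range(len(layers) - 1):
        rows, cols = layers[k + 1], layers[k] + 1
        Theta.append(nn_weights[start:start + rows * cols].reshape(rows, cols))
        start += rows * cols
    return Theta

# Round trip on a tiny 2-3-1 network:
# layers = [2, 3, 1]
# Ts = [np.arange(9.).reshape(3, 3), np.arange(4.).reshape(1, 4)]
# Ts2 = roll_params_sketch(unroll_params_sketch(Ts), layers)
# assert all(np.array_equal(a, b) for a, b in zip(Ts, Ts2))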
Beispiel #22
0
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels,
                   X, y, lambda_value):
    #NNCOSTFUNCTION Implements the neural network cost function for a two layer
    #neural network which performs classification
    #   [J grad] = NNCOSTFUNCTION(nn_params, hidden_layer_size, num_labels, ...
    #   X, y, lambda) computes the cost and gradient of the neural network. The
    #   parameters for the neural network are "unrolled" into the vector
    #   nn_params and need to be converted back into the weight matrices.
    #
    #   The returned parameter grad should be an "unrolled" vector of the
    #   partial derivatives of the neural network.
    #

    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    Theta1 = nn_params[0:hidden_layer_size * (input_layer_size + 1)].reshape(
        hidden_layer_size, input_layer_size + 1)
    Theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):].reshape(
        num_labels, hidden_layer_size + 1)

    # Setup some useful variables
    m, n = X.shape

    # You need to return the following variables correctly
    J = 0
    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)

    # ====================== YOUR CODE HERE ======================
    # Instructions: You should complete the code by working through the
    #               following parts.
    #
    # Part 1: Feedforward the neural network and return the cost in the
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost
    #         computed in ex4.m
    #
    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial derivatives of
    #         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    #         Theta2_grad, respectively. After implementing Part 2, you can check
    #         that your implementation is correct by running checkNNGradients
    #
    #         Note: The vector y passed into the function is a vector of labels
    #               containing values from 1..K. You need to map this vector into a
    #               binary vector of 1's and 0's to be used with the neural network
    #               cost function.
    #
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it for the
    #               first time.
    #
    # Part 3: Implement regularization with the cost function and gradients.
    #
    #         Hint: You can implement this around the code for
    #               backpropagation. That is, you can compute the gradients for
    #               the regularization separately and then add them to Theta1_grad
    #               and Theta2_grad from Part 2.
    #

    # Feed forward
    a1 = np.column_stack([np.ones(m), X])
    z2 = np.matmul(a1, Theta1.T)
    a2 = sigmoid(z2)
    a2 = np.column_stack([np.ones(m), a2])
    z3 = np.matmul(a2, Theta2.T)
    h = sigmoid(z3)

    # Main term of the cost function
    for k in range(1, num_labels + 1):
        yk = (y == k).astype(int)
        hk = h[:, k - 1]
        Jk = np.sum(-yk * np.log(hk) - (1 - yk) * np.log(1 - hk)) / m
        J = J + Jk

    # Regularization term of the cost function
    J = J + lambda_value * (np.sum(np.sum(Theta1[:, 1:]**2)) +
                            np.sum(np.sum(Theta2[:, 1:]**2))) / (2 * m)

    # Backpropagation
    for t in range(1, m + 1):
        # For each training sample
        d3 = np.zeros((1, num_labels))
        for k in range(1, num_labels + 1):
            yk = (y[t - 1] == k).astype(int)
            d3[0, k - 1] = h[t - 1, k - 1] - yk
        d2 = np.multiply(np.dot(Theta2.T, d3.T),
                         sigmoidGradient(np.r_[1, z2[t - 1, :]])[None].T)
        d2 = d2[1:]
        Theta1_grad = Theta1_grad + np.dot(d2, a1[t - 1, :][None])
        Theta2_grad = Theta2_grad + np.dot(d3.T, a2[t - 1, :][None])
    # Main term of the gradient
    Theta1_grad = Theta1_grad / m
    Theta2_grad = Theta2_grad / m
    # Regularization term of the gradient
    Theta1_grad[:, 1:] = Theta1_grad[:, 1:] + lambda_value * Theta1[:, 1:] / m
    Theta2_grad[:, 1:] = Theta2_grad[:, 1:] + lambda_value * Theta2[:, 1:] / m

    # -------------------------------------------------------------

    # =========================================================================

    # Unroll gradients
    grad = np.concatenate([Theta1_grad.ravel(), Theta2_grad.ravel()])

    return (J, grad)
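
# checkNNGradients, referenced in the comments above, compares the analytic
# gradient against a central-difference estimate. A generic sketch of that
# check is shown below; numerical_gradient_sketch and the toy quadratic cost
# are hypothetical, illustrative names.
import numpy as np

def numerical_gradient_sketch(costFunc, theta, eps=1e-4):
    """Central-difference approximation of the gradient of costFunc at theta."""
    grad = np.zeros_like(theta)
    for i in range(theta.size):
        step = np.zeros_like(theta)
        step[i] = eps
        grad[i] = (costFunc(theta + step)[0] - costFunc(theta - step)[0]) / (2.0 * eps)
    return grad

# Toy check: J(theta) = sum(theta**2) has gradient 2*theta
# toy = lambda t: (np.sum(t ** 2), 2.0 * t)
# t0 = np.array([1.0, -2.0, 0.5])
# print(np.max(np.abs(numerical_gradient_sketch(toy, t0) - toy(t0)[1])))  # should be ~1e-9 or smaller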
Beispiel #23
0
        displayData(tmp)

        pred = predict(Theta1, Theta2, tmp)
        print('Neural Network Prediction: ', pred, '(digit ', pred % 10, ')')

        input('Program paused. Press enter to continue')

# ================ Part 4: Sigmoid Gradient  ================
#  Before you start implementing backpropagation, you will first
#  implement the gradient for the sigmoid function. You should complete the
#  code in the sigmoidGradient.m file.
#

print('Evaluating sigmoid gradient...')
example = np.array([-15, -1, -0.5, 0, 0.5, 1, 15])
g = sigmoidGradient(example)
print('Sigmoid gradient evaluated at', example, ':')
print(g)

# ================ Part 5: Initializing Parameters ================
#  To learn a two-layer neural network that classifies digits, you will start
#  by implementing a function to initialize the weights of the neural network
#  (randInitializeWeights.m)

print('Initializing Neural Network Parameters ...')

initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)

# Unroll parameters
initial_Theta1 = np.reshape(initial_Theta1, initial_Theta1.size, order='F')
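
# Part 4 above evaluates sigmoidGradient without showing its body. The usual
# definition, g'(z) = g(z) * (1 - g(z)) with g the sigmoid, can be sketched as
# the self-contained function below (sigmoidGradient_sketch is a hypothetical
# name, not the module imported by this example).
import numpy as np

def sigmoidGradient_sketch(z):
    """Element-wise derivative of the sigmoid function."""
    g = 1.0 / (1.0 + np.exp(-np.asarray(z, dtype=float)))
    return g * (1.0 - g)

# sigmoidGradient_sketch([-1, -0.5, 0, 0.5, 1]) peaks at 0.25 for z = 0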
Beispiel #24
0
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels,
                   X, y, _lambda):

    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    Theta1 = nn_params[0:hidden_layer_size * (input_layer_size + 1)].reshape(
        hidden_layer_size, input_layer_size + 1)

    Theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):].reshape(
        num_labels, hidden_layer_size + 1)

    # Setup some useful variables
    m = len(y)  # number of training examples

    # ====================== YOUR CODE HERE ======================
    # Instructions: You should complete the code by working through the
    #               following parts.
    #
    # Part 1: Feedforward the neural network and return the cost in the
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost
    #         computed in ex4.m
    #
    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial derivatives of
    #         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    #         Theta2_grad, respectively. After implementing Part 2, you can check
    #         that your implementation is correct by running checkNNGradients
    #
    #         Note: The vector y passed into the function is a vector of labels
    #               containing values from 1..K. You need to map this vector into a
    #               binary vector of 1's and 0's to be used with the neural network
    #               cost function.
    #
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it for the
    #               first time.
    #
    # Part 3: Implement regularization with the cost function and gradients.
    #
    #         Hint: You can implement this around the code for
    #               backpropagation. That is, you can compute the gradients for
    #               the regularization separately and then add them to Theta1_grad
    #               and Theta2_grad from Part 2.

    a1 = np.vstack((np.ones(m), X.T)).T
    a2 = sigmoid(np.dot(a1, Theta1.T))
    a2 = np.vstack((np.ones(m), a2.T)).T
    a3 = sigmoid(np.dot(a2, Theta2.T))
    y = np.tile((np.arange(num_labels) + 1) % 10,
                (m, 1)) == np.tile(y, (1, num_labels))

    regTheta1 = Theta1[:, 1:]
    regTheta2 = Theta2[:, 1:]

    J = -np.sum(y * np.log(a3) + (1-y) * np.log(1-a3)) / m + \
        _lambda * np.sum(regTheta1*regTheta1) / m/2 + \
        _lambda * np.sum(regTheta2*regTheta2) / m/2

    delta1 = np.zeros(Theta1.shape)
    delta2 = np.zeros(Theta2.shape)
    for i in range(m):
        a1_ = a1[i]
        a2_ = a2[i]
        a3_ = a3[i]
        d3 = a3_ - y[i]
        d2 = np.dot(d3, Theta2) * sigmoidGradient(
            np.append(1, np.dot(a1_, Theta1.T)))
        delta1 = delta1 + np.dot(d2[1:].reshape(-1, 1), a1_.reshape(1, -1))
        delta2 = delta2 + np.dot(d3.reshape(-1, 1), a2_.reshape(1, -1))

    regTheta1 = np.vstack((np.zeros(Theta1.shape[0]), regTheta1.T)).T
    regTheta2 = np.vstack((np.zeros(Theta2.shape[0]), regTheta2.T)).T
    Theta1_grad = delta1 / m + _lambda * regTheta1 / m
    Theta2_grad = delta2 / m + _lambda * regTheta2 / m

    grad = np.append(Theta1_grad.flatten(), Theta2_grad.flatten())
    print('cost value: %lf' % J)

    return J, grad
Beispiel #25
0
g = sigmoid(array([-1, -0.5, 0, 0.5, 1]))
print "Sigmoid evaluated at [1 -0.5 0 0.5 1]:  "
print g

raw_input('\nProgram paused. Press enter to continue!!!')

# ================================ Step 4: Sigmoid Gradient ================================
#  Before you start implementing the neural network, you will first
#  implement the gradient for the sigmoid function. You should complete the
#  code in the sigmoidGradient.m file.
#

print "\nEvaluating Sigmoid Gradient function ...\n"

g = sigmoidGradient(array([-1, -0.5, 0, 0.5, 1]))
print "Sigmoid gradient evaluated at [1 -0.5 0 0.5 1]:  "
print g

raw_input('\nProgram paused. Press enter to continue!!!')

# ================================ Step 5: Implement Feedforward (Cost Function) ================================

print "\nChecking Cost Function without Regularization (Feedforward) ...\n"

lambd = 0.0
checkNNCost(lambd)

print 'This value should be about 2.09680198349'

raw_input('\nProgram paused. Press enter to continue!!!')
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, _lambda):
    
    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    Theta1 = nn_params[0:hidden_layer_size * (input_layer_size + 1)].reshape(\
                 hidden_layer_size, input_layer_size + 1)
    
    Theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):].reshape(\
                 num_labels, hidden_layer_size + 1)

    # Setup some useful variables
    m = len(y) # number of training examples

    # ====================== YOUR CODE HERE ======================
    # Instructions: You should complete the code by working through the
    #               following parts.
    #
    # Part 1: Feedforward the neural network and return the cost in the
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost
    #         computed in ex4.m
    #
    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial derivatives of
    #         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    #         Theta2_grad, respectively. After implementing Part 2, you can check
    #         that your implementation is correct by running checkNNGradients
    #
    #         Note: The vector y passed into the function is a vector of labels
    #               containing values from 1..K. You need to map this vector into a 
    #               binary vector of 1's and 0's to be used with the neural network
    #               cost function.
    #
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it for the 
    #               first time.
    #
    # Part 3: Implement regularization with the cost function and gradients.
    #
    #         Hint: You can implement this around the code for
    #               backpropagation. That is, you can compute the gradients for
    #               the regularization separately and then add them to Theta1_grad
    #               and Theta2_grad from Part 2.

    a1 = np.vstack((np.ones(m), X.T)).T
    a2 = sigmoid(np.dot(a1, Theta1.T))
    a2 = np.vstack((np.ones(m), a2.T)).T
    a3 = sigmoid(np.dot(a2, Theta2.T))
    y = np.tile((np.arange(num_labels)+1)%10,(m,1)) == np.tile(y,(1,num_labels))

    regTheta1 = Theta1[:,1:]
    regTheta2 = Theta2[:,1:]

    J = -np.sum( y * np.log(a3) + (1-y) * np.log(1-a3) ) / m + \
        _lambda * np.sum(regTheta1*regTheta1) / m/2 + \
        _lambda * np.sum(regTheta2*regTheta2) / m/2

    delta1 = np.zeros(Theta1.shape)
    delta2 = np.zeros(Theta2.shape)
    for i in range(m):
        a1_ = a1[i]; a2_ = a2[i]; a3_ = a3[i]
        d3 = a3_ - y[i]; d2 = np.dot(d3,Theta2) * sigmoidGradient(np.append(1,np.dot(a1_, Theta1.T)))
        delta1 = delta1 + np.dot(d2[1:].reshape(-1,1),a1_.reshape(1,-1)); 
        delta2 = delta2 + np.dot(d3.reshape(-1,1), a2_.reshape(1,-1))

    regTheta1 = np.vstack((np.zeros(Theta1.shape[0]), regTheta1.T)).T
    regTheta2 = np.vstack((np.zeros(Theta2.shape[0]), regTheta2.T)).T
    Theta1_grad = delta1 / m + _lambda * regTheta1 / m
    Theta2_grad = delta2 / m + _lambda * regTheta2 / m

    grad = np.append(Theta1_grad.flatten(), Theta2_grad.flatten())
    print('cost value: %lf'%J)
    
    return J, grad
Beispiel #27
0
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, _lambda):

    theta1 = nn_params[:(hidden_layer_size * (input_layer_size +1))].reshape(hidden_layer_size, (input_layer_size+1))
    theta2 = nn_params[(hidden_layer_size * (input_layer_size +1)):].reshape(num_labels, (hidden_layer_size+1))
    #print(theta1.shape)
    #print(theta2.shape)



    # Setup some useful variables
    m = X.shape[0]

    # You need to return the following variables correctly
    J = 0
    theta1_grad = np.zeros(theta1.shape)
    theta2_grad = np.zeros(theta2.shape)
    '''
    % ====================== YOUR CODE HERE ======================
    % Instructions: You should complete the code by working through the
    %               following parts.
    %
%  Part 1: Feedforward the neural network and return the cost in the
%         variable J. After implementing Part 1, you can verify that your
%         cost function computation is correct by verifying the cost
%         computed in ex4.m
%
% Part 2: Implement the backpropagation algorithm to compute the gradients
%         Theta1_grad and Theta2_grad. You should return the partial derivatives of
%         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
%         Theta2_grad, respectively. After implementing Part 2, you can check
%         that your implementation is correct by running checkNNGradients
%
%         Note: The vector y passed into the function is a vector of labels
%               containing values from 1..K. You need to map this vector into a
%               binary vector of 1's and 0's to be used with the neural network
%               cost function.
%
%         Hint: We recommend implementing backpropagation using a for-loop
%               over the training examples if you are implementing it for the
%               first time.
%
% Part 3: Implement regularization with the cost function and gradients.
%
%         Hint: You can implement this around the code for
%               backpropagation. That is, you can compute the gradients for
%               the regularization separately and then add them to Theta1_grad
%               and Theta2_grad from Part 2.
%
    '''

    X = np.hstack((np.ones((m, 1)), X))


    # Calculate z2 (5000 x 25)
    # X is 5000 x 401, Theta1 is 25 x 401, Theta2 is 10 x 26
    z2 = X.dot(theta1.T) # %5000 x 25
    a2 = sigmoid(z2)
    a2 = np.hstack((np.ones((a2.shape[0], 1)), a2))
    z3 = a2.dot(theta2.T)
    a3 = sigmoid(z3) #5000 x 10

    labels_transform = np.eye(a3.shape[1])
    y_new = labels_transform[y[:,0],:]  # 5000 x 10

    s = 0

    # cost function
    y_new_flat = y_new.reshape(1,-1)
    h_flat = a3.reshape(-1,1)
    s = (-y_new_flat).dot(np.log(h_flat))
    s = s-(1-y_new_flat).dot(np.log(1-h_flat))
    J = (s/m)[0][0]  # unregularized cost (recomputed with regularization below)


    unbias_Theta1 = theta1[:, 1:theta1.shape[1]]
    unbias_Theta2 = theta2[:, 1:theta2.shape[1]]

    #regularizing cost function

    regularizator_cost = _lambda/(2*m)*(sum(sum(unbias_Theta1**2))+sum(sum(unbias_Theta2**2)))

    J=s/m+regularizator_cost


    delta3 = a3-y_new # 5000 x 10
    delta2 = delta3.dot(theta2) #5000 x 26
    delta2 = delta2[:, 1:delta2.shape[1]] #%5000 x 25

    delta2 = delta2 * sigmoidGradient(z2) #5000 x 25

    DEL1 = 0
    DEL2 = 0


    DEL1 = delta2.T.dot(X)   # 25 x 401
    DEL2 = delta3.T.dot(a2)  # 10 x 26, same shape as Theta2
    DEL1 = DEL1/m
    DEL2  = DEL2/m

    theta1_regul = np.zeros((unbias_Theta1.shape[0],1))
    theta1_regul = np.hstack((theta1_regul, unbias_Theta1))
    theta2_regul = np.zeros((unbias_Theta2.shape[0],1))
    theta2_regul = np.hstack((theta2_regul, unbias_Theta2))



    theta1_grad = DEL1+(_lambda/m)*theta1_regul
    theta2_grad = DEL2+(_lambda/m)*theta2_regul




    grad = np.hstack((theta1_grad.flatten(), theta2_grad.flatten()))
    #print(J[0][0])
    return J[0][0], grad
Beispiel #28
0
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels,
                   X, y, Lambda):
    """computes the cost and gradient of the neural network. The
	parameters for the neural network are "unrolled" into the vector
	nn_params and need to be converted back into the weight matrices.
	The returned parameter grad should be a "unrolled" vector of the
	partial derivatives of the neural network.
		"""
    # Obtain Theta1 and Theta2 back from nn_params
    # hidden_layer_size = 25, input_layer_size = 400  ->  Theta1 is 25 x 401
    Theta1 = nn_params[0:(hidden_layer_size * (input_layer_size + 1))].reshape(
        hidden_layer_size, (input_layer_size + 1))
    Theta2 = nn_params[(hidden_layer_size * (input_layer_size + 1)):].reshape(
        num_labels, (hidden_layer_size + 1))
    m, _ = X.shape

    # Part 1: Feedforward the neural network and return the cost in the
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost computed in ex4.m
    a1 = X  # 5000 * 400
    a1_ = np.column_stack((np.ones((m, 1)), X))  # 5000 * 401
    z2 = Theta1.dot(a1_.T)  # Theta1 shape: (25, 401) -->  25 * 5000
    a2 = np.column_stack(
        (np.ones((z2.T.shape[0], 1)),
         sigmoid(z2.T)))  # (5000, 26) # after sigmoid you add 1's
    z3 = Theta2.dot(a2.T)  # Theta2 shape: (10, 26) -->
    a3 = sigmoid(z3)  #  (10, 5000)
    a3_ = a3.T  # (5000, 10)

    y_ = np.zeros((X.shape[0], 10))  # (5000, 10)
    for i in xrange(m):
        y_[i][y[i] - 1] = 1  # label 10 (the digit 0) maps to index 9

    J = 1.0/m * np.sum(  np.sum( np.multiply( -y_, np.log(a3_) ) - np.multiply(1-y_, np.log(1-a3_)) , 0) )\
     +Lambda/(2.0*m)* ( np.sum( np.square(Theta1[:,1:])) + np.sum( np.square(Theta2[:,1:]))     )
    #								 5000*10,  5000*10

    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial derivatives of
    #         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    #         Theta2_grad, respectively. After implementing Part 2, you can check
    #         that your implementation is correct by running checkNNGradients
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it for the
    #               first time.

    #Note: the Delta matrices should include the bias unit
    #The Delta matrices have the same shape as the theta matrices
    Delta1 = np.zeros((hidden_layer_size, input_layer_size + 1))
    Delta2 = np.zeros((num_labels, hidden_layer_size + 1))

    # Loop over the training points (rows of a1_, which already contain the bias unit)
    for irow in xrange(m):
        myrow = a1_[irow]
        a1 = myrow.reshape((input_layer_size + 1, 1))
        # propagateForward returns (zs, activations) for each layer excluding the input layer
        temp = propagateForward(myrow, [Theta1, Theta2])
        z2 = temp[0][0]
        a2 = temp[0][1]
        z3 = temp[1][0]
        a3 = temp[1][1]
        delta3 = a3 - y_[irow].reshape(a3.shape[0], 1)
        delta2 = Theta2.T[1:, :].dot(delta3) * sigmoidGradient(
            z2)  #remove 0th element
        a2 = np.insert(a2, 0, 1, axis=0)
        Delta1 += delta2.dot(a1.T)  #(25,1)x(1,401) = (25,401) (correct)
        Delta2 += delta3.dot(a2.T)  #(10,1)x(1,26) = (10,26)

    D1 = Delta1 / float(m)
    D2 = Delta2 / float(m)

    #Regularization:
    D1[:, 1:] = D1[:, 1:] + (float(Lambda) / m) * Theta1[:, 1:]
    D2[:, 1:] = D2[:, 1:] + (float(Lambda) / m) * Theta2[:, 1:]
    """Vectorized version
		d3 = a3_ - y_ # 5000x10
		d2 = np.dot(Theta2[:,1:].T, d3.T ) * sigmoidGradient(z2) 
		  # 25x10 *10x5000 * 25x5000 = 25x5000
		
		#why isn't this theta1 dot delta2?
		delta1 = d2.dot(a1) # 25x5000 * 5000x401 = 25x401 
		delta2 = d3.T.dot(a2) # 10x5000 *5000x26 = 10x26
		
		theta1_ = np.c_[np.ones((theta1.shape[0],1)),theta1[:,1:]]
		theta2_ = np.c_[np.ones((theta2.shape[0],1)),theta2[:,1:]]
		
		theta1_grad = delta1/m + (theta1_*reg)/m
		theta2_grad = delta2/m + (theta2_*reg)/m
		"""
    # Unroll gradient
    # grad = np.hstack((Theta1_grad.T.ravel(), Theta2_grad.T.ravel()))

    # Part 3: Implement regularization with the cost function and gradients.
    #         Hint: You can implement this around the code for
    #               backpropagation. That is, you can compute the gradients for
    #               the regularization separately and then add them to Theta1_grad
    #               and Theta2_grad from Part 2.
    return J, flattenParams([D1, D2], input_layer_size, hidden_layer_size,
                            num_labels).flatten()
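
# The backpropagation loop above calls propagateForward(row, Thetas), which is
# not shown. Judging from how its return value is indexed (a list of (z, a)
# pairs, one per layer after the input, as column vectors without the bias
# unit), a compatible sketch could look like this; propagateForward_sketch is
# a hypothetical stand-in, not the original helper.
import numpy as np

def propagateForward_sketch(row, Thetas):
    """Return [(z2, a2), (z3, a3), ...] as column vectors, bias units excluded."""
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    out = []
    a = row.reshape(-1, 1)                 # the input row already includes its bias term
    for Theta in Thetas:
        z = Theta.dot(a)                   # (units_in_next_layer, 1)
        a_next = sigmoid(z)
        out.append((z, a_next))
        a = np.vstack(([[1.0]], a_next))   # re-add the bias unit for the next layer
    return out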
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels,
                   X, y, lamda):
    # Reshape nn_params back into the parameters Theta1 and Theta2,
    # the weight matrices for our 2 layer neural network
    Theta1 = nn_params[:hidden_layer_size * (input_layer_size + 1)].reshape(
        (hidden_layer_size, input_layer_size + 1), order='F')
    Theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):].reshape(
        (num_labels, hidden_layer_size + 1), order='F')

    m = X.shape[0]
    J = 0
    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)

    X = np.hstack((np.ones((m, 1)), X))
    yv = np.zeros((m, num_labels))
    for i in range(m):
        yv[i, y[i][0] - 1] = 1

    # Part 1: Feedforward the neural network and return the cost in the
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost
    #         computed in ex4.py
    a1 = X
    a2 = np.hstack((np.ones((m, 1)), sigmoid(a1.dot(Theta1.T))))
    a3 = sigmoid(a2.dot(Theta2.T))

    for i in range(m):
        J += (-yv[i, :] * np.log(a3[i, :]) -
              (1 - yv[i, :]) * np.log(1 - a3[i, :])).sum()
    J /= m
    J += ((Theta1[:, 1:]**2).sum() + (Theta2[:, 1:]**2).sum()) * lamda / 2 / m

    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial derivatives of
    #         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    #         Theta2_grad, respectively. After implementing Part 2, you can check
    #         that your implementation is correct by running checkNNGradients
    #
    #         Note: The vector y passed into the function is a vector of labels
    #               containing values from 1..K. You need to map this vector into a
    #               binary vector of 1's and 0's to be used with the neural network
    #               cost function.
    #
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it for the
    #               first time.
    for i in range(m):
        a1 = X[i:i + 1, :].T
        z2 = Theta1.dot(a1)
        a2 = np.vstack(([1], sigmoid(z2)))
        z3 = Theta2.dot(a2)
        a3 = sigmoid(z3)

        delta3 = a3 - yv[i:i + 1, :].T
        delta2 = Theta2.T.dot(delta3) * np.vstack(([1], sigmoidGradient(z2)))

        Theta1_grad += delta2[1:, :].dot(a1.T)
        Theta2_grad += delta3.dot(a2.T)

    Theta1_grad /= m
    Theta2_grad /= m

    # Part 3: Implement regularization with the cost function and gradients.
    #
    #         Hint: You can implement this around the code for
    #               backpropagation. That is, you can compute the gradients for
    #               the regularization separately and then add them to Theta1_grad
    #               and Theta2_grad from Part 2.
    Theta1_grad[:, 1:] += lamda / m * Theta1[:, 1:]
    Theta2_grad[:, 1:] += lamda / m * Theta2[:, 1:]

    # Unroll gradients
    grad = np.hstack((Theta1_grad.T.ravel(), Theta2_grad.T.ravel()))

    return J, grad
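
# The driver scripts in this collection hand a combined (cost, gradient)
# function like the one above to scipy.optimize.minimize with jac=True as a
# stand-in for Octave's fmincg. A self-contained toy version of that pattern
# (the quadratic objective is purely illustrative) looks like this:
import numpy as np
from scipy.optimize import minimize

def toy_cost(theta):
    """Return cost and gradient together, the way nnCostFunction does."""
    return np.sum((theta - 3.0) ** 2), 2.0 * (theta - 3.0)

result = minimize(toy_cost, np.zeros(4), method='CG', jac=True,
                  options={'maxiter': 50})
# result.x should be close to [3, 3, 3, 3] and result.fun close to 0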
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels,
                   X, y, lamda):
    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    Theta1 = nn_params[:hidden_layer_size * (input_layer_size + 1)].reshape(
        hidden_layer_size, input_layer_size + 1)
    Theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):].reshape(
        num_labels, hidden_layer_size + 1)

    m = X.shape[0]

    # ====================== YOUR CODE HERE ======================
    # Instructions: You should complete the code by working through the
    #               following parts.
    #
    # Part 1: Feedforward the neural network and return the cost in the
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost
    #         computed in ex4.m
    #
    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial derivatives of
    #         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    #         Theta2_grad, respectively. After implementing Part 2, you can check
    #         that your implementation is correct by running checkNNGradients
    #
    #         Note: The vector y passed into the function is a vector of labels
    #               containing values from 1..K. You need to map this vector into a
    #               binary vector of 1's and 0's to be used with the neural network
    #               cost function.
    #
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it for the
    #               first time.
    #
    # Part 3: Implement regularization with the cost function and gradients.
    #
    #         Hint: You can implement this around the code for
    #               backpropagation. That is, you can compute the gradients for
    #               the regularization separately and then add them to Theta1_grad
    #               and Theta2_grad from Part 2.
    #
    a1 = np.c_[np.ones(m), X]  # add a bias column
    z2 = a1.dot(Theta1.T)
    a2 = np.c_[np.ones(m), expit(z2)]  # add a bias column
    z3 = a2.dot(Theta2.T)
    h = expit(z3)
    a3 = h.T
    # print("h(x) shape:", h.shape)

    # First convert y from its label representation into one-hot output vectors
    y_vec = np.zeros((num_labels, m))
    for i in range(m):
        y_vec[y[i][0] - 1][i] = 1

    # The cost of each training example is computed with vector operations,
    # then a for loop accumulates it over all m training examples
    J = 0
    for i in range(m):
        J += np.log(h[i, :]).dot(
            y_vec[:, i]) + np.log(1 - h[i, :]).dot(1 - y_vec[:, i])

    J = -J / m

    # Gradients
    delta3 = a3 - y_vec
    delta2 = Theta2[:, 1:].T.dot(delta3) * sigmoidGradient(z2).T
    D2 = delta3.dot(a2)
    D1 = delta2.dot(a1)

    D2 /= m
    D1 /= m
    # print("delta3", delta3.shape, "delta2:", delta2.shape, "D2:", D2.shape, "D1", D1.shape)

    # Add the regularization term
    t1 = Theta1[:, 1:]
    t2 = Theta2[:, 1:]
    J += lamda * 0.5 / m * (np.sum(np.square(t1)) + np.sum(np.square(t2)))

    D2[:, 1:] = D2[:, 1:] + lamda * t2 / m
    D1[:, 1:] = D1[:, 1:] + lamda * t1 / m

    return J, unrollParams([D1, D2])
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, \
	num_labels, X, y, lambda_reg):
    #NNCOSTFUNCTION Implements the neural network cost function for a two layer
    #neural network which performs classification
    #   [J grad] = NNCOSTFUNCTION(nn_params, hidden_layer_size, num_labels, ...
    #   X, y, lambda) computes the cost and gradient of the neural network. The
    #   parameters for the neural network are "unrolled" into the vector
    #   nn_params and need to be converted back into the weight matrices.
    #
    #   The returned parameter grad should be an "unrolled" vector of the
    #   partial derivatives of the neural network.


    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)], \
                     (hidden_layer_size, input_layer_size + 1), order='F')

    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):], \
                     (num_labels, hidden_layer_size + 1), order='F')

    # Setup some useful variables
    m = len(X)
             
    # # You need to return the following variables correctly 
    J = 0;
    Theta1_grad = np.zeros( Theta1.shape )
    Theta2_grad = np.zeros( Theta2.shape )

    # ====================== YOUR CODE HERE ======================
    # Instructions: You should complete the code by working through the
    #               following parts.
    #
    # Part 1: Feedforward the neural network and return the cost in the
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost
    #         computed in ex4.m
    #
    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial derivatives of
    #         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    #         Theta2_grad, respectively. After implementing Part 2, you can check
    #         that your implementation is correct by running checkNNGradients
    #
    #         Note: The vector y passed into the function is a vector of labels
    #               containing values from 1..K. You need to map this vector into a 
    #               binary vector of 1's and 0's to be used with the neural network
    #               cost function.
    #
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it for the 
    #               first time.
    #
    # Part 3: Implement regularization with the cost function and gradients.
    #
    #         Hint: You can implement this around the code for
    #               backpropagation. That is, you can compute the gradients for
    #               the regularization separately and then add them to Theta1_grad
    #               and Theta2_grad from Part 2.
    #

    # add column of ones as bias unit from input layer to second layer
    X = np.column_stack((np.ones((m,1)), X)) # = a1

    # calculate second layer as sigmoid( z2 ) where z2 = Theta1 * a1
    a2 = s.sigmoid( np.dot(X,Theta1.T) )

    # add column of ones as bias unit from second layer to third layer
    a2 = np.column_stack((np.ones((a2.shape[0],1)), a2))

    # calculate third layer as sigmoid ( z3 ) where z3 = Theta2 * a2
    a3 = s.sigmoid( np.dot(a2,Theta2.T) )

    #%% COST FUNCTION CALCULATION

    #% NONREGULARIZED COST FUNCTION

    # recode labels as vectors containing only values 0 or 1
    labels = y
    # set y to be matrix of size m x k
    y = np.zeros((m,num_labels))
    # for every label, convert it into vector of 0s and a 1 in the appropriate position
    for i in xrange(m):
    	y[i, labels[i]-1] = 1

    # at this point, both a3 and y are m x k matrices, where m is the number of inputs
    # and k is the number of hypotheses. Given that the cost function is a sum
    # over m and k, loop over m and in each loop, sum over k by doing a sum over the row

    cost = 0
    for i in xrange(m):
    	cost += np.sum( y[i] * np.log( a3[i] ) + (1 - y[i]) * np.log( 1 - a3[i] ) )

    J = -(1.0/m)*cost

    #% REGULARIZED COST FUNCTION
    # note that Theta1[:,1:] is necessary given that the first column corresponds to transitions
    # from the bias terms, and we are not regularizing those parameters. Thus, we get rid
    # of the first column.

    sumOfTheta1 = np.sum(np.sum(Theta1[:,1:]**2))
    sumOfTheta2 = np.sum(np.sum(Theta2[:,1:]**2))

    J = J + ( (lambda_reg/(2.0*m))*(sumOfTheta1+sumOfTheta2) )

    #%% BACKPROPAGATION

    bigDelta1 = 0
    bigDelta2 = 0

    # for each training example
    for t in xrange(m):


    	## step 1: perform forward pass
    	# set lowercase x to the t-th row of X
    	x = X[t]
    	# note that uppercase X already included column of ones 
    	# as bias unit from input layer to second layer, so no need to add it

        # calculate second layer as sigmoid( z2 ) where z2 = Theta1 * a1
        a2 = s.sigmoid( np.dot(x,Theta1.T) )

        # add column of ones as bias unit from second layer to third layer
        a2 = np.concatenate((np.array([1]), a2))
        # calculate third layer as sigmoid ( z3 ) where z3 = Theta2 * a2
        a3 = s.sigmoid( np.dot(a2,Theta2.T) )


    	## step 2: for each output unit k in layer 3, set delta_{k}^{(3)}
    	delta3 = np.zeros((num_labels))

        # see handout for more details, but y_k indicates whether
        # the current training example belongs to class k (y_k = 1),
        # or if it belongs to a different class (y_k = 0)
    	for k in xrange(num_labels):
            y_k = y[t, k]
            delta3[k] = a3[k] - y_k

    	## step 3: for the hidden layer l=2, set delta2 = Theta2' * delta3 .* sigmoidGradient(z2)
    	# note that we're skipping delta2_0 (=gradients of bias units, which we don't use here)
    	# by doing (Theta2(:,2:end))' instead of Theta2'
    	delta2 = (np.dot(Theta2[:,1:].T, delta3).T) * sg.sigmoidGradient( np.dot(x, Theta1.T) )

        ## step 4: accumulate the gradient from this example
        # note that
        #   delta2.shape = (hidden_layer_size,)
        #   x.shape      = (input_layer_size + 1,)
        #   delta3.shape = (num_labels,)
        #   a2.shape     = (hidden_layer_size + 1,)
        # np.dot(delta2, x) and np.dot(delta3, a2) don't form outer products;
        # one could do e.g. np.dot(delta2[:,None], x[None,:]), but
        # np.outer(delta2, x) seems faster
        # solution from http://stackoverflow.com/a/22950320/583834
    	bigDelta1 += np.outer(delta2, x)
    	bigDelta2 += np.outer(delta3, a2)


    # step 5: obtain gradient for neural net cost function by dividing the accumulated gradients by m
    Theta1_grad = bigDelta1 / m
    Theta2_grad = bigDelta2 / m

    #% REGULARIZATION FOR GRADIENT
    # only regularize for j >= 1, so skip the first column
    Theta1_grad_unregularized = np.copy(Theta1_grad)
    Theta2_grad_unregularized = np.copy(Theta2_grad)
    Theta1_grad += (float(lambda_reg)/m)*Theta1
    Theta2_grad += (float(lambda_reg)/m)*Theta2
    Theta1_grad[:,0] = Theta1_grad_unregularized[:,0]
    Theta2_grad[:,0] = Theta2_grad_unregularized[:,0]

    # # -------------------------------------------------------------

    # # =========================================================================

    # Unroll gradients
    grad = np.concatenate((Theta1_grad.reshape(Theta1_grad.size, order='F'), Theta2_grad.reshape(Theta2_grad.size, order='F')))

    return J, grad
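
# Part 6 of the driver scripts calls randInitializeWeights(L_in, L_out)
# without showing its body. A common symmetry-breaking choice (a sketch, not
# necessarily the original helper) draws each weight uniformly from
# [-epsilon_init, epsilon_init], with epsilon_init tied to the layer sizes:
import numpy as np

def randInitializeWeights_sketch(L_in, L_out):
    """Random (L_out, L_in + 1) weight matrix, including the bias column."""
    epsilon_init = np.sqrt(6.0) / np.sqrt(L_in + L_out)
    return np.random.rand(L_out, L_in + 1) * 2.0 * epsilon_init - epsilon_init

# e.g. randInitializeWeights_sketch(400, 25).shape == (25, 401)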
Beispiel #32
0
def ex4():
    ## Machine Learning Online Class - Exercise 4 Neural Network Learning

    #  Instructions
    #  ------------
    #
    #  This file contains code that helps you get started on the
    #  linear exercise. You will need to complete the following functions
    #  in this exercise:
    #
    #     sigmoidGradient.m
    #     randInitializeWeights.m
    #     nnCostFunction.m
    #
    #  For this exercise, you will not need to change any code in this file,
    #  or any other files other than those mentioned above.
    #

    ## Initialization
    #clear ; close all; clc

    ## Setup the parameters you will use for this exercise
    input_layer_size = 400  # 20x20 Input Images of Digits
    hidden_layer_size = 25  # 25 hidden units
    num_labels = 10  # 10 labels, from 1 to 10
    # (note that we have mapped "0" to label 10)

    ## =========== Part 1: Loading and Visualizing Data =============
    #  We start the exercise by first loading and visualizing the dataset.
    #  You will be working with a dataset that contains handwritten digits.
    #

    # Load Training Data
    print('Loading and Visualizing Data ...')

    mat = scipy.io.loadmat('ex4data1.mat')
    X = mat['X']
    y = mat['y'].ravel()
    m = X.shape[0]

    # Randomly select 100 data points to display
    sel = np.random.choice(m, 100, replace=False)

    displayData(X[sel, :])
    plt.savefig('figure1.png')

    print('Program paused. Press enter to continue.')
    #pause;

    ## ================ Part 2: Loading Parameters ================
    # In this part of the exercise, we load some pre-initialized
    # neural network parameters.

    print('\nLoading Saved Neural Network Parameters ...')

    # Load the weights into variables Theta1 and Theta2
    mat = scipy.io.loadmat('ex4weights.mat')
    Theta1 = mat['Theta1']
    Theta2 = mat['Theta2']

    # Unroll parameters
    nn_params = np.concatenate([Theta1.ravel(), Theta2.ravel()])

    ## ================ Part 3: Compute Cost (Feedforward) ================
    #  For the neural network, you should first start by implementing the
    #  feedforward part of the neural network that returns the cost only. You
    #  should complete the code in nnCostFunction.m to return cost. After
    #  implementing the feedforward to compute the cost, you can verify that
    #  your implementation is correct by verifying that you get the same cost
    #  as us for the fixed debugging parameters.
    #
    #  We suggest implementing the feedforward cost *without* regularization
    #  first so that it will be easier for you to debug. Later, in part 4, you
    #  will get to implement the regularized cost.
    #
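    #  For reference, the unregularized cost being computed is
    #    J = (1/m) * sum_i sum_k [ -y_ik*log(h_ik) - (1 - y_ik)*log(1 - h_ik) ]
    #  where y_ik is the one-hot encoding of the labels and h_ik the network output.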
    print('\nFeedforward Using Neural Network ...')

    # Weight regularization parameter (we set this to 0 here).
    lambda_value = 0

    J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                       num_labels, X, y, lambda_value)[0]

    print(
        'Cost at parameters (loaded from ex4weights): %f \n(this value should be about 0.287629)'
        % J)

    print('\nProgram paused. Press enter to continue.')
    #pause;

    ## =============== Part 4: Implement Regularization ===============
    #  Once your cost function implementation is correct, you should now
    #  continue to implement the regularization with the cost.
    #

    print('\nChecking Cost Function (w/ Regularization) ... ')

    # Weight regularization parameter (we set this to 1 here).
    lambda_value = 1

    J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                       num_labels, X, y, lambda_value)[0]

    print(
        'Cost at parameters (loaded from ex4weights): %f \n(this value should be about 0.383770)'
        % J)

    print('Program paused. Press enter to continue.')
    #pause;

    ## ================ Part 5: Sigmoid Gradient  ================
    #  Before you start implementing the neural network, you will first
    #  implement the gradient for the sigmoid function. You should complete the
    #  code in the sigmoidGradient.m file.
    #
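    #  For reference, the sigmoid gradient is g'(z) = sigmoid(z) * (1 - sigmoid(z)),
    #  which takes its maximum value of 0.25 at z = 0.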

    print('\nEvaluating sigmoid gradient...')

    g = sigmoidGradient(np.array([1, -0.5, 0, 0.5, 1]))
    print('Sigmoid gradient evaluated at [1 -0.5 0 0.5 1]:')
    print(formatter('%f ', g))
    print('\n')

    print('Program paused. Press enter to continue.')
    #pause;

    ## ================ Part 6: Initializing Parameters ================
    #  In this part of the exercise, you will be starting to implement a two
    #  layer neural network that classifies digits. You will start by
    #  implementing a function to initialize the weights of the neural network
    #  (randInitializeWeights.m)
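    #  randInitializeWeights typically draws each weight uniformly from
    #  [-epsilon_init, epsilon_init] (the exercise suggests epsilon_init = 0.12), e.g.
    #    W = np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init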

    print('\nInitializing Neural Network Parameters ...')

    initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
    initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)

    # Unroll parameters
    initial_nn_params = np.concatenate(
        [initial_Theta1.ravel(),
         initial_Theta2.ravel()])

    ## =============== Part 7: Implement Backpropagation ===============
    #  Once your cost matches up with ours, you should proceed to implement the
    #  backpropagation algorithm for the neural network. You should add to the
    #  code you've written in nnCostFunction.m to return the partial
    #  derivatives of the parameters.
    #
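    #  checkNNGradients compares the analytic gradient against a numerical estimate,
    #  roughly (J(theta + eps) - J(theta - eps)) / (2 * eps) with eps around 1e-4;
    #  the two should agree to several decimal places.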
    print('\nChecking Backpropagation... ')

    #  Check gradients by running checkNNGradients
    checkNNGradients()

    print('\nProgram paused. Press enter to continue.')
    #pause;

    ## =============== Part 8: Implement Regularization ===============
    #  Once your backpropagation implementation is correct, you should now
    #  continue to implement the regularization with the cost and gradient.
    #

    print('\nChecking Backpropagation (w/ Regularization) ... ')

    #  Check gradients by running checkNNGradients
    lambda_value = 3
    checkNNGradients(lambda_value)

    # Also output the costFunction debugging values
    debug_J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                             num_labels, X, y, lambda_value)[0]

    print(
        '\n\nCost at (fixed) debugging parameters (w/ lambda = 3): %f \n(this value should be about 0.576051)\n\n'
        % debug_J)

    print('Program paused. Press enter to continue.')
    #pause;

    ## =================== Part 8: Training NN ===================
    #  You have now implemented all the code necessary to train a neural
    #  network. To train your neural network, we will now use "fmincg", which
    #  is a function which works similarly to "fminunc". Recall that these
    #  advanced optimizers are able to train our cost functions efficiently as
    #  long as we provide them with the gradient computations.
    #
    print('\nTraining Neural Network... ')

    #  After you have completed the assignment, change the MaxIter to a larger
    #  value to see how more training helps.
    options = {'maxiter': 50}

    #  You should also try different values of lambda
    lambda_value = 1

    # Create "short hand" for the cost function to be minimized
    costFunction = lambda p: nnCostFunction(
        p, input_layer_size, hidden_layer_size, num_labels, X, y, lambda_value)

    # Now, costFunction is a function that takes in only one argument (the
    # neural network parameters)
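    # Because jac=True, scipy.optimize.minimize expects costFunction to return a
    # (cost, gradient) tuple, which is exactly what nnCostFunction provides.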
    res = optimize.minimize(costFunction,
                            initial_nn_params,
                            jac=True,
                            method='TNC',
                            options=options)
    nn_params = res.x

    # Obtain Theta1 and Theta2 back from nn_params
    Theta1 = nn_params[0:hidden_layer_size * (input_layer_size + 1)].reshape(
        hidden_layer_size, input_layer_size + 1)

    Theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):].reshape(
        num_labels, hidden_layer_size + 1)

    print('Program paused. Press enter to continue.')
    #pause;

    ## ================= Part 9: Visualize Weights =================
    #  You can now "visualize" what the neural network is learning by
    #  displaying the hidden units to see what features they are capturing in
    #  the data.

    print('\nVisualizing Neural Network... ')

    displayData(Theta1[:, 1:])
    plt.savefig('figure2.png')

    print('\nProgram paused. Press enter to continue.')
    #pause;

    ## ================= Part 10: Implement Predict =================
    #  After training the neural network, we would like to use it to predict
    #  the labels. You will now implement the "predict" function to use the
    #  neural network to predict the labels of the training set. This lets
    #  you compute the training set accuracy.
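    #  predict (implemented elsewhere) typically forward-propagates X through Theta1
    #  and Theta2 and returns np.argmax(a3, axis=1) + 1, so the result matches the
    #  1..10 label encoding used by the dataset.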

    pred = predict(Theta1, Theta2, X)

    print('\nTraining Set Accuracy: %f' % (np.mean(
        (pred == y).astype(int)) * 100))
Beispiel #33
0
print(
    'Cost at parameters (loaded from ex4weights): %f \n(this value should be about 0.383770)'
    % J)

input("Program paused. Press Enter to continue...")

## ================ Part 5: Sigmoid Gradient  ================
#  Before you start implementing the neural network, you will first
#  implement the gradient for the sigmoid function. You should complete the
#  code in the sigmoidGradient.m file.
#

print('Evaluating sigmoid gradient...')

g = sigmoidGradient(np.array([1, -0.5, 0, 0.5, 1]))
print('Sigmoid gradient evaluated at [1 -0.5 0 0.5 1]: ')
print(g)

input("Program paused. Press Enter to continue...")

## ================ Part 6: Initializing Parameters ================
#  In this part of the exercise, you will be starting to implement a two
#  layer neural network that classifies digits. You will start by
#  implementing a function to initialize the weights of the neural network
#  (randInitializeWeights.m)

print('Initializing Neural Network Parameters ...')

initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)
Beispiel #34
0
# Weight regularization parameter

lambda_param = 1

J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                         num_labels, X, y, lambda_param)

print('Cost at parameters (loaded from ex4weights): {:.6f} '.format(float(J)))
print('\n(this value should be about 0.383770)\n')

input('Program paused. Press enter to continue.\n')

# ================ Part 5: Sigmoid Gradient  ================
print('\nEvaluating sigmoid gradient...\n')

g = sigmoidGradient(np.array([-1, -0.5, 0, 0.5, 1]))

print('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:\n  ')
print('{}'.format(g))
print('\n\n')

input('Program paused. Press enter to continue.\n')

# ================ Part 6: Initializing Parameters ================
# Implement a two layer neural network that classifies digits.
# Start by implementing a function to initialize the weights of the neural network

print('\nInitializing Neural Network Parameters ...\n')

initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, MLlambda):
    '''NNCOSTFUNCTION Implements the neural network cost function for a two layer
    neural network which performs classification.

    [J, grad] = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                               num_labels, X, y, MLlambda)
    computes the cost and gradient of the neural network. The parameters for the
    neural network are "unrolled" into the vector nn_params and need to be
    converted back into the weight matrices.

    The returned parameter grad should be an "unrolled" vector of the partial
    derivatives of the neural network.
    '''
    # make sure all further math with MLlambda is done as float,
    #  sometimes caller sets MLlambda to be an int
    MLlambda = float(MLlambda)  

    Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
               (hidden_layer_size, (input_layer_size + 1)), 
                order = 'F')

    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):], 
               (num_labels, (hidden_layer_size + 1)), 
               order = 'F')  

    # % Setup some useful variables, num examples and features
    m, n = np.shape(X) 
         
    #% You need to return the following variables correctly 
    J = 0
    Theta1_grad = np.zeros(np.shape(Theta1))
    Theta2_grad = np.zeros(np.shape(Theta2))

    #% Compute Cost of feed forward.

    # identity matrix used to one-hot encode the labels (num_labels x num_labels)
    y_eye = np.eye(num_labels)
    # labels run 1..num_labels, so shift down by 1 to index columns 0..num_labels-1
    y_matrix = y_eye[y[:, 0] - 1, :]  # Octave: y_matrix = y_eye(y, :);

    # % Calculate cost. Assuming a 3 layer neural network. 
    # % Add ones column to the X data matrix
    a1 = np.c_[np.ones(m), X]

    #% Calculate a2 outputs for hidden layer
    z2 = np.dot(a1 , Theta1.transpose())   # m X 25
    a2 = sigmoid(z2)    # m x 25
    a2 = np.c_[np.ones(m), a2]  # add a0 = 1 (column of 1's) -> m x 26

    z3 = np.dot(a2, Theta2.transpose())  # m x 10
    a3 = sigmoid(z3)  # m x 10

    hox = a3

    Inner_J = -y_matrix*np.log(hox) - (1 - y_matrix)*np.log(1 - hox)

    J_wo_reg = np.sum(Inner_J)/m #J_wo_reg = sum(sum(Inner_J))/m;


    #% Calculate Regularization portion
    Theta1_no_bias = Theta1[:, 1:]
    Theta2_no_bias = Theta2[:, 1:]

    Theta1_no_bias_squared = np.square(Theta1_no_bias)
    Theta2_no_bias_squared = np.square(Theta2_no_bias)

    reg = (float(MLlambda)/(2*m)) * ( sum(sum(Theta1_no_bias_squared)) + 
                                      sum(sum(Theta2_no_bias_squared)))
    J = J_wo_reg + reg 


#% ***************************************************************
#% ************************* PART 2 ******************************
#% ***************************************************************

    #% Calculate the gradients
    #% Assuming a 3 layer network.

    #% STEP 1: Calculate error at level 3: d3
    d3 = a3 - y_matrix

    #% STEP 2: Calculate error at Level 2: d2
    siggrad_z2 = sigmoidGradient(z2) 
    # % NOTE: the per-example product Theta2_no_bias' * d3 becomes d3 * Theta2_no_bias
    # %       when the examples are stacked as rows, so the transpose order flips.
    # d2 = (d3 * Theta2_no_bias).*siggrad_z2;
    d2 = np.dot(d3,Theta2_no_bias) * siggrad_z2

    #% STEP 3: Calculate Delta's:  Delta1 & Delta2 (ie the triangles)
    #% Note: the bias unit has already been handled, since d2 was
    #%  computed with the bias column of Theta2 removed.
    Delta1 = np.dot(d2.transpose(), a1)
    Delta2 = np.dot(d3.transpose(), a2)  

    #% Calculate the back prop gradients.
    Theta1_grad = (1./m)* Delta1
    Theta2_grad = (1./m)* Delta2 

    ''' 
% ***************************************************************
% ************************* PART 3 ******************************
% ***************************************************************

% Calculate regularization component of the gradient.
%  Theta1 and Theta2 include the bias components, but to 
%  calculate the regularization, we do not want to include
%  the bias. So we zero out the bias columns, so it will have
%  no impact when we add it to the gradient that was calculated 
%  above (e.g. without regularization). But we want to keep the
%  matrix sizes the same so we can do the additions using vector
%  or matrix math.
'''

    #% Zero out the bias unit in Theta1
    Theta1_bias_zero = np.copy(Theta1)
    Theta1_bias_zero[:, 0] = 0

    #% Zero out the bias unit in Theta2
    Theta2_bias_zero = np.copy(Theta2)
    Theta2_bias_zero[:, 0] = 0

    #% Scale Theta's by lambda/m 
    Theta1_reg = (MLlambda/m ) * Theta1_bias_zero
    Theta2_reg = (MLlambda/m ) * Theta2_bias_zero

    #% Add regularization component to the gradients
    Theta1_grad = Theta1_grad + Theta1_reg
    Theta2_grad = Theta2_grad + Theta2_reg

    #% Unroll gradients
    #grad = [Theta1_grad(:) ; Theta2_grad(:)];
    grad = np.hstack((Theta1_grad.ravel(order='F'), Theta2_grad.ravel(order='F')))


    # JKMM pause for debug
    #print("JKMM Program paused in nnCostFunction. Press Ctrl-D to continue.\n")
    #code.interact(local=dict(globals(), **locals()))
    #print(" ... continuing\n ")  

    return J, grad
Beispiel #36
0
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, reg_lambda, 
                   returnOnlyGrad = None, returnOnlyCost = None, flattenResult=None):
    
    """Implements the neural network cost function for a two layer
       neural network which performs classification
       [J, grad] = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
       num_labels, X, y, reg_lambda) computes the cost and gradient of the neural
       network. The parameters for the neural network are "unrolled" into the
       vector nn_params and need to be converted back into the weight matrices.

       The returned parameter grad should be an "unrolled" vector of the
       partial derivatives of the neural network.
    """
    
    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    Theta1 = np.reshape(nn_params[0:hidden_layer_size * (input_layer_size + 1)], (hidden_layer_size, input_layer_size + 1))
    Theta2 = np.reshape(nn_params[(hidden_layer_size * (input_layer_size + 1)):], (num_labels, hidden_layer_size + 1))
                
    # Setup some useful variables
    m = np.shape(X)[0]
        

    # Part 1: Feedforward the neural network and return the cost in the 
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost computed in ex4.py
    
    # Recode y into a one-hot matrix Y: each label in y becomes a num_labels-dimensional row
    Y = np.zeros((m, num_labels))
    
    for i in range(m):
      # labels are assumed to run 1..num_labels, hence the -1 for 0-based column indexing
      Y[i, y[i, 0] - 1] = 1
    
    # 1. Feed-forward to compute h = a3.
    a1 = np.c_[np.ones((m, 1)), X]
    z2 = a1.dot(Theta1.T)
    a2 = np.c_[np.ones((z2.shape[0], 1)), sigmoid(z2)]
    z3 = a2.dot(Theta2.T)
    a3 = sigmoid(z3)
    h = a3
    
    
    J = np.sum(np.sum((-Y) * np.log(h) - (1-Y) * np.log(1-h), 1)) / m
    
    #
    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial derivatives of
    #         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    #         Theta2_grad, respectively. After implementing Part 2, you can check
    #         that your implementation is correct by running checkNNGradients
    
    sigma3 = h - Y
    sigma2 = (sigma3.dot(Theta2)) * sigmoidGradient(np.c_[np.ones((np.shape(z2)[0], 1)), z2])
    
    delta2 =  sigma3.T.dot(a2)
    delta1 =  sigma2[:, 1:].T.dot(a1)
    
    # Part 3: Implement regularization with the cost function and gradients.
    #
    #         Hint: You can implement this around the code for
    #               backpropagation. That is, you can compute the gradients for
    #               the regularization separately and then add them to Theta1_grad
    #               and Theta2_grad from Part 2.
    #
    
    # we don't regularize the bias terms
    J = J + (reg_lambda/(2.0 * m)) * np.sum(np.sum(Theta1[:,1:] * Theta1[:,1:]))
    J = J + (reg_lambda/(2.0 * m)) * np.sum(np.sum(Theta2[:,1:] * Theta2[:,1:]))
    
    # calculate penalties (we don't regularize the bias terms)
    p1 = (reg_lambda/m) * np.c_[np.zeros((np.shape(Theta1)[0], 1)), Theta1[:,1:]]
    p2 = (reg_lambda/m) * np.c_[np.zeros((np.shape(Theta2)[0], 1)), Theta2[:,1:]]
    
    Theta1_grad = delta1/m + p1
    Theta2_grad = delta2/m + p2
    
    # Unroll gradients
    grad = np.r_[Theta1_grad.ravel(), Theta2_grad.ravel()]

    
    if returnOnlyGrad:
        if flattenResult:
            return grad.flatten()
        return grad

    if returnOnlyCost:
        return J
    
    return (J, grad)
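# Hedged usage sketch (not part of the original example): the returnOnlyCost /
# returnOnlyGrad flags above let this function serve optimizers that want separate
# cost and gradient callables, e.g. scipy.optimize.fmin_cg:
#   costFunc = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size,
#                                       num_labels, X, y, reg_lambda,
#                                       returnOnlyCost=True)
#   gradFunc = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size,
#                                       num_labels, X, y, reg_lambda,
#                                       returnOnlyGrad=True, flattenResult=True)
#   theta_opt = scipy.optimize.fmin_cg(costFunc, initial_nn_params,
#                                      fprime=gradFunc, maxiter=50)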
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, X, y, lamda):
    # Reshape nn_params back into the parameters Theta1 and Theta2,
    # the weight matrices for our 2 layer neural network
    Theta1 = nn_params[:hidden_layer_size * (input_layer_size + 1)].reshape(
        (hidden_layer_size, input_layer_size + 1), order='F')
    Theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):].reshape(
        (num_labels, hidden_layer_size + 1), order='F')

    m = X.shape[0]
    J = 0
    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)

    X = np.hstack((np.ones((m, 1)), X))
    yv = np.zeros((m, num_labels))
    for i in range(m):
        yv[i, y[i][0] - 1] = 1

    # Part 1: Feedforward the neural network and return the cost in the
    #         variable J. After implementing Part 1, you can verify that your
    #         cost function computation is correct by verifying the cost
    #         computed in ex4.py
    a1 = X
    a2 = np.hstack((np.ones((m, 1)), sigmoid(a1.dot(Theta1.T))))
    a3 = sigmoid(a2.dot(Theta2.T))

    for i in range(m):
        J += (-yv[i, :] * np.log(a3[i, :]) -
              (1 - yv[i, :]) * np.log(1 - a3[i, :])).sum()
    J /= m
    J += ((Theta1[:, 1:] ** 2).sum() +
          (Theta2[:, 1:] ** 2).sum()) * lamda / 2 / m

    # Part 2: Implement the backpropagation algorithm to compute the gradients
    #         Theta1_grad and Theta2_grad. You should return the partial derivatives of
    #         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
    #         Theta2_grad, respectively. After implementing Part 2, you can check
    #         that your implementation is correct by running checkNNGradients
    #
    #         Note: The vector y passed into the function is a vector of labels
    #               containing values from 1..K. You need to map this vector into a
    #               binary vector of 1's and 0's to be used with the neural network
    #               cost function.
    #
    #         Hint: We recommend implementing backpropagation using a for-loop
    #               over the training examples if you are implementing it for the
    #               first time.
    for i in range(m):
        a1 = X[i:i + 1, :].T
        z2 = Theta1.dot(a1)
        a2 = np.vstack(([1], sigmoid(z2)))
        z3 = Theta2.dot(a2)
        a3 = sigmoid(z3)

        delta3 = a3 - yv[i:i + 1, :].T
        delta2 = Theta2.T.dot(delta3) * np.vstack(([1], sigmoidGradient(z2)))
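        # delta2 has shape (hidden_layer_size + 1, 1); its first entry belongs to the
        # bias unit and is discarded below via delta2[1:, :]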

        Theta1_grad += delta2[1:, :].dot(a1.T)
        Theta2_grad += delta3.dot(a2.T)

    Theta1_grad /= m
    Theta2_grad /= m

    # Part 3: Implement regularization with the cost function and gradients.
    #
    #         Hint: You can implement this around the code for
    #               backpropagation. That is, you can compute the gradients for
    #               the regularization separately and then add them to Theta1_grad
    #               and Theta2_grad from Part 2.
    Theta1_grad[:, 1:] += lamda / m * Theta1[:, 1:]
    Theta2_grad[:, 1:] += lamda / m * Theta2[:, 1:]

    # Unroll gradients
    grad = np.hstack((Theta1_grad.T.ravel(), Theta2_grad.T.ravel()))

    return J, grad
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels,
                   X, y, xlambda):
    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices for our 2 layer neural network
    Theta1 = nn_params[:hidden_layer_size * (input_layer_size + 1)].reshape(
        hidden_layer_size, input_layer_size + 1)
    Theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):].reshape(
        num_labels, hidden_layer_size + 1)
    m, n = np.shape(X)

    # initial output
    J = 0
    Theta1_grad = np.zeros(np.shape(Theta1))
    Theta2_grad = np.zeros(np.shape(Theta2))

    ## ==========================  Part 1:  ===========================
    # Feedforward the neural network and return the cost in the variable J without regularization
    # transform y from a 1-d label vector into a one-hot matrix of shape (num_labels, m)
    ylabel = np.zeros([num_labels, m])
    for i in range(0, m):
        ylabel[int(y[i] - 1.0), i] = 1

    # process of FP
    X = np.c_[np.ones(m).reshape(m, 1), X]
    z2 = np.dot(X, Theta1.T)
    a2 = sigmoid.sigmoid(z2)
    a2_bias = np.c_[np.ones(m).reshape(m, 1), a2]  # add bias column to a2
    z3 = np.dot(a2_bias, Theta2.T)
    a3 = sigmoid.sigmoid(z3)  # a3 is the h(x), it is the output layer.

    for i in range(0, m):
        J = J - (np.dot(np.log(a3[i,:].reshape(1,num_labels)),ylabel[:,i].reshape(num_labels,1))+\
            np.dot(np.log(1.0-a3[i,:].reshape(1,num_labels)),(1.0-ylabel[:,i].reshape(num_labels,1))))
    J = J / m

    ## ==================== Part 2: Compute the gradients ======================
    Delta1, Delta2 = np.zeros(np.shape(Theta1)), np.zeros(np.shape(Theta2))

    # BP
    for t in range(0, m):
        delta3 = (a3[t, :] - ylabel[:, t]).reshape(
            num_labels, 1)  # the error between y and a3
        delta2 = np.dot(Theta2.T, delta3) * sigmoidGradient.sigmoidGradient(
            np.c_[np.ones(m).reshape(m, 1), a2][t, :].reshape(
                hidden_layer_size + 1,
                1))  # the error generated in the hidden layer

        Delta1 = Delta1 + np.dot(delta2[1:], X[t, :].reshape(1, n + 1))
        Delta2 = Delta2 + np.dot(
            delta3, np.c_[np.ones(m).reshape(m, 1), a2][t, :].reshape(
                1, hidden_layer_size + 1))

    Theta1_grad = Delta1 / m
    Theta2_grad = Delta2 / m

    m1, n1 = np.shape(Theta1_grad)
    m2, n2 = np.shape(Theta2_grad)

    grad = np.r_[(Theta1_grad.ravel().reshape(m1 * n1, 1),
                  Theta2_grad.ravel().reshape(m2 * n2, 1))]

    return J, grad
Beispiel #39
0
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lmbda):
    # Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
    # for our 2 layer neural network
    if nn_params.shape[0] != 1:
        nn_params = nn_params.reshape((1, nn_params.shape[0]))

    Theta1 = nn_params[:, :(hidden_layer_size * (input_layer_size + 1))
                       ].reshape((hidden_layer_size, input_layer_size + 1))
    Theta2 = nn_params[:, hidden_layer_size *
                       (input_layer_size + 1):].reshape((num_labels, hidden_layer_size + 1))

    # Setup some useful variables
    m = X.shape[0]

    # You need to return the following variables correctly
    J = 0
    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)

    # ====================== YOUR CODE HERE ======================
    new_labels = np.zeros((y.shape[0], num_labels))

    for i in range(m):
        new_labels[i, int(y[i]) - 1] = 1

    X = np.hstack((np.ones((m, 1)), X))
    a_2 = sigmoid(X.dot(Theta1.T))
    a_2 = np.hstack((np.ones((m, 1)), a_2))
    a_3 = sigmoid(a_2.dot(Theta2.T))

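    # np.nan_to_num guards against log(0) producing -inf, so terms that are
    # multiplied by a zero label entry do not turn the sum into NaN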
    J = np.sum(np.multiply(-new_labels, np.nan_to_num(np.log(a_3))) -
               np.multiply(1 - new_labels, np.nan_to_num(np.log(1 - a_3)))) / m

    t1 = Theta1[:, 1:]
    t2 = Theta2[:, 1:]
    J += (lmbda * (np.sum(np.power(t1, 2)) + np.sum(np.power(t2, 2)))) / (2 * m)

    for t in range(m):
        a_1 = X[t, :]
        z_2 = a_1.dot(Theta1.T)
        a_2 = sigmoid(z_2)
        a_2 = np.matrix(np.append([1], a_2))
        z_3 = a_2.dot(Theta2.T)
        a_3 = sigmoid(z_3)

        delta_3 = a_3 - new_labels[t, :]
        delta_2 = np.multiply(delta_3.dot(Theta2[:, 1:]), sigmoidGradient(z_2))

        Theta1_grad += delta_2.T.dot(a_1)
        Theta2_grad += delta_3.T.dot(a_2)

    Theta1_grad /= m
    Theta2_grad /= m

    Theta1_grad[:, 1:] += (lmbda * Theta1[:, 1:]) / m
    Theta2_grad[:, 1:] += (lmbda * Theta2[:, 1:]) / m

    # ============================================================

    # Unroll gradients
    grad = np.hstack((Theta1_grad.flatten(), Theta2_grad.flatten()))

    return J, grad
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda):

    """computes the cost and gradient of the neural network. The
  parameters for the neural network are "unrolled" into the vector
  nn_params and need to be converted back into the weight matrices.

  The returned parameter grad should be a "unrolled" vector of the
  partial derivatives of the neural network.
    """
    input_layer_size=int(input_layer_size)
    hidden_layer_size=int(hidden_layer_size)
    num_labels=int(num_labels)
# Reshape nn_params back into the parameters Theta1 and Theta2, the weight matrices
# for our 2 layer neural network
# Obtain Theta1 and Theta2 back from nn_params
    Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)],
                       (hidden_layer_size, input_layer_size + 1), order='F').copy()

    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):],
                       (num_labels, (hidden_layer_size + 1)), order='F').copy()



# Setup some useful variables
    m, _ = X.shape


# ====================== YOUR CODE HERE ======================
# Instructions: You should complete the code by working through the
#               following parts.
#
# Part 1: Feedforward the neural network and return the cost in the
#         variable J. After implementing Part 1, you can verify that your
#         cost function computation is correct by verifying the cost
#         computed in ex4.m
    a1=np.column_stack((np.ones((m, 1)), X))
    #z2 = np.dot(Theta1, a1.T).T
    z2 = np.dot(a1,Theta1.T)
    a2 = np.column_stack((np.ones((m, 1)),sigmoid(z2)))
    z3 = np.dot(a2, Theta2.T)
    a3 = sigmoid(z3)
    
    nn_hx = a3.ravel(order='F')
    #nn_y = np.repeat(y, num_labels)
    nn_y=np.zeros(0)
    for k in range(num_labels):
        nn_y=np.append(nn_y,np.asarray([1 if i==(k+1) else 0 for i in y]))
    MatY=np.reshape(nn_y, (num_labels,m)).T
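    # A vectorized alternative (sketch, assuming y holds labels 1..num_labels):
    #   MatY = (np.asarray(y).reshape(-1, 1) == np.arange(1, num_labels + 1)).astype(float)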
    
    
    #first = -np.dot(MatY.ravel(), np.log(nn_hx))
    #second = -np.dot((1-MatY.ravel()), np.log(1-nn_hx))
    first = -np.dot(nn_y, np.log(nn_hx))
    second = -np.dot((1-nn_y), np.log(1-nn_hx))
    #first = -np.dot(MatY, np.log(a3))
    #second = -np.dot((1-MatY), np.log(1-a3))
    reg = (Theta1[:,1:]**2).sum() + (Theta2[:,1:]**2).sum()
    J=(first+second)/m + reg*Lambda/(2.*m)
      
    #J = ((-MatY * np.log(a3) - (1-MatY) * np.log(1-a3))/m).sum()

#
# Part 2: Implement the backpropagation algorithm to compute the gradients
#         Theta1_grad and Theta2_grad. You should return the partial derivatives of
#         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
#         Theta2_grad, respectively. After implementing Part 2, you can check
#         that your implementation is correct by running checkNNGradients
#
#         Note: The vector y passed into the function is a vector of labels
#               containing values from 1..K. You need to map this vector into a 
#               binary vector of 1's and 0's to be used with the neural network
#               cost function.
#
#         Hint: We recommend implementing backpropagation using a for-loop
#               over the training examples if you are implementing it for the 
#               first time.
#

    delta3 = a3 - MatY

    delta2 = np.dot(delta3,Theta2)[:,1:] * sigmoidGradient(z2)
    Theta1[:,0]=np.zeros(Theta1.shape[0])
    Theta2[:,0]=np.zeros(Theta2.shape[0])
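    # zeroing the bias columns above means the Lambda * Theta / m terms added below
    # leave the bias weights unregularized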
    Theta1_grad = np.dot(delta2.T,a1)/m + Lambda * Theta1/m
    Theta2_grad = np.dot(delta3.T,a2)/m + Lambda * Theta2/m
# Part 3: Implement regularization with the cost function and gradients.
#
#         Hint: You can implement this around the code for
#               backpropagation. That is, you can compute the gradients for
#               the regularization separately and then add them to Theta1_grad
#               and Theta2_grad from Part 2.
#



    # -------------------------------------------------------------

    # =========================================================================
    
    #Unroll gradient
    Ngrad = np.hstack((Theta1_grad.T.ravel(), Theta2_grad.T.ravel()))

    return J, Ngrad
Beispiel #41
0
# Weight regularization parameter (we set this to 1 here).
_lambda = 1

J, _ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, _lambda)

print('Cost at parameters (loaded from ex4weights): %f (this value should be about 0.383770)' % J)


#% ================ Part 5: Sigmoid Gradient  ================
#  Before you start implementing the neural network, you will first
#  implement the gradient for the sigmoid function. You should complete the
#  code in the sigmoidGradient.m file.
#

g = sigmoidGradient(np.array([1, -0.5, 0, 0.5, 1]))
print('Sigmoid gradient evaluated at [1 -0.5 0 0.5 1]\n ', g)



#% ================ Part 6: Initializing Parameters ================
#  In this part of the exercise, you will be starting to implement a two
#  layer neural network that classifies digits. You will start by
#  implementing a function to initialize the weights of the neural network
#  (randInitializeWeights.m)


initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)

# Unroll parameters
Beispiel #42
0
def backwards(nn_weights, layers, X, y, num_labels, lambd):
    # Computes the gradient of the neural network.
    # nn_weights: Neural network parameters (vector)
    # layers: a list with the number of units per layer.
    # X: a matrix where every row is a training example for a handwritten digit image
    # y: a vector with the labels of each instance
    # num_labels: the number of units in the output layer
    # lambd: regularization factor
    
    # Setup some useful variables
    m = X.shape[0]
    num_layers = len(layers)

    # Roll Params
    # The parameters for the neural network are "unrolled" into the vector
    # nn_params and need to be converted back into the weight matrices.
    Theta = roll_params(nn_weights, layers)
  
    # You need to return the following variables correctly 
    Theta_grad = [zeros(w.shape) for w in Theta]

    # ================================ DONE ================================
    # The vector y passed into the function is a vector of labels
    # containing values from 1..K. You need to map this vector into a
    # binary vector of 1's and 0's to be used with the neural network
    # cost function.
    yv = zeros((m, num_labels))
    for i in range(m):
        yv[i][y[i]] += 1
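    # note: this indexing assumes the labels in y are already 0-based
    # (0..num_labels-1); with 1-based labels this line would need y[i] - 1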

    # ================================ DONE ================================
    # In this point implement the backpropagation algorithm

    # In this point calculate the cost of the neural network (feedforward)

    # Step 1: Initialization of useful variables

    # Z and A will store the hidden states of the network, as lists of matrices, of size num_layers
    A = [addColumnOne(X)]
    Z = [addColumnOne(X)]

    # delta will store the delta for each layer from the last to the second layer (in reverse order)
    delta = []

    # Step 2: Feedforward
    for i in range(num_layers-1):
        h = A[i].dot(Theta[i].T)
        Z.append(h)
        h = addColumnOne(sigmoid(h))
        A.append(h)


    # Step 3: Backpropagation
    d = removeFirstColumn(A[-1]) - yv
    delta.append(d)

    for i in range(num_layers-2, 0, -1):
        d = removeFirstColumn(d.dot(Theta[i])) * sigmoidGradient(Z[i])
        delta.append(d)

    delta.reverse()
    # delta is of size num_layers-1 (no delta for the input layer)

    for i in range(num_layers-1):
        Theta_grad[i] += delta[i].T.dot(A[i])
        # DONE: no regularization on the bias weights !!
        Theta_grad[i] += lambd * Theta[i]
        for j in range(Theta[i].shape[0]):
            Theta_grad[i][j, 0] -= lambd * Theta[i][j, 0]
        Theta_grad[i] /= m
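    # in the loop above, adding lambd * Theta[i] and then subtracting the bias
    # column's lambd contribution is equivalent to regularizing only Theta[i][:, 1:]
    # before dividing everything by m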

    # Unroll Params
    Theta_grad = unroll_params(Theta_grad)

    return Theta_grad