コード例 #1
0
def compute_gradients(dL_da2, da2_dz2, dz2_dW2, dz2_db2, dz2_da1, da1_dz1,
                      dz1_dW1, dz1_db1):
    '''
        Combine the local gradients of a two-layer network into the gradients
        of the loss function L w.r.t. the model parameters W1, b1, W2 and b2.

        Input:
            Local gradients named dX_dY (the gradient of X w.r.t. Y), as
            produced by the functions defined earlier in this module.
            NOTE(review): the expressions below use `*` and `.T`, so the
            inputs are presumably numpy matrices (np.matrix), for which `*`
            is the matrix product -- confirm against the callers.
        Output:
            dL_dW2: the gradient of the loss function L w.r.t. the weight matrix W2
            dL_db2: the gradient of the loss function L w.r.t. the biases b2
            dL_dW1: the gradient of the loss function L w.r.t. the weight matrix W1
            dL_db1: the gradient of the loss function L w.r.t. the biases b1
    '''
    # the 2nd layer: dL_dz2 is shared by every gradient below, so it is
    # computed once instead of once per use.
    dL_dz2 = sr.compute_dL_dz(dL_da2, da2_dz2)
    dL_dW2 = sr.compute_dL_dW(dL_dz2, dz2_dW2)
    dL_db2 = sr.compute_dL_db(dL_dz2, dz2_db2)

    # the 1st layer: push the loss gradient back through the 2nd layer's
    # linear map to obtain dL/da1, then scale element-wise by the local
    # gradients of the 1st layer.
    dL_da1 = dz2_da1.T * dL_dz2
    dL_dW1 = np.multiply(np.multiply(dz1_dW1, da1_dz1), dL_da1)
    dL_db1 = np.multiply(np.multiply(dz1_db1, da1_dz1), dL_da1)

    return dL_dW2, dL_db2, dL_dW1, dL_db1
コード例 #2
0
def compute_gradients(dL_da2, da2_dz2, dz2_dW2, dz2_db2, dz2_da1, da1_dz1, dz1_dW1, dz1_db1):
    '''
        Given the local gradients, compute the gradient of the loss function L
        w.r.t. the model parameters: the weights W1, W2 and the biases b1, b2.

        The 2nd-layer gradients are obtained through the `sr` helpers.  The
        1st-layer gradients are accumulated entry by entry in a loop that
        calls the `lr` helpers (imported locally from `problem2`).

        Input: see details in the functions that produce the local gradients.
            NOTE(review): the code uses `*`, `.T` and `[i, 0]` indexing, so the
            inputs are presumably numpy matrices (np.matrix) -- confirm.
        Output:
            dL_dW2: the gradient of the loss function L w.r.t. the weight matrix W2
            dL_db2: the gradient of the loss function L w.r.t. the biases b2
            dL_dW1: the gradient of the loss function L w.r.t. the weight matrix W1
                    (allocated with the same shape as dz1_dW1)
            dL_db1: the gradient of the loss function L w.r.t. the biases b1
                    (allocated with shape (dz1_dW1.shape[0], 1))
    '''
    
    #########################################
    ## INSERT YOUR CODE HERE

    # the 2nd layer
    # NOTE(review): sr.compute_dL_dz(dL_da2, da2_dz2) is evaluated twice with
    # the same arguments; it could be computed once and reused.
    dL_dW2 = sr.compute_dL_dW(sr.compute_dL_dz(dL_da2, da2_dz2), dz2_dW2)
    dL_db2 = sr.compute_dL_db(sr.compute_dL_dz(dL_da2, da2_dz2), dz2_db2)

    # the 1st layer
    # NOTE(review): `problem2` is imported here under the alias `lr`, while the
    # rest of the function uses `sr` -- verify which module each alias is
    # supposed to refer to.
    import problem2 as lr
    # Zero-initialised result buffers that the loop below fills in.
    dL_dW1 = np.asmatrix(np.zeros(dz1_dW1.shape))
    dL_db1 = np.asmatrix(np.zeros((dz1_dW1.shape[0], 1)))

    # Loss gradient pushed back to the 1st-layer activations; with matrix
    # inputs this is presumably a row vector, hence the `.T[i, 0]` reads below.
    dL_da1 = dL_da2.T * da2_dz2 * dz2_da1
    # NOTE(review): i runs over the ROWS of dz1_dW1, yet the result is written
    # to COLUMN i of dL_dW1 and only dz1_dW1[i, 0] is read.  This looks like
    # row i (dL_dW1[i, :] / dz1_dW1[i, :]) may have been intended; as written
    # it raises IndexError when dz1_dW1 has more rows than columns -- confirm
    # against the expected shapes and the contract of lr.compute_dL_dw.
    for i in range(dz1_dW1.shape[0]):
        dL_dW1[:, i] = lr.compute_dL_dw(dL_da1.T[i, 0], da1_dz1[i, 0], dz1_dW1[i, 0])
        dL_db1[i, 0] = lr.compute_dL_db(dL_da1.T[i, 0], da1_dz1[i, 0], dz1_db1[i, 0])

    # another solution (left disabled by the author; not verified here)
    # dL_dW1 = np.multiply(dL_da2.T * da2_dz2 * dz2_da1 * da1_dz1, dz1_dW1)
    # dL_db1 = np.multiply(dL_da2.T * da2_dz2 * dz2_da1 * da1_dz1, dz1_db1)

    #########################################

    return dL_dW2, dL_db2, dL_dW1, dL_db1
コード例 #3
0
ファイル: NN.py プロジェクト: jinalj07/Logistic-Regression
def compute_gradients(dL_da2, da2_dz2, dz2_dW2, dz2_db2, dz2_da1, da1_dz1, dz1_dW1, dz1_db1):
    '''
        Chain the local gradients of a two-layer network into the gradients of
        the loss function L w.r.t. the parameters W1, b1, W2 and b2.

        Input: the local gradients produced by the functions defined earlier
               in this module (see those functions for details).
        Output (in this order):
            dL_dW2: the gradient of the loss function L w.r.t. the weight matrix W2
            dL_db2: the gradient of the loss function L w.r.t. the biases b2
            dL_dW1: the gradient of the loss function L w.r.t. the weight matrix W1
            dL_db1: the gradient of the loss function L w.r.t. the biases b1
    '''
    # Output (2nd) layer: gradient w.r.t. z2 first, then its parameters.
    grad_z2 = sr.compute_dL_dz(dL_da2, da2_dz2)
    grad_W2 = sr.compute_dL_dW(grad_z2, dz2_dW2)
    grad_b2 = sr.compute_dL_db(grad_z2, dz2_db2)

    # Hidden (1st) layer: propagate back through a1 to z1, then reuse the
    # same parameter-gradient helpers as for the 2nd layer.
    grad_z1 = compute_dL_dz1(compute_dL_da1(grad_z2, dz2_da1), da1_dz1)
    grad_W1 = sr.compute_dL_dW(grad_z1, dz1_dW1)
    grad_b1 = sr.compute_dL_db(grad_z1, dz1_db1)

    return grad_W2, grad_b2, grad_W1, grad_b1