Example #1
0
def compute_da2_dz2(a2):
    '''
        Local gradient of the 2nd-layer softmax activations a2 with respect to the 2nd-layer logits z2.
        This is a thin wrapper: the work is delegated to sr.compute_da_dz, and its result is returned unchanged.
        Input:
            a2: the softmax activation values, a numpy float vector of shape c by 1, where c is the number of classes.
        Output:
            da2_dz2: the Jacobian of a2 w.r.t. z2, a float numpy matrix of shape (c by c).
                   Entry (i,j) holds the partial gradient ( d_a2[i]  / d_z2[j] ).
    '''
    # The 2nd layer is a softmax layer, so reuse the softmax-regression helper directly.
    return sr.compute_da_dz(a2)
Example #2
0
def compute_da1_dz1(a1):
    '''
        Compute local gradient of the sigmoid activations a1 w.r.t. the logits z1 in the first layer.
        For a sigmoid a = 1 / (1 + exp(-z)), each activation depends only on its own logit, and
        ( d_a[i] / d_z[i] ) = a[i] * (1 - a[i]).
        Input:
            a1: the non-linear (sigmoid) activations in the 1st layer, a numpy float vector of shape h by 1.
        Output:
            da1_dz1: the local gradient of the activations a1 w.r.t. the logits z1, a float numpy matrix of shape h by 1.
                   The i-th element of da1_dz1 represents the partial gradient ( d_a1[i]  / d_z1[i] )
    '''
    #########################################
    ## INSERT YOUR CODE HERE
    # Use the element-wise closed form instead of building an (h by h) Jacobian and
    # keeping only its diagonal: same values, O(h) time and memory instead of O(h^2).
    a = np.asarray(a1, dtype=float).reshape(-1, 1)  # plain ndarray column, so '*' is element-wise
    da1_dz1 = np.asmatrix(a * (1. - a))  # callers expect an (h by 1) numpy matrix
    #########################################
    return da1_dz1