def gradient_check_n(parameters, gradients, X, Y, epsilon = 1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n
    
    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    gradients -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters.
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient with formula(1)
    
    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient
    """
    
    # Set-up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))
    
    # Compute gradapprox
    for i in range(num_parameters):
        
        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]".
        # "_" is used because the function you have to outputs two parameters but we only care about the first one
        ### START CODE HERE ### (approx. 3 lines)
        thetaplus = np.copy(parameters_values)                                        # Step 1
        thetaplus[i][0] = thetaplus[i][0] + epsilon                                   # Step 2
        J_plus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaplus))   # Step 3
        ### END CODE HERE ###
        
        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]".
        ### START CODE HERE ### (approx. 3 lines)
        thetaminus = np.copy(parameters_values)                                       # Step 1
        thetaminus[i][0] = thetaminus[i][0] - epsilon                                 # Step 2
        J_minus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaminus)) # Step 3
        ### END CODE HERE ###
        
        # Compute gradapprox[i]
        ### START CODE HERE ### (approx. 1 line)
        gradapprox[i] = (J_plus[i] - J_minus[i]) /(2 * epsilon)
        ### END CODE HERE ###
    
    # Compare gradapprox to backward propagation gradients by computing difference.
    ### START CODE HERE ### (approx. 1 line)
    numerator = np.linalg.norm(grad - gradapprox)                    # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)  # Step 2'
    difference = numerator / denominator                             # Step 3'

    ### END CODE HERE ###

    if difference > 2e-7:
        print ("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print ("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")
    
    return difference
Example #2
def gradient_check_n(parameters, gradients, X, Y, epsilon = 1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n
    
    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    gradients -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters.
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient with formula(1)
    
    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient
    """
    
    # Set-up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))
    e = epsilon
    
    # Compute gradapprox
    for i in range(num_parameters):
        
        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]".
        # "_" is used because the function you have to outputs two parameters but we only care about the first one
        ### START CODE HERE ### (approx. 3 lines)
        thetaplus = np.copy(parameters_values)                        # Step 1
        thetaplus[i][0] = thetaplus[i][0] + e                         # Step 2
        J_plus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaplus))         # Step 3
        ### END CODE HERE ###
        
        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]".
        ### START CODE HERE ### (approx. 3 lines)
        thetaminus = np.copy(parameters_values)                       # Step 1
        thetaminus[i][0] = thetaminus[i][0] - e                       # Step 2        
        J_minus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaminus))       # Step 3
        ### END CODE HERE ###
        
        # Compute gradapprox[i]
        ### START CODE HERE ### (approx. 1 line)
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2*e)
        ### END CODE HERE ###
    
    # Compare gradapprox to backward propagation gradients by computing difference.
    ### START CODE HERE ### (approx. 1 line)
    numerator = np.linalg.norm(gradapprox - grad)
    denominator = np.linalg.norm(gradapprox) + np.linalg.norm(grad)
    difference = numerator / denominator                                          # Step 3'
    ### END CODE HERE ###

    if difference > 2e-7:
        print ("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print ("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")
    
    return difference
Example #3
def gradient_check_n(parameters,gradients,X,Y,epsilon=1e-7):

    parameters_values, keys = gc_utils.dictionary_to_vector(parameters)
    grad = gc_utils.gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        # Compute J_plus[i]
        theta_plus = np.copy(parameters_values)
        theta_plus[i][0] = theta_plus[i][0] + epsilon
        J_plus[i], cache = forward_propagation_n(X, Y, gc_utils.vector_to_dictionary(theta_plus))

        # Compute J_minus[i]
        theta_minus = np.copy(parameters_values)
        theta_minus[i][0] = theta_minus[i][0] - epsilon
        J_minus[i], cache = forward_propagation_n(X, Y, gc_utils.vector_to_dictionary(theta_minus))

        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = numerator / denominator

    if difference < 1e-7:
        print("梯度检查:梯度正常")
    else:
        print("梯度检测:梯度超出阈值")

    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(thetaplus))

        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], _ = forward_propagation_n(X, Y,
                                              vector_to_dictionary(thetaminus))

        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    difference = np.linalg.norm(grad - gradapprox) / (
        np.linalg.norm(grad) + np.linalg.norm(gradapprox))

    if difference > 1e-7:
        print("There is a mistake in the backward propagation! difference = " +
              str(difference))
    else:
        print("Your backward propagation works perfectly fine! difference = " +
              str(difference))

    return difference
Example #5
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        thetaplus = np.copy(parameters_values)  # Step 1
        thetaplus[i][0] = thetaplus[i][0] + epsilon  # Step 2
        J_plus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(thetaplus))  # Step 3
        thetaminus = np.copy(parameters_values)  # Step 1
        thetaminus[i][0] = thetaminus[i][0] - epsilon  # Step 2
        J_minus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(thetaminus))  # Step 3

        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    numerator = np.linalg.norm(gradapprox - grad)  # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)  # Step 2'
    difference = numerator / denominator  # Step 3'

    if difference > 1e-7:
        print("\033[93m" +
              "There is a mistake in the backward propagation! difference = " +
              str(difference) + "\033[0m")
    else:
        print("\033[92m" +
              "Your backward propagation works perfectly fine! difference = " +
              str(difference) + "\033[0m")

    return difference
Example #6
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(thetaplus))

        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], _ = forward_propagation_n(X, Y,
                                              vector_to_dictionary(thetaminus))
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)
    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = numerator / denominator

    if difference > 2e-7:
        print("\033[93m" + "反向传播有问题! difference = " + str(difference) +
              "\033[0m")
    else:
        print("\033[92m" + "反向传播很完美! difference = " + str(difference) +
              "\033[0m")
    return difference
Example #7
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n
    
    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    gradients -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters.
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient.
    
    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient.
    """

    # Sets up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # Computes gradapprox
    for i in range(num_parameters):

        # Computes J_plus[i].
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(thetaplus))

        # Computes J_minus[i].
        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], _ = forward_propagation_n(X, Y,
                                              vector_to_dictionary(thetaminus))

        # Computes gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compares gradapprox to backward propagation gradients by computing difference.
    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = numerator / denominator

    if difference > 2e-7:
        print("\033[93m" +
              "There is a mistake in the backward propagation! difference = " +
              str(difference) + "\033[0m")
    else:
        print("\033[92m" +
              "Your backward propagation works perfectly fine! difference = " +
              str(difference) + "\033[0m")

    return difference
Example #8
def gradient_check_n(parameters,gradients,X,Y,epsilon=1e-7):
    """
    检查backward_propagation_n是否正确计算forward_propagation_n输出的成本梯度
    
    参数:
        parameters - 包含参数“W1”,“b1”,"W2","b2","W3","b3"的python字典
        grentients - grad_output_propagation_n的输出 包含与参数相关的成本梯度
        x - 输入数据点,维度为(输入节点数量,1)\
        y - 标签
        epsilon - 计算输入的微小偏移以计算近似梯度
        
    返回:
        difference - 近似梯度和后向传播梯度之间的差异
        
    """
    
    # Set up variables
    parameters_values, keys = gc_utils.dictionary_to_vector(parameters)  # keys is not used
    print("parameters" + str(parameters))
    print("parameters_values" + str(parameters_values))
    grad = gc_utils.gradients_to_vector(gradients)

    print("gradients" + str(gradients))
    print("grad" + str(grad))
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters,1))
    J_minus = np.zeros((num_parameters,1))
    gradapprox = np.zeros((num_parameters,1))
    
    # Compute gradapprox
    for i in range(num_parameters):
        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output: "J_plus[i]"
        thetaplus = np.copy(parameters_values)                                                 # step 1
        thetaplus[i][0] = thetaplus[i][0] + epsilon                                            # step 2
        J_plus[i],cache = forward_propagation_n(X,Y,gc_utils.vector_to_dictionary(thetaplus))  # step 3, cache is not used
        
        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output: "J_minus[i]"
        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i],cache = forward_propagation_n(X,Y,gc_utils.vector_to_dictionary(thetaminus))
        
        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)
        
    # Compare gradapprox to the backward propagation gradients by computing the difference
    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = numerator / denominator
    
    if difference < 1e-7:
        print("梯度检查:梯度正常!")
    else:
        print("梯度检查:梯度超出阈值!")
    
    
    return difference
Example #9
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n

    Arguments:
        parameters -- python dictionary containing your parameters
        gradients -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters.
        X -- input datapoint, of shape (input size, 1)
        Y -- true "label"
        epsilon -- tiny shift to the input to compute approximated gradient

    Returns:
        difference -- difference between approximated gradient and the backward propagation gradient
    """
    # Set up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # compute gradapprox
    for i in range(num_parameters):
        # Compute J_plus[i]. Inputs: "parameters_values, epsilon".  Output: "J_plus[i]"
        # "_" is used because the function you have to outputs two parameters but we only care about the first one
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] += epsilon
        J_plus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(thetaplus))

        # Compute J_minus[i]. Inputs: "parameters_values, epsilon".     Output: "J_minus[i]".
        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] -= epsilon
        J_minus[i], _ = forward_propagation_n(X, Y,
                                              vector_to_dictionary(thetaminus))

        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compare gradapprox to backward propagation gradients by computing difference.
    numerator = np.linalg.norm(gradapprox - grad)
    denominator = np.linalg.norm(gradapprox) + np.linalg.norm(grad)
    difference = numerator / denominator

    if difference > 1.2e-7:
        print("\033[93m" +
              "There is a mistake in the backward propagation! difference = " +
              str(difference) + "\033[0m")
    else:
        print("\033[92m" +
              "Your backward propagation works perfectly fine! difference = " +
              str(difference) + "\033[0m")
    return difference
Example #10
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    检查backward_propagation_n是否正确计算forward_propagation_n输出的成本梯度

    参数:
        parameters - 包含参数“W1”,“b1”,“W2”,“b2”,“W3”,“b3”的python字典:
        grad_output_propagation_n的输出包含与参数相关的成本梯度。
        x  - 输入数据点,维度为(输入节点数量,1)
        y  - 标签
        epsilon  - 计算输入的微小偏移以计算近似梯度

    返回:
        difference - 近似梯度和后向传播梯度之间的差异
    """
    # Set up variables
    parameters_values, keys = gc_utils.dictionary_to_vector(
        parameters)  # keys is not used
    grad = gc_utils.gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # Compute gradapprox
    for i in range(num_parameters):
        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]"
        thetaplus = np.copy(parameters_values)  # Step 1
        thetaplus[i][0] = thetaplus[i][0] + epsilon  # Step 2
        J_plus[i], cache = forward_propagation_n(
            X, Y, gc_utils.vector_to_dictionary(thetaplus))  # Step 3, cache is not used

        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]"
        thetaminus = np.copy(parameters_values)  # Step 1
        thetaminus[i][0] = thetaminus[i][0] - epsilon  # Step 2
        J_minus[i], cache = forward_propagation_n(
            X, Y,
            gc_utils.vector_to_dictionary(thetaminus))  # Step 3, cache is not used

        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compare gradapprox to the backward propagation gradients by computing the difference.
    numerator = np.linalg.norm(grad - gradapprox)  # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)  # Step 2'
    difference = numerator / denominator  # Step 3'

    if difference < 1e-7:
        print("梯度检查:梯度正常!")
    else:
        print("梯度检查:梯度超出阈值!")

    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    check if backward_propagation_n computes correctly the gradient of the cost output
    
    arguments:
        parameters -- dictionary containing your parameters "W1","b1","W2","b2","W3","b3"
        gradients -- output of backward_propagation_n
        X -- input datapoint, shape (input size, 1)
        Y -- true label
        epsilon -- tiny shift to the input to compute approximated gradient
        
    returns:
        difference -- difference between the approximated gradient and the backward propagation gradient
    """

    #set-up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    #compute gradapprox
    for i in range(num_parameters):
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(thetaplus))

        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], _ = forward_propagation_n(X, Y,
                                              vector_to_dictionary(thetaminus))

        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    print('grad.shape = ', grad.shape)
    print('gradapprox.shape = ', gradapprox.shape)
    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = numerator / denominator

    if difference > 1e-7:
        print('there is a mistake in the backward propagation, difference = ' +
              str(difference))
    else:
        print('your backward propagation works perfectly fine! difference = ' +
              str(difference))

    return difference
Example #12
def gradients_check(X, Y, lambd=0, keep_prob=1, init_method='he'):
    layers_dims = [X.shape[0], 5, 3, 1]
    # initialize params
    if init_method == 'zeros':
        params = init_zeros(layers_dims)
    elif init_method == 'random':
        params = init_random(layers_dims)
    elif init_method == 'he':
        params = init_he(layers_dims)
    else:
        print('Error: unexpected init_method!')

    # compute grads
    a3, cache = forward_propagate_with_reg(X, params, keep_prob=keep_prob)
    grads = backward_propagate_with_reg(X,
                                        Y,
                                        cache,
                                        lambd=lambd,
                                        keep_prob=keep_prob)
    grads_vector = gc_utils.gradients_to_vector(grads)

    theta, keys = gc_utils.dictionary_to_vector(params)  # convert to an (n, 1) vector for easy indexing
    n = theta.shape[0]  # number of parameters
    grads_approx_vector = np.zeros((n, 1))

    # compute grads_approx
    for i in range(n):
        theta_p = np.copy(theta)
        theta_p[i, 0] += 1e-7
        params_p = gc_utils.vector_to_dictionary(theta_p)
        theta_m = np.copy(theta)
        theta_m[i, 0] -= 1e-7
        params_m = gc_utils.vector_to_dictionary(theta_m)
        a3_, cache_ = forward_propagate_with_reg(X,
                                                 params_p,
                                                 keep_prob=keep_prob)
        J_p = compute_loss_with_reg(a3_, Y, params_p, lambd=lambd)
        a3_, cache_ = forward_propagate_with_reg(X,
                                                 params_m,
                                                 keep_prob=keep_prob)
        J_m = compute_loss_with_reg(a3_, Y, params_m, lambd=lambd)
        d_approx = (J_p - J_m) / (2 * 1e-7)
        grads_approx_vector[i, 0] = d_approx

    # compute difference
    numerator = np.linalg.norm(grads_vector - grads_approx_vector)
    denominator = np.linalg.norm(grads_vector) + np.linalg.norm(
        grads_approx_vector)
    diff = numerator / denominator
    return diff
Example #13
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n
    
    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    gradients -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters.
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient with formula(1)
    
    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient
    """

    # Set-up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # Compute gradapprox
    for i in range(num_parameters):

        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]".
        # "_" is used because the function you have to outputs two parameters but we only care about the first one
        ### START CODE HERE ### (approx. 3 lines)
        plus_copy = parameters_values.copy()
        plus_copy[i] = plus_copy[i] + epsilon
        J_plus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(plus_copy))
        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]".
        minus_copy = parameters_values.copy()
        minus_copy[i] = minus_copy[i] - epsilon
        J_minus[i], _ = forward_propagation_n(X, Y,
                                              vector_to_dictionary(minus_copy))
        # Compute gradapprox[i]
        ### START CODE HERE ### (approx. 1 line)
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compare gradapprox to backward propagation gradients by computing difference.
    num = np.linalg.norm(grad - gradapprox)
    nom = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    diff = num / nom

    return diff
Example #14
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n

    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    gradients -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters.
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient with formula(1)

    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient
    """

    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)

    num_parameters = parameters_values.shape[0]
    Jplus = np.zeros((num_parameters, 1))
    Jminus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] += epsilon
        Jplus[i], _ = forward_propagation_n(X, Y,
                                            vector_to_dictionary(thetaplus))

        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] -= epsilon
        Jminus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(thetaminus))

        gradapprox[i] = (Jplus[i] - Jminus[i]) / (2. * epsilon)

    diff = np.linalg.norm(grad - gradapprox) / (np.linalg.norm(grad) +
                                                np.linalg.norm(gradapprox))

    if diff > 1e-7:
        print('There is a mistake in the backward propagation, diff = {}'.format(
            diff))
    else:
        print(
            'Your backward propagation works well with diff = {}'.format(diff))

    return diff
def gradient_check_n(parameters, grads, X, Y, epsilon=1e-7):
    '''
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n
    
    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    grads -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters.
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient with formula(1)
    
    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient
    '''
    parameters_values, _ = dictionary_to_vector(parameters)
    grads_value = gradients_to_vector(grads)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradsapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(thetaplus))

        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], _ = forward_propagation_n(X, Y,
                                              vector_to_dictionary(thetaminus))

        gradsapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    numerator = np.linalg.norm(grads_value - gradsapprox)
    denominator = np.linalg.norm(grads_value) + np.linalg.norm(gradsapprox)
    difference = numerator / denominator

    if difference > 1e-7:
        print(
            'There is a mistake in the backward propagation! difference = {}'.
            format(difference))
    else:
        print(
            'Your backward propagation works perfectly fine! difference = {}'.
            format(difference))

    return difference
Example #16
def gradient_check_n(parameters,gradients,X,Y,epsilon = 1e-7):
    '''
    Checks if backward_propagation_n correctly computes the gradient of the cost output by forward_propagation_n
    :param parameters: python dictionary containing the parameters 'W1','b1','W2','b2','W3','b3'
    :param gradients: output of backward_propagation_n, contains gradients of the cost with respect to the parameters
    :param X: input datapoint, of shape (number of input nodes, 1)
    :param Y: true label
    :param epsilon: tiny shift to the input used to compute the approximated gradient
    :return: difference between the approximated gradient and the backward propagation gradient
    '''
    # Set up variables
    parameters_values , keys = gc_utils.dictionary_to_vector(parameters)  # keys is not used; parameters_values is an (n, 1) matrix
    grad = gc_utils.gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters,1))
    J_minus = np.zeros((num_parameters,1))
    gradapprox = np.zeros((num_parameters,1))

    # Compute gradapprox
    for i in range(num_parameters):
        # Compute J_plus[i]. Inputs: 'parameters_values, epsilon'. Output = 'J_plus[i]'
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0]=thetaplus[i][0]+epsilon
        J_plus[i],cache = forward_propagation_n(X,Y,gc_utils.vector_to_dictionary(thetaplus))

        # Compute J_minus[i]. Inputs: 'parameters_values, epsilon'. Output = 'J_minus[i]'
        thetaminus = np.copy(parameters_values)
        thetaminus[i][0]=thetaminus[i][0] - epsilon
        J_minus[i],cache = forward_propagation_n(X,Y,gc_utils.vector_to_dictionary(thetaminus))

        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i]-J_minus[i])/(2*epsilon)

    # Compare gradapprox to the backward propagation gradients by computing the difference
    numerator = np.linalg.norm(grad-gradapprox)
    denominator = np.linalg.norm(grad)+np.linalg.norm(gradapprox)
    difference = numerator/denominator

    if difference<1e-7:
        print('梯度检查:梯度正常')
    else:
        print('梯度检查:梯度超出阈值')

    return difference
Example #17
def gradient_check_n(parameters, gradients, X, Y, epsilon = 1e-7):
    
    # Set-up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))
    
    # Compute gradapprox
    for i in range(num_parameters):
        
        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]".
        # "_" is used because the function you have to outputs two parameters but we only care about the first one
        thetaplus = np.copy(parameters_values)          # Step 1
        thetaplus[i][0] += epsilon                      # Step 2
        J_plus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaplus))                                 # Step 3
        
        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]".
        thetaminus = np.copy(parameters_values)                                     # Step 1
        thetaminus[i][0] -= epsilon                                # Step 2
        J_minus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaminus))                                  # Step 3
        
        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2. * epsilon)
    
    # Compare gradapprox to backward propagation gradients by computing difference.
    numerator = np.linalg.norm(grad - gradapprox)                               # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)             # Step 2'
    difference = numerator / denominator

    if difference > 1e-6:
        print ("There is a mistake in the backward propagation! difference = " + str(difference))
    else:
        print ("Your backward propagation works perfectly fine! difference = " + str(difference))
    
    return difference
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    # Set up variables
    parameters_values, keys = gc_utils.dictionary_to_vector(
        parameters)  # convert the parameters dictionary into an array
    grad = gc_utils.gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # Compute gradapprox
    for i in range(num_parameters):  # loop over all parameters
        # Compute J_plus[i]
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], cache = forward_propagation_n(
            X, Y, gc_utils.vector_to_dictionary(thetaplus))  # cache is not used

        # Compute J_minus[i]
        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], cache = forward_propagation_n(
            X, Y, gc_utils.vector_to_dictionary(thetaminus))  # cache is not used

        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compare gradapprox to the backward propagation gradients by computing the difference
    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = numerator / denominator

    if difference < 1e-7:
        print("Gradient Checking: 梯度正常!")
    else:
        print("Gradient Checking:梯度超出阈值!")
    return difference
How does gradient checking work?

As in 1) and 2), you want to compare "gradapprox" to the gradient computed by backpropagation. The formula is still:

$$\frac{\partial J}{\partial \theta} = \lim_{\varepsilon \to 0} \frac{J(\theta + \varepsilon) - J(\theta - \varepsilon)}{2\varepsilon} \tag{1}$$
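For a scalar θ this is a one-line check. The snippet below is only an illustrative sketch, assuming the toy cost J(θ) = θ·x used in the 1-D part of this exercise (so the analytic gradient is simply x); the name gradient_check_1d is hypothetical.

import numpy as np

def gradient_check_1d(x, theta, epsilon=1e-7):
    # Two-sided estimate of dJ/dtheta for the assumed toy cost J(theta) = theta * x (formula (1))
    J_plus = (theta + epsilon) * x
    J_minus = (theta - epsilon) * x
    gradapprox = (J_plus - J_minus) / (2 * epsilon)

    grad = x  # analytic gradient of J(theta) = theta * x, standing in for backprop's output
    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    return numerator / denominator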
However, θ is not a scalar anymore. It is a dictionary called "parameters". We implemented a function "dictionary_to_vector()" for you. It converts the "parameters" dictionary into a vector called "values", obtained by reshaping all parameters (W1, b1, W2, b2, W3, b3) into vectors and concatenating them.

The inverse function is "vector_to_dictionary" which outputs back the "parameters" dictionary.



Figure 2: dictionary_to_vector() and vector_to_dictionary()
You will need these functions in gradient_check_n()
We have also converted the "gradients" dictionary into a vector "grad" using gradients_to_vector(). You don't need to worry about that.
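None of these helpers are shown on this page. The snippet below is only a minimal sketch of what they could look like, assuming the usual six keys W1..b3 and that the target shapes are passed explicitly; note that this vector_to_dictionary takes the shapes as an extra argument, whereas the course helper (and the calls in the examples above) takes only theta and hard-codes the shapes of its 3-layer network.

import numpy as np

PARAM_KEYS = ["W1", "b1", "W2", "b2", "W3", "b3"]

def dictionary_to_vector(parameters):
    # Reshape each parameter into a column and stack them into one (n, 1) vector;
    # also record which key each row came from.
    keys, chunks = [], []
    for key in PARAM_KEYS:
        value = parameters[key].reshape(-1, 1)
        keys += [key] * value.shape[0]
        chunks.append(value)
    return np.concatenate(chunks, axis=0), keys

def vector_to_dictionary(theta, shapes):
    # Inverse mapping: cut the (n, 1) vector back into parameters of the given shapes.
    parameters, start = {}, 0
    for key in PARAM_KEYS:
        size = int(np.prod(shapes[key]))
        parameters[key] = theta[start:start + size].reshape(shapes[key])
        start += size
    return parameters

def gradients_to_vector(gradients):
    # Same stacking, applied to the dW*/db* entries of the gradients dictionary.
    chunks = [gradients["d" + key].reshape(-1, 1) for key in PARAM_KEYS]
    return np.concatenate(chunks, axis=0)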

Exercise: Implement gradient_check_n().

Instructions: Here is pseudo-code that will help you implement the gradient check.

For each i in num_parameters:

To compute J_plus[i]:
Set $\theta^{+}$ to np.copy(parameters_values)
Set $\theta^{+}_i$ to $\theta^{+}_i + \varepsilon$
Calculate $J^{+}_i$ using forward_propagation_n(x, y, vector_to_dictionary($\theta^{+}$)).
To compute J_minus[i]: do the same thing with $\theta^{-}$
Compute $gradapprox[i] = \frac{J^{+}_i - J^{-}_i}{2\varepsilon}$
Thus, you get a vector gradapprox, where gradapprox[i] is an approximation of the gradient with respect to parameter_values[i]. You can now compare this gradapprox vector to the gradients vector from backpropagation. Just like for the 1D case (Steps 1', 2', 3'), compute:
$$\text{difference} = \frac{\| grad - gradapprox \|_2}{\| grad \|_2 + \| gradapprox \|_2} \tag{3}$$
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n
    
    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    gradients -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters.
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient with formula(1)
    
    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient
    """

    # Set-up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # Compute gradapprox
    for i in range(num_parameters):

        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]".
        # "_" is used because the function you have to outputs two parameters but we only care about the first one
        thetaplus = np.copy(parameters_values)  # Step 1
        thetaplus[i][0] = thetaplus[i][0] + epsilon  # Step 2
        J_plus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(thetaplus))  # Step 3

        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]".
        thetaminus = np.copy(parameters_values)  # Step 1
        thetaminus[i][0] = thetaminus[i][0] - epsilon  # Step 2
        J_minus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(thetaminus))  # Step 3

        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compare gradapprox to backward propagation gradients by computing difference.
    numerator = np.linalg.norm(grad - gradapprox)  # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)  # Step 2'
    difference = numerator / denominator  # Step 3'

    if difference > 2e-7:
        print("\033[93m" +
              "There is a mistake in the backward propagation! difference = " +
              str(difference) + "\033[0m")
    else:
        print("\033[92m" +
              "Your backward propagation works perfectly fine! difference = " +
              str(difference) + "\033[0m")

    return difference


# What you should remember from this notebook:

# Gradient checking verifies closeness between the gradients from backpropagation
# and the numerical approximation of the gradient (computed using forward propagation).
# Gradient checking is slow, so we don't run it in every iteration of training.
# You would usually run it only to make sure your code is correct,
# then turn it off and use backprop for the actual learning process.
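
As a hedged illustration of that last point, one possible way to wire the check into a training script is to run gradient_check_n once on a small slice of the data before the loop and then train with backprop only. The driver below is hypothetical; backward_propagation_n and the parameter-update rule are assumed to follow the conventions of the examples above.

# Hypothetical driver: gradient-check once up front, then train normally (sketch only).
def train_with_gradient_check(parameters, X, Y, learning_rate=0.01, num_iterations=1000):
    # One-off sanity check on a small slice of the data; too slow to run every iteration.
    cost, cache = forward_propagation_n(X[:, :5], Y[:, :5], parameters)
    gradients = backward_propagation_n(X[:, :5], Y[:, :5], cache)
    difference = gradient_check_n(parameters, gradients, X[:, :5], Y[:, :5])
    assert difference < 2e-7, "Fix backward_propagation_n before training."

    # Regular training loop using backprop only.
    for i in range(num_iterations):
        cost, cache = forward_propagation_n(X, Y, parameters)
        gradients = backward_propagation_n(X, Y, cache)
        for key in parameters:
            parameters[key] = parameters[key] - learning_rate * gradients["d" + key]
    return parameters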
Example #21
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n
    
    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    gradients -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters.
    X -- input datapoint, of shape (input size, 1)
    Y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient with formula(1)
    
    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient
    """
    # Notes on how to implement the gradient check.
    # Set-up variables
    parameters_values, _ = dictionary_to_vector(
        parameters
    )  # converts the "parameters" dictionary into a vector called "values"
    grad = gradients_to_vector(
        gradients)  # convert gradients dictionary into a vector, "grads"
    num_parameters = parameters_values.shape[
        0]  # number of parameters, i.e. the length of the flattened vector
    J_plus = np.zeros(
        (num_parameters,
         1))  # initialize J_plus with zeros and number of parameter objects
    J_minus = np.zeros(
        (num_parameters,
         1))  # initialize J_minus with zeros and number of parameter objects
    gradapprox = np.zeros((
        num_parameters,
        1))  # initialize gradapprox with zeros and number of parameter objects

    # Compute gradapprox
    for i in range(num_parameters):

        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]".
        # "_" is used because the function outputs two parameters but only care about the first one
        thetaplus = np.copy(parameters_values)  # Set theta to np.copy
        thetaplus[i][0] = thetaplus[i][0] + epsilon  # Set theta_plus
        J_plus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(
                thetaplus))  # Calculate J_plus using forward propagation

        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]".
        thetaminus = np.copy(parameters_values)  # Set theta to np.copy
        thetaminus[i][0] = thetaminus[i][0] - epsilon  # Set theta_minus
        J_minus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(
                thetaminus))  # Calculate J_minus using forward propagation

        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compare gradapprox to backward propagation gradients by computing difference.
    numerator = np.linalg.norm(
        grad - gradapprox)  # compute the numerator using np.linalg.norm(...)
    denominator = np.linalg.norm(grad) + np.linalg.norm(
        gradapprox
    )  # compute the denominator (needs to call np.linalg.norm(...) twice)
    difference = numerator / denominator  # divide both

    if difference > 2e-7:
        print("\033[93m" +
              "There is a mistake in the backward propagation! difference = " +
              str(difference) + "\033[0m")
    else:
        print("\033[92m" +
              "Your backward propagation works perfectly fine! difference = " +
              str(difference) + "\033[0m")

    return difference
Example #22
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    梯度校验,检查后向传播是否正确计算前向传播输出的 cost 的梯度
    
    :param parameters: 参数字典,包含 "W1", "b1", "W2", "b2", "W3", "b3":
    :param grad: 后向传播的输出, 包含与参数相关的 cost 梯度
    :param x: 输入数据点, of shape (input size, 1)
    :param y: 正确的标签
    :param epsilon: 输入的微小偏移,用来计算近似梯度
    
    :return difference: 近似梯度和后向传播计算的梯度之间的差异
    """

    # Set-up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # Compute gradapprox
    for i in range(num_parameters):

        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]".
        # "_" is used because the function you have to outputs two parameters but we only care about the first one
        ### START CODE HERE ### (approx. 3 lines)
        thetaplus = np.copy(parameters_values)  # Step 1
        thetaplus[i][0] = thetaplus[i][0] + epsilon  # Step 2
        J_plus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(thetaplus))  # Step 3
        ### END CODE HERE ###

        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]".
        ### START CODE HERE ### (approx. 3 lines)
        thetaminus = np.copy(parameters_values)  # Step 1
        thetaminus[i][0] = thetaminus[i][0] - epsilon  # Step 2
        J_minus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(thetaminus))  # Step 3
        ### END CODE HERE ###

        # Compute gradapprox[i]
        ### START CODE HERE ### (approx. 1 line)
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)
        ### END CODE HERE ###

    # Compare gradapprox to the backward propagation gradients by computing the difference
    ### START CODE HERE ### (approx. 1 line)
    numerator = np.linalg.norm(grad - gradapprox)  # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)  # Step 2'
    difference = numerator / denominator  # Step 3'
    ### END CODE HERE ###

    if difference > 2e-7:
        print("\033[93m" +
              "There is a mistake in the backward propagation! difference = " +
              str(difference) + "\033[0m")
    else:
        print("\033[92m" +
              "Your backward propagation works perfectly fine! difference = " +
              str(difference) + "\033[0m")

    return difference