Code example #1
def gradient_check_n(parameters, gradients, X, Y, epsilon = 1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n
    
    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    grad -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters. 
    x -- input datapoint, of shape (input size, 1)
    y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient with formula(1)
    
    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient
    """
    
    # Set-up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))
    
    # Compute gradapprox
    for i in range(num_parameters):
        
        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]".
        # "_" is used because the function you have to outputs two parameters but we only care about the first one
        ### START CODE HERE ### (approx. 3 lines)
        thetaplus = np.copy(parameters_values)                                        # Step 1
        thetaplus[i][0] = thetaplus[i][0] + epsilon                                   # Step 2
        J_plus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaplus))   # Step 3
        ### END CODE HERE ###
        
        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]".
        ### START CODE HERE ### (approx. 3 lines)
        thetaminus = np.copy(parameters_values)                                       # Step 1
        thetaminus[i][0] = thetaminus[i][0] - epsilon                                 # Step 2
        J_minus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaminus)) # Step 3
        ### END CODE HERE ###
        
        # Compute gradapprox[i]
        ### START CODE HERE ### (approx. 1 line)
        gradapprox[i] = (J_plus[i] - J_minus[i]) /(2 * epsilon)
        ### END CODE HERE ###
    
    # Compare gradapprox to backward propagation gradients by computing difference.
    ### START CODE HERE ### (approx. 1 line)
    numerator = np.linalg.norm(grad - gradapprox)                    # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)  # Step 2'
    difference = numerator / denominator                             # Step 3'

    ### END CODE HERE ###

    if difference > 2e-7:
        print ("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print ("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")
    
    return difference
Code example #2
File: 2.py Project: jaymell/ng-deeplearn
def gradient_check_n(parameters, gradients, X, Y, epsilon = 1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n
    
    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    grad -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters. 
    x -- input datapoint, of shape (input size, 1)
    y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient with formula(1)
    
    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient
    """
    
    # Set-up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))
    e = epsilon
    
    # Compute gradapprox
    for i in range(num_parameters):
        
        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]".
        # "_" is used because the function you have to outputs two parameters but we only care about the first one
        ### START CODE HERE ### (approx. 3 lines)
        thetaplus = np.copy(parameters_values)                        # Step 1
        thetaplus[i][0] = thetaplus[i][0] + e                         # Step 2
        J_plus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaplus))         # Step 3
        ### END CODE HERE ###
        
        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]".
        ### START CODE HERE ### (approx. 3 lines)
        thetaminus = np.copy(parameters_values)                       # Step 1
        thetaminus[i][0] = thetaminus[i][0] - e                       # Step 2        
        J_minus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaminus))       # Step 3
        ### END CODE HERE ###
        
        # Compute gradapprox[i]
        ### START CODE HERE ### (approx. 1 line)
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2*e)
        ### END CODE HERE ###
    
    # Compare gradapprox to backward propagation gradients by computing difference.
    ### START CODE HERE ### (approx. 1 line)
    numerator = np.linalg.norm(gradapprox - grad)
    denominator = np.linalg.norm(gradapprox) + np.linalg.norm(grad)
    difference = numerator / denominator                                          # Step 3'
    ### END CODE HERE ###

    if difference > 2e-7:
        print ("\033[93m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print ("\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")
    
    return difference
Code example #3
def gradient_check_n(parameters,gradients,X,Y,epsilon=1e-7):

    parameters_values, keys = gc_utils.dictionary_to_vector(parameters)
    grad = gc_utils.gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        # Compute J_plus[i]
        theta_plus = np.copy(parameters_values)
        theta_plus[i][0] = theta_plus[i][0] + epsilon
        J_plus[i], cache = forward_propagation_n(X, Y, gc_utils.vector_to_dictionary(theta_plus))

        # Compute J_minus[i]
        theta_minus = np.copy(parameters_values)
        theta_minus[i][0] = theta_minus[i][0] - epsilon
        J_minus[i], cache = forward_propagation_n(X, Y, gc_utils.vector_to_dictionary(theta_minus))

        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = numerator / denominator

    if difference < 1e-7:
        print("Gradient check: the gradient is fine")
    else:
        print("Gradient check: the gradient exceeds the threshold")

    return difference
Code example #4
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(thetaplus))

        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], _ = forward_propagation_n(X, Y,
                                              vector_to_dictionary(thetaminus))

        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    difference = np.linalg.norm(grad - gradapprox) / (
        np.linalg.norm(grad) + np.linalg.norm(gradapprox))

    if difference > 1e-7:
        print("There is a mistake in the backward propagation! difference = " +
              str(difference))
    else:
        print("Your backward propagation works perfectly fine! difference = " +
              str(difference))

    return difference
Code example #5
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        thetaplus = np.copy(parameters_values)  # Step 1
        thetaplus[i][0] = thetaplus[i][0] + epsilon  # Step 2
        J_plus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(thetaplus))  # Step 3
        thetaminus = np.copy(parameters_values)  # Step 1
        thetaminus[i][0] = thetaminus[i][0] - epsilon  # Step 2
        J_minus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(thetaminus))  # Step 3

        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    numerator = np.linalg.norm(gradapprox - grad)  # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)  # Step 2'
    difference = numerator / denominator  # Step 3'

    if difference > 1e-7:
        print("\033[93m" +
              "There is a mistake in the backward propagation! difference = " +
              str(difference) + "\033[0m")
    else:
        print("\033[92m" +
              "Your backward propagation works perfectly fine! difference = " +
              str(difference) + "\033[0m")

    return difference
Code example #6
File: 6judge.py Project: sdfs1231/AI
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(thetaplus))

        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], _ = forward_propagation_n(X, Y,
                                              vector_to_dictionary(thetaminus))
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)
    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = numerator / denominator

    if difference > 2e-7:
        print("\033[93m" + "反向传播有问题! difference = " + str(difference) +
              "\033[0m")
    else:
        print("\033[92m" + "反向传播很完美! difference = " + str(difference) +
              "\033[0m")
    return difference
Code example #7
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n
    
    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    grad -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters. 
    x -- input datapoint, of shape (input size, 1)
    y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient.
    
    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient.
    """

    # Sets up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # Computes gradapprox
    for i in range(num_parameters):

        # Computes J_plus[i].
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(thetaplus))

        # Computes J_minus[i].
        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], _ = forward_propagation_n(X, Y,
                                              vector_to_dictionary(thetaminus))

        # Computes gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compares gradapprox to backward propagation gradients by computing difference.
    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = numerator / denominator

    if difference > 2e-7:
        print("\033[93m" +
              "There is a mistake in the backward propagation! difference = " +
              str(difference) + "\033[0m")
    else:
        print("\033[92m" +
              "Your backward propagation works perfectly fine! difference = " +
              str(difference) + "\033[0m")

    return difference
Code example #8
def gradient_check_n(parameters,gradients,X,Y,epsilon=1e-7):
    """
    检查backward_propagation_n是否正确计算forward_propagation_n输出的成本梯度
    
    参数:
        parameters - 包含参数“W1”,“b1”,"W2","b2","W3","b3"的python字典
        grentients - grad_output_propagation_n的输出 包含与参数相关的成本梯度
        x - 输入数据点,维度为(输入节点数量,1)\
        y - 标签
        epsilon - 计算输入的微小偏移以计算近似梯度
        
    返回:
        difference - 近似梯度和后向传播梯度之间的差异
        
    """
    
    # Set up variables
    parameters_values,keys = gc_utils.dictionary_to_vector(parameters) # keys is not used
    print("parameters"+str(parameters))
    print("parameters_values"+str(parameters_values))
    grad = gc_utils.gradients_to_vector(gradients)
  
    print("gradients"+str(gradients))
    print("grad"+str(grad))
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters,1))
    J_minus = np.zeros((num_parameters,1))
    gradapprox = np.zeros((num_parameters,1))
    
    # Compute gradapprox
    for i in range(num_parameters):
        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]"
        thetaplus = np.copy(parameters_values)                                                 # Step 1
        thetaplus[i][0] = thetaplus[i][0] + epsilon                                            # Step 2
        J_plus[i],cache = forward_propagation_n(X,Y,gc_utils.vector_to_dictionary(thetaplus))  # Step 3, cache is not used
        
        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]"
        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i],cache = forward_propagation_n(X,Y,gc_utils.vector_to_dictionary(thetaminus))
        
        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)
        
    # Compare gradapprox to the backward propagation gradients by computing the difference
    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = numerator / denominator
    
    if difference < 1e-7:
        print("Gradient check: the gradient is fine!")
    else:
        print("Gradient check: the gradient exceeds the threshold!")
    
    
    return difference
Code example #9
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n

    Arguments:
        parameters -- python dictionary containing your parameters
        grad -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters.
        X -- input datapoint, of shape (input size, 1)
        Y -- true "label"
        epsilon -- tiny shift to the input to compute approximated gradient

    Returns:
        difference -- difference between approximated gradient and the backward propagation gradient
    """
    # Set up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # compute gradapprox
    for i in range(num_parameters):
        # Compute J_plus[i]. Inputs: "parameters_values, epsilon".  Output: "J_plus[i]"
        # "_" is used because the function you have to outputs two parameters but we only care about the first one
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] += epsilon
        J_plus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(thetaplus))

        # Compute J_minus[i]. Inputs: "parameters_values, epsilon".     Output: "J_minus[i]".
        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] -= epsilon
        J_minus[i], _ = forward_propagation_n(X, Y,
                                              vector_to_dictionary(thetaminus))

        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compare gradapprox to backward propagation gradients by computing difference.
    numerator = np.linalg.norm(gradapprox - grad)
    denominator = np.linalg.norm(gradapprox) + np.linalg.norm(grad)
    difference = numerator / denominator

    if difference > 1.2e-7:
        print("\033[93m" +
              "There is a mistake in the backward propagation! difference = " +
              str(difference) + "\033[0m")
    else:
        print("\033[92m" +
              "Your backward propagation works perfectly fine! difference = " +
              str(difference) + "\033[0m")
    return difference
Code example #10
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    检查backward_propagation_n是否正确计算forward_propagation_n输出的成本梯度

    参数:
        parameters - 包含参数“W1”,“b1”,“W2”,“b2”,“W3”,“b3”的python字典:
        grad_output_propagation_n的输出包含与参数相关的成本梯度。
        x  - 输入数据点,维度为(输入节点数量,1)
        y  - 标签
        epsilon  - 计算输入的微小偏移以计算近似梯度

    返回:
        difference - 近似梯度和后向传播梯度之间的差异
    """
    # Set up variables
    parameters_values, keys = gc_utils.dictionary_to_vector(
        parameters)  # keys is not used
    grad = gc_utils.gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # Compute gradapprox
    for i in range(num_parameters):
        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]"
        thetaplus = np.copy(parameters_values)  # Step 1
        thetaplus[i][0] = thetaplus[i][0] + epsilon  # Step 2
        J_plus[i], cache = forward_propagation_n(
            X, Y, gc_utils.vector_to_dictionary(thetaplus))  # Step 3, cache is not used

        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]"
        thetaminus = np.copy(parameters_values)  # Step 1
        thetaminus[i][0] = thetaminus[i][0] - epsilon  # Step 2
        J_minus[i], cache = forward_propagation_n(
            X, Y,
            gc_utils.vector_to_dictionary(thetaminus))  # Step 3, cache is not used

        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compare gradapprox to the backward propagation gradients by computing the difference.
    numerator = np.linalg.norm(grad - gradapprox)  # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)  # Step 2'
    difference = numerator / denominator  # Step 3'

    if difference < 1e-7:
        print("Gradient check: the gradient is fine!")
    else:
        print("Gradient check: the gradient exceeds the threshold!")

    return difference
Code example #11
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    check if backward_propagation_n computes correctly the gradient of the cost output
    
    arguments:
        parameters -- dictionary containing your parameters "W1","b1","W2","b2","W3","b3"
        grad -- output of backward_propagation_n
        x -- input datapoint,shape(input size,1)
        y -- true label
        epsilon -- tiny shift to the input to compute approximated gradient
        
    returns:
        difference -- difference between the approximated gradient and the backward propagation gradient
    """

    #set-up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    #compute gradapprox
    for i in range(num_parameters):
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(thetaplus))

        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], _ = forward_propagation_n(X, Y,
                                              vector_to_dictionary(thetaminus))

        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    print('grad.shape = ', grad.shape)
    print('gradapprox.shape = ', gradapprox.shape)
    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = numerator / denominator

    if difference > 1e-7:
        print('there is a mistake in the backward propagation, difference = ' +
              str(difference))
    else:
        print('your backward propagation works perfectly fine! difference = ' +
              str(difference))

    return difference
Code example #12
def gradients_check(X, Y, lambd=0, keep_prob=1, init_method='he'):
    layers_dims = [X.shape[0], 5, 3, 1]
    # initialize params
    if init_method == 'zeros':
        params = init_zeros(layers_dims)
    elif init_method == 'random':
        params = init_random(layers_dims)
    elif init_method == 'he':
        params = init_he(layers_dims)
    else:
        print('Error: unexpected init_method!')

    # compute grads
    a3, cache = forward_propagate_with_reg(X, params, keep_prob=keep_prob)
    grads = backward_propagate_with_reg(X,
                                        Y,
                                        cache,
                                        lambd=lambd,
                                        keep_prob=keep_prob)
    grads_vector = gc_utils.gradients_to_vector(grads)

    theta, keys = gc_utils.dictionary_to_vector(params)  # convert to a column vector of shape (n, 1) for easy indexing
    n = theta.shape[0]  # number of parameters
    grads_approx_vector = np.zeros((n, 1))

    # compute grads_approx
    for i in range(n):
        theta_p = np.copy(theta)
        theta_p[i, 0] += 1e-7
        params_p = gc_utils.vector_to_dictionary(theta_p)
        theta_m = np.copy(theta)
        theta_m[i, 0] -= 1e-7
        params_m = gc_utils.vector_to_dictionary(theta_m)
        a3_, cache_ = forward_propagate_with_reg(X,
                                                 params_p,
                                                 keep_prob=keep_prob)
        J_p = compute_loss_with_reg(a3_, Y, params_p, lambd=lambd)
        a3_, cache_ = forward_propagate_with_reg(X,
                                                 params_m,
                                                 keep_prob=keep_prob)
        J_m = compute_loss_with_reg(a3_, Y, params_m, lambd=lambd)
        d_approx = (J_p - J_m) / (2 * 1e-7)
        grads_approx_vector[i, 0] = d_approx

    # compute difference
    numerator = np.linalg.norm(grads_vector - grads_approx_vector)
    denominator = np.linalg.norm(grads_vector) + np.linalg.norm(
        grads_approx_vector)
    diff = numerator / denominator
    return diff
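A possible way to call gradients_check above is sketched here. X_train and Y_train are placeholder names, not variables defined in this example, and keep_prob is left at 1 because gradient checking is normally run with dropout disabled (a random dropout mask would make the two cost evaluations per parameter incomparable):

# Hypothetical usage sketch; X_train / Y_train are placeholders.
# Dropout is disabled (keep_prob=1) for the check itself.
diff = gradients_check(X_train, Y_train, lambd=0.7, keep_prob=1, init_method='he')
if diff < 1e-7:
    print('gradient check passed, difference = ' + str(diff))
else:
    print('gradient check failed, difference = ' + str(diff))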
Code example #13
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n
    
    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    grad -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters. 
    x -- input datapoint, of shape (input size, 1)
    y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient with formula(1)
    
    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient
    """

    # Set-up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # Compute gradapprox
    for i in range(num_parameters):

        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]".
        # "_" is used because the function you have to outputs two parameters but we only care about the first one
        ### START CODE HERE ### (approx. 3 lines)
        plus_copy = parameters_values.copy()
        plus_copy[i] = plus_copy[i] + epsilon
        J_plus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(plus_copy))
        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]".
        minus_copy = parameters_values.copy()
        minus_copy[i] = minus_copy[i] - epsilon
        J_minus[i], _ = forward_propagation_n(X, Y,
                                              vector_to_dictionary(minus_copy))
        # Compute gradapprox[i]
        ### START CODE HERE ### (approx. 1 line)
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compare gradapprox to backward propagation gradients by computing difference.
    num = np.linalg.norm(grad - gradapprox)
    nom = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    diff = num / nom

    return diff
Code example #14
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n

    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    grad -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters.
    x -- input datapoint, of shape (input size, 1)
    y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient with formula(1)

    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient
    """

    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)

    num_parameters = parameters_values.shape[0]
    Jplus = np.zeros((num_parameters, 1))
    Jminus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] += epsilon
        Jplus[i], _ = forward_propagation_n(X, Y,
                                            vector_to_dictionary(thetaplus))

        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] -= epsilon
        Jminus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(thetaminus))

        gradapprox[i] = (Jplus[i] - Jminus[i]) / (2. * epsilon)

    diff = np.linalg.norm(grad - gradapprox) / (np.linalg.norm(grad) +
                                                np.linalg.norm(gradapprox))

    if diff > 1e-7:
        print('There is a mistake in backword propagation diff = {}'.format(
            diff))
    else:
        print(
            'Your backward propagation works well with diff = {}'.format(diff))

    return diff
Code example #15
def gradient_check_n(parameters, grads, X, Y, epsilon=1e-7):
    '''
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n
    
    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    grad -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters. 
    x -- input datapoint, of shape (input size, 1)
    y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient with formula(1)
    
    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient
    '''
    parameters_values, _ = dictionary_to_vector(parameters)
    grads_value = gradients_to_vector(grads)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradsapprox = np.zeros((num_parameters, 1))

    for i in range(num_parameters):
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], _ = forward_propagation_n(X, Y,
                                             vector_to_dictionary(thetaplus))

        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], _ = forward_propagation_n(X, Y,
                                              vector_to_dictionary(thetaminus))

        gradsapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    numerator = np.linalg.norm(grads_value - gradsapprox)
    denominator = np.linalg.norm(grads_value) + np.linalg.norm(gradsapprox)
    difference = numerator / denominator

    if difference > 1e-7:
        print(
            'There is a mistake in the backward propagation! difference = {}'.
            format(difference))
    else:
        print(
            'Your backward propagation works perfectly fine! difference = {}'.
            format(difference))

    return difference
Code example #16
def gradient_check_n(parameters,gradients,X,Y,epsilon = 1e-7):
    '''
    Checks if backward_propagation_n correctly computes the gradient of the cost output by forward_propagation_n
    :param parameters: python dictionary containing the parameters 'W1','b1','W2','b2','W3','b3'
    :param gradients: output of backward_propagation_n, contains the gradients of the cost with respect to the parameters
    :param X: input datapoint, of shape (number of input nodes, 1)
    :param Y: true label
    :param epsilon: tiny shift to the input used to compute the approximated gradient
    :return: difference between the approximated gradient and the backward propagation gradient
    '''
    # Set up variables
    parameters_values , keys = gc_utils.dictionary_to_vector(parameters)  # keys is not used; parameters_values is an (n, 1) matrix
    grad = gc_utils.gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters,1))
    J_minus = np.zeros((num_parameters,1))
    gradapprox = np.zeros((num_parameters, 1))

    # Compute gradapprox
    for i in range(num_parameters):
        # Compute J_plus[i]. Inputs: 'parameters_values, epsilon'. Output = 'J_plus[i]'
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0]=thetaplus[i][0]+epsilon
        J_plus[i],cache = forward_propagation_n(X,Y,gc_utils.vector_to_dictionary(thetaplus))

        # Compute J_minus[i]. Inputs: 'parameters_values, epsilon'. Output = 'J_minus[i]'
        thetaminus = np.copy(parameters_values)
        thetaminus[i][0]=thetaminus[i][0] - epsilon
        J_minus[i],cache = forward_propagation_n(X,Y,gc_utils.vector_to_dictionary(thetaminus))

        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i]-J_minus[i])/(2*epsilon)

    # Compare gradapprox to the backward propagation gradients by computing the difference
    numerator = np.linalg.norm(grad-gradapprox)
    denominator = np.linalg.norm(grad)+np.linalg.norm(gradapprox)
    difference = numerator/denominator

    if difference < 1e-7:
        print('Gradient check: the gradient is fine')
    else:
        print('Gradient check: the gradient exceeds the threshold')

    return difference
Code example #17
def gradient_check_n(parameters, gradients, X, Y, epsilon = 1e-7):
    
    # Set-up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))
    
    # Compute gradapprox
    for i in range(num_parameters):
        
        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]".
        # "_" is used because the function you have to outputs two parameters but we only care about the first one
        thetaplus = np.copy(parameters_values)          # Step 1
        thetaplus[i][0] += epsilon                      # Step 2
        J_plus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaplus))                                 # Step 3
        
        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]".
        thetaminus = np.copy(parameters_values)                                     # Step 1
        thetaminus[i][0] -= epsilon                                # Step 2
        J_minus[i], _ = forward_propagation_n(X, Y, vector_to_dictionary(thetaminus))                                  # Step 3
        
        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2. * epsilon)
    
    # Compare gradapprox to backward propagation gradients by computing difference.
    numerator = np.linalg.norm(grad - gradapprox)                               # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)             # Step 2'
    difference = numerator / denominator

    if difference > 1e-6:
        print ("There is a mistake in the backward propagation! difference = " + str(difference))
    else:
        print ("Your backward propagation works perfectly fine! difference = " + str(difference))
    
    return difference
Code example #18
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    # Set up variables
    parameters_values, keys = gc_utils.dictionary_to_vector(
        parameters)  # convert the parameters dictionary into an array
    grad = gc_utils.gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # Compute gradapprox
    for i in range(num_parameters):  # loop over all parameters
        # Compute J_plus[i]
        thetaplus = np.copy(parameters_values)
        thetaplus[i][0] = thetaplus[i][0] + epsilon
        J_plus[i], cache = forward_propagation_n(
            X, Y, gc_utils.vector_to_dictionary(thetaplus))  # cache is not used

        # Compute J_minus[i]
        thetaminus = np.copy(parameters_values)
        thetaminus[i][0] = thetaminus[i][0] - epsilon
        J_minus[i], cache = forward_propagation_n(
            X, Y, gc_utils.vector_to_dictionary(thetaminus))  # cache is not used

        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compare gradapprox to the backward propagation gradients by computing the difference
    numerator = np.linalg.norm(grad - gradapprox)
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)
    difference = numerator / denominator

    if difference < 1e-7:
        print("Gradient Checking: the gradient is fine!")
    else:
        print("Gradient Checking: the gradient exceeds the threshold!")
    return difference
Code example #19
You obtained some results on the fraud detection test set but you are not 100% sure of your model. Nobody's perfect! Let's implement gradient checking to verify if your gradients are correct.

How does gradient checking work?

As in 1) and 2), you want to compare "gradapprox" to the gradient computed by backpropagation. The formula is still:

$$\frac{\partial J}{\partial \theta} = \lim_{\varepsilon \to 0} \frac{J(\theta + \varepsilon) - J(\theta - \varepsilon)}{2\varepsilon} \tag{1}$$
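For a scalar θ, formula (1) can be sanity-checked directly. Below is a minimal sketch using a made-up cost J(θ) = θ² (not one of the course's functions), whose analytic gradient is 2θ:

# Minimal sketch of formula (1) for a scalar theta, using a toy cost J(theta) = theta**2.
# This is only an illustration; it is not part of the course code.
def J(theta):
    return theta ** 2

theta, epsilon = 3.0, 1e-7
gradapprox = (J(theta + epsilon) - J(theta - epsilon)) / (2 * epsilon)
grad = 2 * theta                     # analytic derivative of theta**2
difference = abs(grad - gradapprox) / (abs(grad) + abs(gradapprox))
print(gradapprox, difference)        # gradapprox is about 6.0; difference is tiny (roughly 1e-9 or smaller)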
However, $\theta$ is not a scalar anymore. It is a dictionary called "parameters". We implemented a function "dictionary_to_vector()" for you. It converts the "parameters" dictionary into a vector called "values", obtained by reshaping all parameters (W1, b1, W2, b2, W3, b3) into vectors and concatenating them.

The inverse function is "vector_to_dictionary" which outputs back the "parameters" dictionary.



Figure 2: dictionary_to_vector() and vector_to_dictionary()
You will need these functions in gradient_check_n()
We have also converted the "gradients" dictionary into a vector "grad" using gradients_to_vector(). You don't need to worry about that.
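None of the examples in this collection show the gc_utils helpers themselves. A rough sketch of what dictionary_to_vector(), vector_to_dictionary() and gradients_to_vector() might look like is given below; the hard-coded layer shapes are an assumption (a 4 -> 5 -> 3 -> 1 network, as in the course's test case), so treat this as illustrative rather than the actual course implementation:

import numpy as np

# Illustrative sketch only. The real gc_utils helpers hard-code the shapes of the
# course's 3-layer test network; the shapes below are that assumption made explicit.
PARAM_SHAPES = [("W1", (5, 4)), ("b1", (5, 1)),
                ("W2", (3, 5)), ("b2", (3, 1)),
                ("W3", (1, 3)), ("b3", (1, 1))]

def dictionary_to_vector(parameters):
    # Reshape each parameter into a column and stack them into one (n, 1) vector.
    keys, columns = [], []
    for name, shape in PARAM_SHAPES:
        columns.append(parameters[name].reshape(-1, 1))
        keys += [name] * parameters[name].size
    return np.concatenate(columns, axis=0), keys

def vector_to_dictionary(theta):
    # Inverse of dictionary_to_vector: cut the (n, 1) vector back into the original shapes.
    parameters, start = {}, 0
    for name, shape in PARAM_SHAPES:
        size = shape[0] * shape[1]
        parameters[name] = theta[start:start + size].reshape(shape)
        start += size
    return parameters

def gradients_to_vector(gradients):
    # Stack dW1, db1, ..., db3 in the same order as the parameters above.
    columns = [gradients["d" + name].reshape(-1, 1) for name, _ in PARAM_SHAPES]
    return np.concatenate(columns, axis=0)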

Exercise: Implement gradient_check_n().

Instructions: Here is pseudo-code that will help you implement the gradient check.

For each i in num_parameters:

To compute J_plus[i]:
    Set $\theta^{+}$ to np.copy(parameters_values)
    Set $\theta^{+}_i$ to $\theta^{+}_i + \varepsilon$
    Calculate $J^{+}_i$ using forward_propagation_n(x, y, vector_to_dictionary($\theta^{+}$)).
To compute J_minus[i]: do the same thing with $\theta^{-}$
Compute $gradapprox[i] = \dfrac{J^{+}_i - J^{-}_i}{2\varepsilon}$
Code example #20
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n
    
    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    grad -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters. 
    x -- input datapoint, of shape (input size, 1)
    y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient with formula(1)
    
    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient
    """

    # Set-up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # Compute gradapprox
    for i in range(num_parameters):

        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]".
        # "_" is used because the function you have to outputs two parameters but we only care about the first one
        thetaplus = np.copy(parameters_values)  # Step 1
        thetaplus[i][0] = thetaplus[i][0] + epsilon  # Step 2
        J_plus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(thetaplus))  # Step 3

        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]".
        thetaminus = np.copy(parameters_values)  # Step 1
        thetaminus[i][0] = thetaminus[i][0] - epsilon  # Step 2
        J_minus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(thetaminus))  # Step 3

        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compare gradapprox to backward propagation gradients by computing difference.
    numerator = np.linalg.norm(grad - gradapprox)  # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)  # Step 2'
    difference = numerator / denominator  # Step 3'

    if difference > 2e-7:
        print("\033[93m" +
              "There is a mistake in the backward propagation! difference = " +
              str(difference) + "\033[0m")
    else:
        print("\033[92m" +
              "Your backward propagation works perfectly fine! difference = " +
              str(difference) + "\033[0m")

    return difference


# What you should remember from this notebook:

# Gradient checking verifies closeness between the gradients from backpropagation
# and the numerical approximation of the gradient (computed using forward propagation).
# Gradient checking is slow, so we don't run it in every iteration of training.
# You would usually run it only to make sure your code is correct,
# then turn it off and use backprop for the actual learning process.
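
A sketch of how such a one-off check is typically wired up before training; the helper names (initialize_parameters, forward_propagation_n, backward_propagation_n) and the data X, Y are assumed to be defined as in the examples above:

# Hypothetical one-off check before training starts; all names are assumed to exist
# as in the examples above.
parameters = initialize_parameters([X.shape[0], 5, 3, 1])
cost, cache = forward_propagation_n(X, Y, parameters)
gradients = backward_propagation_n(X, Y, cache)
difference = gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7)
# Once difference is comfortably below ~2e-7, turn the check off and train with backprop only.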
Code example #21
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    Checks if backward_propagation_n computes correctly the gradient of the cost output by forward_propagation_n
    
    Arguments:
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3":
    grad -- output of backward_propagation_n, contains gradients of the cost with respect to the parameters. 
    x -- input datapoint, of shape (input size, 1)
    y -- true "label"
    epsilon -- tiny shift to the input to compute approximated gradient with formula(1)
    
    Returns:
    difference -- difference (2) between the approximated gradient and the backward propagation gradient
    """
    # How to help implement gradient check.
    # Set-up variables
    parameters_values, _ = dictionary_to_vector(
        parameters
    )  # converts the "parameters" dictionary into a vector called "values"
    grad = gradients_to_vector(
        gradients)  # convert gradients dictionary into a vector, "grads"
    num_parameters = parameters_values.shape[
        0]  # get current shape of an array by assigning a tuple of array dimensions
    J_plus = np.zeros(
        (num_parameters,
         1))  # initialize J_plus with zeros and number of parameter objects
    J_minus = np.zeros(
        (num_parameters,
         1))  # initialize J_minus with zeros and number of parameter objects
    gradapprox = np.zeros((
        num_parameters,
        1))  # initialize gradapprox with zeros and number of parameter objects

    # Compute gradapprox
    for i in range(num_parameters):

        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]".
        # "_" is used because the function outputs two parameters but only care about the first one
        thetaplus = np.copy(parameters_values)  # Set theta to np.copy
        thetaplus[i][0] = thetaplus[i][0] + epsilon  # Set theta_plus
        J_plus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(
                thetaplus))  # Calculate J_plus using forward propagation

        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]".
        thetaminus = np.copy(parameters_values)  # Set theta to np.copy
        thetaminus[i][0] = thetaminus[i][0] - epsilon  # Set theta_minus
        J_minus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(
                thetaminus))  # Calculate J_minus using forward propagation

        # Compute gradapprox[i]
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)

    # Compare gradapprox to backward propagation gradients by computing difference.
    numerator = np.linalg.norm(
        grad - gradapprox)  # compute the numerator using np.linalg.norm(...)
    denominator = np.linalg.norm(grad) + np.linalg.norm(
        gradapprox
    )  # compute the denominator (need to call np.linalg.norm(...) twice)
    difference = numerator / denominator  # divide both

    if difference > 2e-7:
        print("\033[93m" +
              "There is a mistake in the backward propagation! difference = " +
              str(difference) + "\033[0m")
    else:
        print("\033[92m" +
              "Your backward propagation works perfectly fine! difference = " +
              str(difference) + "\033[0m")

    return difference
Code example #22
def gradient_check_n(parameters, gradients, X, Y, epsilon=1e-7):
    """
    梯度校验,检查后向传播是否正确计算前向传播输出的 cost 的梯度
    
    :param parameters: 参数字典,包含 "W1", "b1", "W2", "b2", "W3", "b3":
    :param grad: 后向传播的输出, 包含与参数相关的 cost 梯度
    :param x: 输入数据点, of shape (input size, 1)
    :param y: 正确的标签
    :param epsilon: 输入的微小偏移,用来计算近似梯度
    
    :return difference: 近似梯度和后向传播计算的梯度之间的差异
    """

    # Set-up variables
    parameters_values, _ = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients)
    num_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_parameters, 1))
    J_minus = np.zeros((num_parameters, 1))
    gradapprox = np.zeros((num_parameters, 1))

    # Compute gradapprox
    for i in range(num_parameters):

        # Compute J_plus[i]. Inputs: "parameters_values, epsilon". Output = "J_plus[i]".
        # "_" is used because the function you have to outputs two parameters but we only care about the first one
        ### START CODE HERE ### (approx. 3 lines)
        thetaplus = np.copy(parameters_values)  # Step 1
        thetaplus[i][0] = thetaplus[i][0] + epsilon  # Step 2
        J_plus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(thetaplus))  # Step 3
        ### END CODE HERE ###

        # Compute J_minus[i]. Inputs: "parameters_values, epsilon". Output = "J_minus[i]".
        ### START CODE HERE ### (approx. 3 lines)
        thetaminus = np.copy(parameters_values)  # Step 1
        thetaminus[i][0] = thetaminus[i][0] - epsilon  # Step 2
        J_minus[i], _ = forward_propagation_n(
            X, Y, vector_to_dictionary(thetaminus))  # Step 3
        ### END CODE HERE ###

        # Compute gradapprox[i]
        ### START CODE HERE ### (approx. 1 line)
        gradapprox[i] = (J_plus[i] - J_minus[i]) / (2 * epsilon)
        ### END CODE HERE ###

    # Compare gradapprox to the gradients computed by backward propagation by computing the difference
    ### START CODE HERE ### (approx. 1 line)
    numerator = np.linalg.norm(grad - gradapprox)  # Step 1'
    denominator = np.linalg.norm(grad) + np.linalg.norm(gradapprox)  # Step 2'
    difference = numerator / denominator  # Step 3'
    ### END CODE HERE ###

    if difference > 2e-7:
        print("\033[93m" +
              "There is a mistake in the backward propagation! difference = " +
              str(difference) + "\033[0m")
    else:
        print("\033[92m" +
              "Your backward propagation works perfectly fine! difference = " +
              str(difference) + "\033[0m")

    return difference