import numpy as np
# `af` below refers to the project's activation-function helpers
# (sigmoid_backward, tanh_backward, relu_backward, leaky_relu_backward), not shown here.

def compute_gradients(hyperparameters, parameters, cache, l, debug_mode=False):
    activation = hyperparameters["activations"][l]
    W = parameters["W"][l]
    Z = cache["Z"][l]
    A = cache["A"][l]
    A_prev = cache["A"][l - 1]
    dA = cache["dA"][l]
    # get the gradient of Z
    if activation.lower() == "sigmoid":
        cache["dZ"][l] = np.multiply(dA, af.sigmoid_backward(Z=Z, A=A, debug_mode=debug_mode))
    elif activation.lower() == "tanh":
        cache["dZ"][l] = np.multiply(dA, af.tanh_backward(Z=Z, A=A, debug_mode=debug_mode))
    elif activation.lower() == "relu":
        cache["dZ"][l] = np.multiply(dA, af.relu_backward(Z=Z, A=A, debug_mode=debug_mode))
    elif activation.lower() == "leaky tanh":
        cache["dZ"][l] = np.multiply(dA, af.leaky_relu_backward(Z=Z, A=A, debug_mode=debug_mode))
    else:
        if debug_mode:
            print("Error: unsupported activation function")
            print("\tStack trace: hidden_layer_propagation.nonlinear_backward()")
        return None
    dZ = cache["dZ"][l]
    # get the number of examples
    m = A_prev.shape[1]
    # get the gradient of W
    cache["dW"][l] = (1.0 / m) * np.dot(dZ, A_prev.T)
    # get the gradient of b
    cache["db"][l] = (1.0 / m) * np.sum(dZ, axis=1, keepdims=True)
    # get the gradient of A_prev
    cache["dA"][l - 1] = np.dot(W.T, dZ)
    return (parameters, cache)
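The `af` derivative helpers that the compute_gradients example above calls are not shown. Below is a minimal sketch of what they could look like, assuming the standard closed-form derivatives; the function names and the `debug_mode` keyword mirror the call sites above, while the bodies and the `alpha` slope are assumptions.

import numpy as np

# Hypothetical stand-ins for the af.* helpers: each returns g'(Z), and
# compute_gradients multiplies the result by dA to obtain dZ.
def sigmoid_backward(Z=None, A=None, debug_mode=False):
    if A is None:                       # recompute the activation if it was not cached
        A = 1.0 / (1.0 + np.exp(-Z))
    return A * (1.0 - A)                # sigma'(z) = sigma(z) * (1 - sigma(z))

def tanh_backward(Z=None, A=None, debug_mode=False):
    if A is None:
        A = np.tanh(Z)
    return 1.0 - A ** 2                 # tanh'(z) = 1 - tanh(z)^2

def relu_backward(Z=None, A=None, debug_mode=False):
    return (Z > 0).astype(float)        # relu'(z) = 1 for z > 0, else 0

def leaky_relu_backward(Z=None, A=None, debug_mode=False, alpha=0.01):
    return np.where(Z > 0, 1.0, alpha)  # leaky_relu'(z) = 1 for z > 0, else alpha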
Example #2
def linear_activation_backward(dA, cache, activation, output_size):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.
    
    Arguments:
    dA -- post-activation gradient for current layer l 
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    output_size -- bit width of output data
    
    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache = cache[0:-1]
    activation_cache = cache[-1]

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache, output_size)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
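All of the examples defer the linear part of the step to a `linear_backward` helper that is not shown. Here is a sketch of the conventional implementation, assuming `linear_cache` holds `(A_prev, W, b)` from the forward pass; it applies the same formulas used explicitly in the first example.

import numpy as np

# Sketch of the shared linear_backward helper, assuming linear_cache = (A_prev, W, b).
def linear_backward(dZ, linear_cache):
    A_prev, W, b = linear_cache
    m = A_prev.shape[1]                                 # number of examples
    dW = (1.0 / m) * np.dot(dZ, A_prev.T)               # gradient w.r.t. the weights
    db = (1.0 / m) * np.sum(dZ, axis=1, keepdims=True)  # gradient w.r.t. the biases
    dA_prev = np.dot(W.T, dZ)                           # gradient propagated to layer l-1
    return dA_prev, dW, db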
Example #3
def linear_activation_backward(dA, cache, activation):
    """Backward step for a LINEAR->ACTIVATION layer ("relu" or "sigmoid"); returns (dA_prev, dW, db)."""

    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
Example #4
def linear_activation_backward(dA, cache, activation):
    """
    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    elif activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
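For context, here is a hedged sketch of how a `linear_activation_backward` of this form is typically driven over an L-layer network with ReLU hidden layers, a sigmoid output layer, and a binary cross-entropy cost. The `caches` list, the `dAL` formula, and the function name `model_backward_sketch` mirror the usual forward-pass bookkeeping and are assumptions, not part of the examples above.

import numpy as np

def model_backward_sketch(AL, Y, caches):
    # caches[l] is assumed to be the (linear_cache, activation_cache) pair saved by
    # the forward pass of layer l+1; AL is the output activation, Y the labels.
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)
    # derivative of the binary cross-entropy cost with respect to AL
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    # output layer uses sigmoid
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = \
        linear_activation_backward(dAL, caches[L - 1], activation="sigmoid")
    # hidden layers use relu
    for l in reversed(range(L - 1)):
        dA_prev, dW, db = linear_activation_backward(
            grads["dA" + str(l + 1)], caches[l], activation="relu")
        grads["dA" + str(l)] = dA_prev
        grads["dW" + str(l + 1)] = dW
        grads["db" + str(l + 1)] = db
    return grads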
Example #5
def activation_backward(dA, cache, activation_way):
    """

    :param dA: Gradient of the activation output this layer with respect to cost J, shape(nl, #)
    :param activation_cache: -- a dictionary contains "A" and "Z"
    :return:
    dA_prev -- the gradient of activation previous layer respect to the cost J
    dW -- the gradient of W this layer respect to the cost J
    db -- the gradient of b this layer respect to the cost J
    """
    linear_cache = cache["linear_cache"]
    activation_cache = cache["activation_cache"]

    if activation_way == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev,dW,db = linear_backward(dZ, linear_cache)
    elif activation_way == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation_way == "tanh":
        dZ = tanh_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
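Examples #2 through #5 also rely on `relu_backward`, `sigmoid_backward`, and `tanh_backward` helpers that map `dA` directly to `dZ`. A minimal sketch follows, assuming `activation_cache` is the pre-activation `Z` saved during the forward pass (Example #5 instead wraps "A" and "Z" in a dictionary, so its caller would pass the "Z" entry).

import numpy as np

def relu_backward(dA, activation_cache):
    Z = activation_cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0                      # gradient is zero where the unit was inactive
    return dZ

def sigmoid_backward(dA, activation_cache):
    Z = activation_cache
    s = 1.0 / (1.0 + np.exp(-Z))
    return dA * s * (1.0 - s)           # dZ = dA * sigma'(Z)

def tanh_backward(dA, activation_cache):
    Z = activation_cache
    t = np.tanh(Z)
    return dA * (1.0 - t ** 2)          # dZ = dA * (1 - tanh(Z)^2)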