def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.
    
    Arguments:
    dA -- post-activation gradient for current layer l 
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    
    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache
    
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    
    return dA_prev, dW, db
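Every snippet on this page delegates the linear part of the step to `linear_backward`, whose body is not shown here. A minimal sketch of what such a helper typically looks like, assuming the linear cache holds `(A_prev, W, b)` from the forward pass (the same formulas appear inline in Example #6 below):

```python
import numpy as np

def linear_backward(dZ, cache):
    """Backward pass for the linear part Z = W @ A_prev + b of one layer.
    Assumes cache = (A_prev, W, b) stored during the forward pass."""
    A_prev, W, b = cache
    m = A_prev.shape[1]                          # number of examples

    dW = np.dot(dZ, A_prev.T) / m                # gradient of the cost w.r.t. W
    db = np.sum(dZ, axis=1, keepdims=True) / m   # gradient of the cost w.r.t. b
    dA_prev = np.dot(W.T, dZ)                    # gradient passed on to layer l-1

    return dA_prev, dW, db
```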
Example #2
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.
    
    Arguments:
    dA -- post-activation gradient for current layer l 
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    
    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        ### START CODE HERE ### (≈ 2 lines of code)
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        ### END CODE HERE ###

    elif activation == "sigmoid":
        ### START CODE HERE ### (≈ 2 lines of code)
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        ### END CODE HERE ###

    return dA_prev, dW, db
Example #3
def linear_activation_backward(dA, cache, activation):
    (linear_cache, activation_cache) = cache
    if activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        dZ = relu_backward(dA, activation_cache)
    dA_prev, dw, db = linear_backward(dZ, linear_cache)
    return dA_prev, dw, db
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
Example #5
def L_model_backward(AL, Y, caches):
    """
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group
    
    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1), i.e. l = 0...L-2)
                the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])
    
    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ... 
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ... 
    """
    grads = {}
    L = len(caches)  # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Initializing the backpropagation
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Lth layer (SIGMOID -> LINEAR) gradients. Inputs: "AL, Y, caches". Outputs: "grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)]"
    current_cache = caches[-1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(
        sigmoid_backward(dAL, current_cache[1]), current_cache[0])

    for l in reversed(range(L - 1)):
        # lth layer: (RELU -> LINEAR) gradients.
        # Inputs: "grads["dA" + str(l + 2)], caches". Outputs: "grads["dA" + str(l + 1)], grads["dW" + str(l + 1)], grads["db" + str(l + 1)]"
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_backward(
            relu_backward(grads["dA" + str(l + 2)], current_cache[1]), current_cache[0])
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads
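A quick way to sanity-check an `L_model_backward` implementation like the one above is to feed it randomly generated caches and verify that every gradient has the same shape as the parameter it belongs to. The sketch below is only an illustration: it assumes the course's cache layout (`(A_prev, W, b)` for the linear part, `Z` for the activation part) and that `linear_backward`, `relu_backward` and `sigmoid_backward` are available from the assignment utilities; `check_gradient_shapes` is a hypothetical helper, not part of the assignment.

```python
import numpy as np

def check_gradient_shapes(layer_dims, m=4, seed=0):
    """Build random caches for a [LINEAR->RELU]*(L-1) -> LINEAR->SIGMOID net,
    run L_model_backward, and assert that each dW/db matches its W/b shape."""
    rng = np.random.default_rng(seed)
    caches = []
    A = rng.standard_normal((layer_dims[0], m))
    for l in range(1, len(layer_dims)):
        W = rng.standard_normal((layer_dims[l], layer_dims[l - 1])) * 0.01
        b = np.zeros((layer_dims[l], 1))
        Z = W @ A + b
        caches.append(((A, W, b), Z))  # (linear_cache, activation_cache)
        # sigmoid on the output layer, relu on the hidden layers
        A = 1 / (1 + np.exp(-Z)) if l == len(layer_dims) - 1 else np.maximum(0, Z)

    AL = A
    Y = rng.integers(0, 2, size=(1, m))
    grads = L_model_backward(AL, Y, caches)

    for l in range(1, len(layer_dims)):
        assert grads["dW" + str(l)].shape == (layer_dims[l], layer_dims[l - 1])
        assert grads["db" + str(l)].shape == (layer_dims[l], 1)

check_gradient_shapes([5, 4, 3, 1])
```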
Example #6
    def back(self, i, dA, activation):

        if activation == 'relu':
            dZ = relu_backward(dA, self.Z[i])
        else:
            dZ = sigmoid_backward(dA, self.Z[i])
        dW = np.dot(dZ, self.A[i - 1].T) / self.A[i].shape[1]
        dB = np.sum(dZ, axis=1, keepdims=True) / self.A[i].shape[1]
        dA = np.dot(self.W[i].T, dZ)
        return dA, dW, dB
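Unlike the other snippets, this variant keeps `W`, `A` and `Z` as attributes of a network object and computes the linear gradients inline. A hedged sketch of how such a method might be driven over a whole network, assuming a hypothetical class whose layer lists are 1-indexed and whose output layer uses a sigmoid (none of this is part of the original snippet):

```python
import numpy as np

def full_backward(self, AL, Y):
    """Hypothetical driver for back(): walk the layers from the output back
    to the input and collect the weight/bias gradients of every layer."""
    L = len(self.W) - 1                                  # assumes 1-indexed lists with a dummy entry at index 0
    dA = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))  # dJ/dA for the binary cross-entropy cost
    grads = {}
    for i in range(L, 0, -1):
        activation = 'sigmoid' if i == L else 'relu'     # sigmoid output layer, relu hidden layers
        dA, grads['dW' + str(i)], grads['db' + str(i)] = self.back(i, dA, activation)
    return grads
```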
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache

    if activation == "sigmoid":
        dZ = dnn_utils_v2.sigmoid_backward(dA, activation_cache)
    elif activation == "relu":
        dZ = dnn_utils_v2.relu_backward(dA, activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
Example #8
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache

    if(activation == "relu"):
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif(activation == "sigmoid"):
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

def L_model_backward(AL, Y, caches):
    grads = {}
    L = len(caches)
    #Reshaping Y into the shape of AL
    Y = Y.reshape(AL.shape)
    m = AL.shape[1]

    dAL = -(np.divide(Y, AL) - np.divide(1-Y, 1-AL))
    current_cache = caches[L - 1]

    grads["dA"+str(L)], grads["dW"+str(L)], grads["db"+str(L)] = linear_activation_backward(dAL, current_cache, activation = "sigmoid")

    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA"+str(l+2)], current_cache, activation = "relu")
        grads["dA"+str(l + 1)] = dA_prev_temp
        grads["dW"+str(l + 1)] = dW_temp
        grads["db"+str(l + 1)] = db_temp

    return grads

def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2

    for l in range(L):
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db" + str(l+1)]

    return parameters

def main():
    parameters, grads = update_parameters_test_case()
    parameters = update_parameters(parameters, grads, 0.1)

    print ("W1 = "+ str(parameters["W1"]))
    print ("b1 = "+ str(parameters["b1"]))
    print ("W2 = "+ str(parameters["W2"]))
    print ("b2 = "+ str(parameters["b2"]))
if __name__ == "__main__":
    main()
Example #9
def linear_activation_backward(dA, cache, activation):
    '''
    Backward step for one layer: moves the gradient from the current layer to the one before it.
    '''
    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        return dA_prev, dW, db
    else:
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        return dA_prev, dW, db
Example #10
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.
    
    Arguments:
    dA : np.ndarray 
         post-activation gradient for current layer l 
         this was calculated by running linear_activation_backward on layer l+1
    
    cache : tuple of linear_cache, activation_cache
            stored for computing the backward pass efficiently
            linear_cache : tuple
                           a python tuple containing A[l], W[l] and b[l]
                           stored during forward propagation for computing
                           the backward pass efficiently
            activation_cache: np.ndarray
                              Z[l] used to calculate A[l]
                              (size of current layer, number of examples) 
    
    activation : string
                 the activation to be used in this layer, 
                 stored as a text string: "sigmoid" or "relu"    
    Returns:
    dA_prev : np.ndarray
              Gradient of the cost with respect to the activation 
              (of the previous layer l-1), same shape as A_prev.
              Note: the previous layer (l-1) is the next layer to be 
                    calculated since we are going backward.
    dW : np.ndarray
        Gradient of the cost with respect to W 
        (current layer l), same shape as W
    db : np.ndarray vector
         Gradient of the cost with respect to b 
         (current layer l), same shape as b
    """
    
    #define some useful variables
    linear_cache, activation_cache = cache
    
    # Calculate Gradients 
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    
    return dA_prev, dW, db
Example #11
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.
    :param dA: post-activation gradient for current layer l
    :param cache: tuple of values (linear_cache, activation_cache)
    :param activation: the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    :return:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.
    
    Arguments:
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation : "sigmoid" or "relu"
    
    Returns:
    dA_prev, dW, db 
    """
    linear_cache, activation_cache = cache
    Z = activation_cache

    if activation == "sigmoid":
        dZ = sigmoid_backward(dA, Z)
    elif activation == "relu":
        dZ = dA * reluDerivative(Z)
        # dZ = relu_backward(dA, Z)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
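`reluDerivative` is not defined anywhere on this page. Given how it is used (`dZ = dA * reluDerivative(Z)`), it presumably returns the elementwise derivative of ReLU; a minimal sketch under that assumption:

```python
import numpy as np

def reluDerivative(Z):
    """Elementwise derivative of ReLU: 1 where Z > 0, 0 elsewhere (assumed helper)."""
    return (Z > 0).astype(float)
```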
def linear_activation_backward(dA, cache, activation):
    '''
    Implement the backward propagation for LINEAR -> ACTIVATION layer.
    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: 'relu' or 'sigmoid'

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    '''
    linear_cache, activation_cache = cache

    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
Example #14
def linear_activation_backward(dA, cache, activation):
    """
    Backward propagation for one layer of the network, i.e. the LINEAR->ACTIVATION layer.
    :param dA: gradient of the activations of the current layer
    :param cache: tuple (linear_cache, activation_cache)
    :param activation: the activation function used in this layer, string: "sigmoid" or "relu"

    :return dA_prev: gradient of the activations of the previous layer
    :return dW: gradient of the weights of the current layer
    :return db: gradient of the biases of the current layer
    """
    linear_cache, activation_cache = cache  # cache of the linear part, cache of the activation part

    # the current layer's activation function is relu()
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    # the current layer's activation function is sigmoid()
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        ### START CODE HERE ### (≈ 2 lines of code)
        dZ = relu_backward(dA, cache[1])
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        ### END CODE HERE ###
        
    elif activation == "sigmoid":
        ### START CODE HERE ### (≈ 2 lines of code)
        dZ = sigmoid_backward(dA, cache[1])
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        ### END CODE HERE ###
    
    return dA_prev, dW, db


# In[ ]:

dAL, linear_activation_cache = linear_activation_backward_test_case()

dA_prev, dW, db = linear_activation_backward(dAL, linear_activation_cache, activation = "sigmoid")
print ("sigmoid:")
print ("dA_prev = "+ str(dA_prev))
print ("dW = " + str(dW))
print ("db = " + str(db) + "\n")
Example #16
# Expected output (excerpt): db = [[ 0.50629448]]

### 6.2 - Linear-Activation backward

Next, you will create a function that merges the two helper functions: **`linear_backward`** and the backward step for the activation **`linear_activation_backward`**. 

To help you implement `linear_activation_backward`, we provided two backward functions:
- **`sigmoid_backward`**: Implements the backward propagation for SIGMOID unit. You can call it as follows:

```python
dZ = sigmoid_backward(dA, activation_cache)
```

- **`relu_backward`**: Implements the backward propagation for RELU unit. You can call it as follows:

```python
dZ = relu_backward(dA, activation_cache)
```

If $g(.)$ is the activation function, 
`sigmoid_backward` and `relu_backward` compute $$dZ^{[l]} = dA^{[l]} * g'(Z^{[l]}) \tag{11}$$.  
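The bodies of these two helpers are provided by the course utilities rather than written in the exercise; a plausible sketch consistent with equation (11), assuming `activation_cache` simply holds the pre-activation `Z`:

```python
import numpy as np

def relu_backward(dA, activation_cache):
    """dZ = dA * g'(Z) for g = relu: the gradient flows only where Z > 0."""
    Z = activation_cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ

def sigmoid_backward(dA, activation_cache):
    """dZ = dA * g'(Z) for g = sigmoid, with g'(Z) = s * (1 - s)."""
    Z = activation_cache
    s = 1 / (1 + np.exp(-Z))
    return dA * s * (1 - s)
```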

**Exercise**: Implement the backpropagation for the *LINEAR->ACTIVATION* layer.
# In[65]:

# GRADED FUNCTION: linear_activation_backward
Example #17
def L_model_backward(AL, Y, cache):
    """
    :param AL: output of forward propagation
    :param Y: true labels
    :param cache: list of caches -- the first L-1 from the relu layers, the last one from the sigmoid output layer
    :return:
    grads: gradients for dA, dW and db
    """
    grads = {}
    L = len(cache)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    current_cache = cache[-1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(sigmoid_backward(dAL, current_cache[1]), current_cache[0])

    for layers in reversed(range(L - 1)):
        current_cache = cache[layers]
        dA_prev_temp, dW_temp, db_temp = linear_backward(relu_backward(grads["dA" + str(layers + 1)], current_cache[1]), current_cache[0])

        grads["dA" + str(layers)] = dA_prev_temp
        grads["dW" + str(layers+1)] = dW_temp
        grads["db" + str(layers+1)] = db_temp
        np.set_printoptions(suppress=True)
    return grads