def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.
    
    Arguments:
    dA -- post-activation gradient for current layer l 
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    
    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache
    
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    
    return dA_prev, dW, db
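Every snippet on this page calls `relu_backward`, `sigmoid_backward`, and `linear_backward` without defining them (some import them from `dnn_utils_v2`). The sketch below shows one common way these helpers are written so the examples can be run standalone; treat the bodies as an assumption rather than the exact course utilities.

```python
import numpy as np

def relu_backward(dA, activation_cache):
    # activation_cache holds Z from the forward pass; relu'(Z) is 1 where Z > 0, else 0
    Z = activation_cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ

def sigmoid_backward(dA, activation_cache):
    # sigmoid'(Z) = s * (1 - s), with s = sigmoid(Z) recomputed from the cached Z
    Z = activation_cache
    s = 1 / (1 + np.exp(-Z))
    return dA * s * (1 - s)

def linear_backward(dZ, linear_cache):
    # linear_cache holds (A_prev, W, b) from the forward pass
    A_prev, W, b = linear_cache
    m = A_prev.shape[1]
    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db
```

With these definitions in scope, the function above runs as written.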
Example #2
def L_model_backward(AL, Y, cache):
    """
    :param AL: probability vector, output of the forward propagation
    :param Y: true labels
    :param cache: list of caches; cache[0..L-2] come from the relu layers, cache[L-1] from the sigmoid layer
    :return:
    grads: dictionary with the gradients dA, dW and db for every layer
    """
    grads = {}
    L = len(cache)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    current_cache = cache[-1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(sigmoid_backward(dAL, current_cache[1]), current_cache[0])

    for layers in reversed(range(L - 1)):
        current_cache = cache[layers]
        dA_prev_temp, dW_temp, db_temp = linear_backward(relu_backward(grads["dA" + str(layers + 1)], current_cache[1]), current_cache[0])

        grads["dA" + str(layers)] = dA_prev_temp
        grads["dW" + str(layers+1)] = dW_temp
        grads["db" + str(layers+1)] = db_temp
        np.set_printoptions(suppress=True)
    return grads
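As a quick sanity check, the function above can be driven with hand-built caches for a tiny 3 -> 5 -> 1 network. The cache layout used here, `(linear_cache, activation_cache)` per layer with `linear_cache = (A_prev, W, b)` and `activation_cache = Z`, is an assumption that matches the helper sketch shown earlier.

```python
import numpy as np

np.random.seed(1)
m = 4                                   # number of examples
A0 = np.random.randn(3, m)              # input layer: 3 features
W1, b1 = np.random.randn(5, 3), np.zeros((5, 1))
W2, b2 = np.random.randn(1, 5), np.zeros((1, 1))

# Forward pass: LINEAR->RELU, then LINEAR->SIGMOID
Z1 = np.dot(W1, A0) + b1
A1 = np.maximum(0, Z1)
Z2 = np.dot(W2, A1) + b2
AL = 1 / (1 + np.exp(-Z2))
Y = np.array([[1, 0, 1, 0]])

# One cache per layer: (linear_cache, activation_cache)
caches = [((A0, W1, b1), Z1), ((A1, W2, b2), Z2)]

grads = L_model_backward(AL, Y, caches)
print(sorted(grads.keys()))             # ['dA0', 'dA1', 'dW1', 'dW2', 'db1', 'db2']
```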
Example #3
def linear_activation_backward(dA, cache, activation):
    '''
    Backward propagation for the LINEAR->ACTIVATION layer.
    :param dA: post-activation gradient for the current layer
    :param cache: tuple of values (linear_cache, activation_cache)
    :param activation: "sigmoid" or "relu"
    :return: dA_prev, dW, db
    '''
    linear_cache, activation_cache = cache
    if activation == "sigmoid":
        dZ = dnn_utils_v2.sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "relu":
        dZ = dnn_utils_v2.relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev,dW,db
Example #4
def linear_activation_backward(dA, cache, activation):
    '''
    Implement the backward propagation for LINEAR -> ACTIVATION layer.
    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: 'relu' or 'sigmoid'

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    '''
    linear_cache, activation_cache = cache

    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    
    return dA_prev, dW, db
def L_model_backward(AL, Y, caches):

    grads = {}
    L = len(caches)

    m = AL.shape[1]
    Y = Y.reshape(AL.shape)

    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    current_cache = caches[L - 1]

    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(
        sigmoid_backward(dAL, current_cache[1]), current_cache[0])

    # loop for L-1 layers.

    for l in reversed(range(L - 1)):

        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_backward(
            relu_backward(grads["dA" + str(l + 2)], current_cache[1]), current_cache[0])

        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads
Example #6
def linear_activation_backward(dA, cache, activation):
    """Implement the backward propagation for the LINEAR->ACTIVATION layer

    Arguments:
        dA {np.array} -- post-activation gradient for current layer l
        cache {tuple} -- tuple of values
        activation {str} -- activation name

    Returns:
        dA_prev {np.array} -- gradient of the cost with respect to the activation
        dW {np.array} -- gradient of the cost with respect to the weight
        db {np.array} -- gradient of the cost with respect to the bias
    """

    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
Example #7
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.
    
    Arguments:
    dA -- post-activation gradient for current layer l 
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    
    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache
    
    if activation == "relu":
        ### START CODE HERE ### (≈ 2 lines of code)
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        ### END CODE HERE ###
        
    elif activation == "sigmoid":
        ### START CODE HERE ### (≈ 2 lines of code)
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        ### END CODE HERE ###
    
    return dA_prev, dW, db
Example #8
def linear_activation_backward(dA, cache, activation="relu"):
    """
    Purpose:
        Given this layer's dA and the cached Z (the activation is recomputed from Z before
        taking the derivative), compute this layer's dZ: dZ = dA * g'(Z), then the linear gradients.

    Arguments:
        dA -- gradient of the cost with respect to this layer's activation
        cache -- tuple of values (linear_cache, activation_cache)
        activation -- name of the activation used in this layer, string: "sigmoid" or "relu"
    Returns:
        dA_prev -- gradient of the cost with respect to the previous layer's activation (l-1), same shape as A_prev
        dW -- gradient of the cost with respect to W (current layer l), same shape as W
        db -- gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache
    if activation == "relu":
        # Backward step through the activation: dZ = dA * g'(Z), where g' is evaluated
        # from the Z value cached during the forward pass
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
Example #9
def linear_activation_backward(dA, cache, activation):
    (linear_cache, activation_cache) = cache
    if activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        dZ = relu_backward(dA, activation_cache)
    dA_prev, dw, db = linear_backward(dZ, linear_cache)
    return dA_prev, dw, db
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
Example #11
def L_model_backward(AL, Y, caches):
    """
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group

    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e l = 0...L-2)
                the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ...
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ...
    """
    grads = {}
    L = len(caches)  # the number of layers
    # print(L)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Initializing the backpropagation
    # START CODE HERE # (1 line of code)
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    # END CODE HERE ###

    # Lth layer (SIGMOID -> LINEAR) gradients.
    # Inputs: "AL, Y, caches". Outputs: "grads["dAL"], grads["dWL"], grads["dbL"]
    # START CODE HERE # (approx. 2 lines)
    current_cache = caches[-1]  # print(current_cache)
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = \
        linear_activation_backward(dAL, current_cache, activation="sigmoid")
    # END CODE HERE ###

    for l in reversed(range(L - 1)):
        # lth layer: (RELU -> LINEAR) gradients.
        # Inputs: "grads["dA" + str(l + 2)], caches".
        # Outputs: "grads["dA" + str(l + 1)] , grads["dW" + str(l + 1)] , grads["db" + str(l + 1)]
        # START CODE HERE # (approx. 5 lines)
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_backward(
            relu_backward(grads["dA" + str(l + 2)], current_cache[1]), current_cache[0])
        # dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(L)], current_cache, activation = "relu")
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp


    # END CODE HERE ###

    return grads
Example #12
    def back(self, i, dA, activation):
        # Backward pass for layer i: activation backward first, then the linear-step gradients
        if activation == 'relu':
            dZ = relu_backward(dA, self.Z[i])
        else:
            dZ = sigmoid_backward(dA, self.Z[i])
        m = self.A[i].shape[1]                      # number of examples
        dW = np.dot(dZ, self.A[i - 1].T) / m        # gradient of this layer's weights
        dB = np.sum(dZ, axis=1, keepdims=True) / m  # gradient of this layer's biases
        dA = np.dot(self.W[i].T, dZ)                # gradient passed back to layer i-1
        return dA, dW, dB
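The inline formulas in `back` are the same ones `linear_backward` applies; the quick check below (assuming the helper sketch from earlier and some arbitrary small arrays) confirms the two agree.

```python
import numpy as np

np.random.seed(0)
A_prev = np.random.randn(3, 4)
W, b = np.random.randn(2, 3), np.zeros((2, 1))
dZ = np.random.randn(2, 4)
m = A_prev.shape[1]

# Formulas as written inline in back()
dW_inline = np.dot(dZ, A_prev.T) / m
db_inline = np.sum(dZ, axis=1, keepdims=True) / m
dA_inline = np.dot(W.T, dZ)

# Same quantities through linear_backward
dA_ref, dW_ref, db_ref = linear_backward(dZ, (A_prev, W, b))
assert np.allclose(dW_inline, dW_ref)
assert np.allclose(db_inline, db_ref)
assert np.allclose(dA_inline, dA_ref)
```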
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache

    if activation == "sigmoid":
        dZ = dnn_utils_v2.sigmoid_backward(dA, activation_cache)
    elif activation == "relu":
        dZ = dnn_utils_v2.relu_backward(dA, activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
Example #14
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache

    if(activation == "relu"):
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif(activation == "sigmoid"):
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

def L_model_backward(AL, Y, caches):
    grads = {}
    L = len(caches)
    #Reshaping Y into the shape of AL
    Y = Y.reshape(AL.shape)
    m = AL.shape[1]

    dAL = -(np.divide(Y, AL) - np.divide(1-Y, 1-AL))
    current_cache = caches[L - 1]

    grads["dA"+str(L)], grads["dW"+str(L)], grads["db"+str(L)] = linear_activation_backward(dAL, current_cache, activation = "sigmoid")

    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA"+str(l+2)], current_cache, activation = "relu")
        grads["dA"+str(l + 1)] = dA_prev_temp
        grads["dW"+str(l + 1)] = dW_temp
        grads["db"+str(l + 1)] = db_temp

    return grads

def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2

    for l in range(L):
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db" + str(l+1)]

    return parameters

def main():
    parameters, grads = update_parameters_test_case()
    parameters = update_parameters(parameters, grads, 0.1)

    print ("W1 = "+ str(parameters["W1"]))
    print ("b1 = "+ str(parameters["b1"]))
    print ("W2 = "+ str(parameters["W2"]))
    print ("b2 = "+ str(parameters["b2"]))
if __name__ == "__main__":
    main()
Example #15
def linear_activation_backward(dA, cache, activation):
    '''
    Propagates the gradient from this layer back to the one before it.
    '''
    linear_cache, activation_cache = cache
    if activation == "relu":
        dz = relu_backward(dA, activation_cache)
        da_prv, dw, db = linear_backward(dz, linear_cache)
        return da_prv, dw, db
    else:
        dz = sigmoid_backward(dA, activation_cache)
        da_prv, dw, db = linear_backward(dz, linear_cache)
        return da_prv, dw, db
Example #16
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.
    
    Arguments:
    dA : np.ndarray 
         post-activation gradient for current layer l 
         this was calculated by running linear_activation_backward on layer l+1
    
    cache : tuple of linear_cache, activation_cache
            stored for computing the backward pass efficiently
            linear_cache : tuple
                           a python tuple containing A[l-1], W[l] and b[l]
                           stored during forward propagation for computing
                           the backward pass efficiently
            activation_cache: np.ndarray
                              Z[l] used to calculate A[l]
                              (size of current layer, number of examples) 
    
    activation : string
                 the activation to be used in this layer, 
                 stored as a text string: "sigmoid" or "relu"    
    Returns:
    dA_prev : np.ndarray
              Gradient of the cost with respect to the activation 
              (of the previous layer l-1), same shape as A_prev.
              Note: the previous layer (l-1) is the next layer to be 
                    calculated since we are going backward.
    dW : np.ndarray
        Gradient of the cost with respect to W 
        (current layer l), same shape as W
    db : np.ndarray vector
         Gradient of the cost with respect to b 
         (current layer l), same shape as b
    """
    
    #define some useful variables
    linear_cache, activation_cache = cache
    
    # Calculate Gradients 
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    
    return dA_prev, dW, db
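For reference, a cache with the layout this docstring describes is what the forward helper `linear_activation_forward` produces. The body shown below is only a sketch of how that helper is commonly written, included to make the tuple structure concrete, not necessarily the exact course implementation.

```python
import numpy as np

def linear_activation_forward(A_prev, W, b, activation):
    # Forward counterpart (sketch), included only to illustrate the cache layout
    Z = np.dot(W, A_prev) + b
    if activation == "relu":
        A = np.maximum(0, Z)
    elif activation == "sigmoid":
        A = 1 / (1 + np.exp(-Z))
    linear_cache = (A_prev, W, b)   # later consumed by linear_backward
    activation_cache = Z            # later consumed by relu_backward / sigmoid_backward
    return A, (linear_cache, activation_cache)
```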
Example #17
def L_model_backward(AL, Y, caches):
    """
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group
    
    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e l = 0...L-2)
                the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])
    
    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ... 
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ... 
    """
    grads = {}
    L = len(caches)  # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Initializing the backpropagation
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))  # derivative of cost with respect to AL

    # Lth layer (SIGMOID -> LINEAR) gradients. Inputs: "dAL, current_cache". Outputs: "grads["dAL-1"], grads["dWL"], grads["dbL"]
    current_cache = caches[L - 1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(
        sigmoid_backward(dAL, current_cache[1]), current_cache[0])

    # Loop from l=L-2 to l=0
    for l in reversed(range(L - 1)):
        # lth layer: (RELU -> LINEAR) gradients.
        # Inputs: "grads["dA" + str(l + 1)], current_cache". Outputs: "grads["dA" + str(l)] , grads["dW" + str(l + 1)] , grads["db" + str(l + 1)]

        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_backward(
            relu_backward(grads["dA" + str(l + 1)], current_cache[1]), current_cache[0])
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads
Example #18
def linear_activation_backward(dA,cache,activation):
    """
    implement the backward propagation for the linear-activation layer
    :param dA:post-activation gradient for current layer l
    :param cache: tuple of values (linear_cache, activation_cache)
    :param activation: the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    :return:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache,activation_cache=cache

    if activation=="relu":
        dZ=relu_backward(dA,activation_cache)
        dA_prev,dW,db=linear_backward(dZ,linear_cache)
    elif activation=="sigmoid":
        dZ=sigmoid_backward(dA,activation_cache)
        dA_prev,dW,db=linear_backward(dZ,linear_cache)

    return dA_prev,dW,db
Example #19
def linear_activation_backward(dA, cache, activation):
    """
    Backward propagation for one layer of the network, i.e. the LINEAR->ACTIVATION layer.
    :param dA: gradient of the cost with respect to the current layer's activation
    :param cache: tuple (linear_cache, activation_cache)
    :param activation: activation function used in this layer, string: "sigmoid" or "relu"

    :return dA_prev: gradient with respect to the previous layer's activation
    :return dW: gradient of the current layer's weights
    :return db: gradient of the current layer's bias
    """
    linear_cache, activation_cache = cache  # cache of the linear part, cache of the activation part

    # the current layer's activation is relu()
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    # the current layer's activation is sigmoid()
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
Example #20
### 6.2 - Linear-Activation backward

Next, you will create a function that merges the two helper functions: **`linear_backward`** and the backward step for the activation **`linear_activation_backward`**. 

To help you implement `linear_activation_backward`, we provided two backward functions:
- **`sigmoid_backward`**: Implements the backward propagation for SIGMOID unit. You can call it as follows:

```python
dZ = sigmoid_backward(dA, activation_cache)
```

- **`relu_backward`**: Implements the backward propagation for RELU unit. You can call it as follows:

```python
dZ = relu_backward(dA, activation_cache)
```

If $g(.)$ is the activation function, 
`sigmoid_backward` and `relu_backward` compute $$dZ^{[l]} = dA^{[l]} * g'(Z^{[l]}) \tag{11}$$.  

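Concretely, the derivatives that these two helpers apply in equation (11) are the standard ones:

$$\sigma'(Z^{[l]}) = \sigma(Z^{[l]})\big(1 - \sigma(Z^{[l]})\big), \qquad \text{relu}'(Z^{[l]}) = \begin{cases} 1 & \text{if } Z^{[l]} > 0 \\ 0 & \text{otherwise,} \end{cases}$$

so `sigmoid_backward` scales `dA` element-wise by $\sigma(Z)(1-\sigma(Z))$, while `relu_backward` simply zeroes the entries of `dA` where $Z \le 0$.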
**Exercise**: Implement the backpropagation for the *LINEAR->ACTIVATION* layer.

# GRADED FUNCTION: linear_activation_backward

def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.
    
    Arguments: