def linear_activation_backward(dA, cache, activation): """ Implement the backward propagation for the LINEAR->ACTIVATION layer. Arguments: dA -- post-activation gradient for current layer l cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu" Returns: dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev dW -- Gradient of the cost with respect to W (current layer l), same shape as W db -- Gradient of the cost with respect to b (current layer l), same shape as b """ linear_cache, activation_cache = cache if activation == "relu": dZ = relu_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) elif activation == "sigmoid": dZ = sigmoid_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation): """ Implement the backward propagation for the LINEAR->ACTIVATION layer. Arguments: dA -- post-activation gradient for current layer l cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu" Returns: dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev dW -- Gradient of the cost with respect to W (current layer l), same shape as W db -- Gradient of the cost with respect to b (current layer l), same shape as b """ linear_cache, activation_cache = cache if activation == "relu": ### START CODE HERE ### (≈ 2 lines of code) dZ = relu_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) ### END CODE HERE ### elif activation == "sigmoid": ### START CODE HERE ### (≈ 2 lines of code) dZ = sigmoid_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) ### END CODE HERE ### return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    (linear_cache, activation_cache) = cache

    if activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        dZ = relu_backward(dA, activation_cache)

    dA_prev, dw, db = linear_backward(dZ, linear_cache)
    return dA_prev, dw, db
def linear_activation_backward(dA, cache, activation): linear_cache, activation_cache = cache if activation == "relu": dZ = relu_backward(dA, activation_cache) elif activation == 'sigmoid': dZ = sigmoid_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
def L_model_backward(AL, Y, caches):
    """
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group

    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e. l = 0...L-2)
                the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ...
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ...
    """
    grads = {}
    L = len(caches)  # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Initializing the backpropagation
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Lth layer (SIGMOID -> LINEAR) gradients.
    # Inputs: "AL, Y, caches". Outputs: "grads["dAL"], grads["dWL"], grads["dbL"]"
    current_cache = caches[-1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(
        sigmoid_backward(dAL, current_cache[1]), current_cache[0])

    for l in reversed(range(L - 1)):
        # lth layer: (RELU -> LINEAR) gradients.
        # Inputs: "grads["dA" + str(l + 2)], caches". Outputs: "grads["dA" + str(l + 1)], grads["dW" + str(l + 1)], grads["db" + str(l + 1)]"
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_backward(
            relu_backward(grads["dA" + str(l + 2)], current_cache[1]), current_cache[0])
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads
def back(self, i, dA, activation):
    # Backward pass through layer i: post-activation gradient -> dZ -> parameter gradients
    if activation == 'relu':
        dZ = relu_backward(dA, self.Z[i])
    else:
        dZ = sigmoid_backward(dA, self.Z[i])

    m = self.A[i].shape[1]  # number of examples
    dW = np.dot(dZ, self.A[i - 1].T) / m
    dB = np.sum(dZ, axis=1, keepdims=True) / m
    dA = np.dot(self.W[i].T, dZ)  # gradient passed to the previous layer
    return dA, dW, dB
def linear_activation_backward(dA, cache, activation): linear_cache, activation_cache = cache if activation == "sigmoid": dZ = dnn_utils_v2.sigmoid_backward(dA, activation_cache) elif activation == "relu": dZ = dnn_utils_v2.relu_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation): linear_cache, activation_cache = cache if(activation == "relu"): dZ = relu_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) elif(activation == "sigmoid"): dZ = sigmoid_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db def L_model_backward(AL, Y, caches): grads = {} L = len(caches) #Reshaping Y into the shape of AL Y = Y.reshape(AL.shape) m = AL.shape[1] dAL = -(np.divide(Y, AL) - np.divide(1-Y, 1-AL)) current_cache = caches[L - 1] grads["dA"+str(L)], grads["dW"+str(L)], grads["db"+str(L)] = linear_activation_backward(dAL, current_cache, activation = "sigmoid") for l in reversed(range(L-1)): current_cache = caches[l] dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA"+str(l+2)], current_cache, activation = "relu") grads["dA"+str(l + 1)] = dA_prev_temp grads["dW"+str(l + 1)] = dW_temp grads["db"+str(l + 1)] = db_temp return def update_parameters(parameters, grads, learning_rate): L = len(parameters) // 2 for l in range(L): parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads["dW" + str(l+1)] parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db" + str(l+1)] return parameters def main(): parameters, grads = update_parameters_test_case() parameters = update_parameters(parameters, grads, 0.1) print ("W1 = "+ str(parameters["W1"])) print ("b1 = "+ str(parameters["b1"])) print ("W2 = "+ str(parameters["W2"])) print ("b2 = "+ str(parameters["b2"])) if __name__ == "__main__": main()
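# For context, a single gradient-descent iteration chains the pieces above roughly as follows.
# This is only a sketch: the name `one_training_step` is made up here, and `L_model_forward`
# and `compute_cost` are assumed helpers that are not defined in this section.
def one_training_step(X, Y, parameters, learning_rate=0.0075):
    # Forward pass over all layers (assumed helper): returns output AL and the list of caches
    AL, caches = L_model_forward(X, parameters)
    # Cross-entropy cost (assumed helper), useful for monitoring convergence
    cost = compute_cost(AL, Y)
    # Backward pass and parameter update using the functions defined above
    grads = L_model_backward(AL, Y, caches)
    parameters = update_parameters(parameters, grads, learning_rate)
    return parameters, cost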
def linear_activation_backward(dA, cache, activation):
    '''
    Backward step for one LINEAR->ACTIVATION layer: takes the gradient of the current
    layer l and returns the gradient of the layer before it (l-1), plus dW and db.
    '''
    linear_cache, activation_cache = cache

    if activation == "relu":
        dz = relu_backward(dA, activation_cache)
        da_prv, dw, db = linear_backward(dz, linear_cache)
        return da_prv, dw, db
    else:
        dz = sigmoid_backward(dA, activation_cache)
        da_prv, dw, db = linear_backward(dz, linear_cache)
        return da_prv, dw, db
def linear_activation_backward(dA, cache, activation): """ Implement the backward propagation for the LINEAR->ACTIVATION layer. Arguments: dA : np.ndarray post-activation gradient for current layer l this was calculated by running linear_activation_backward on layer l+1 cache : tuple of linear_cache, activation_cache stored for computing the backward pass efficiently linear_cache : tuple a python tuple containing A[l], W[l] and b[l] stored during forward propigation for computing the backward pass efficiently activation_cache: np.ndarray Z[l] used to calculate A[l] (size of current layer, number of examples) activation : string the activation to be used in this layer, stored as a text string: "sigmoid" or "relu" Returns: dA_prev : np.ndarray Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev. Note: the previous layer (l-1) is the next layer to be calculated since we are going backward. dW : np.ndarray Gradient of the cost with respect to W (current layer l), same shape as W db : np.ndarray vector Gradient of the cost with respect to b (current layer l), same shape as b """ #define some useful variables linear_cache, activation_cache = cache # Calculate Gradients if activation == "relu": dZ = relu_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) elif activation == "sigmoid": dZ = sigmoid_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    :param dA: post-activation gradient for current layer l
    :param cache: tuple of values (linear_cache, activation_cache)
    :param activation: the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    :return:
        dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
        dW -- Gradient of the cost with respect to W (current layer l), same shape as W
        db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation): """ Implement the backward propagation for the LINEAR->ACTIVATION layer. Arguments: cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently activation : "sigmoid" or "relu" Returns: dA_prev, dW, db """ linear_cache, activation_cache = cache Z = activation_cache if activation == "sigmoid": dZ = sigmoid_backward(dA, Z) elif activation == "relu": dZ = dA * reluDerivative(Z) # dZ = relu_backward(dA, Z) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    '''
    Implement the backward propagation for the LINEAR -> ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: 'relu' or 'sigmoid'

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    '''
    linear_cache, activation_cache = cache

    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation): """ 神经网络一层,即 LINEAR->ACTIVATION layer 的后向传播 :param dA: 当前层激活值的梯度 :param cache: 元组 (linear_cache, activation_cache) :param activation: 当前层使用的激活函数,string: "sigmoid" or "relu" :return dA_prev: 前一层激活值的梯度 :return dW: 当前层的权重的梯度 :return db: 当前层的偏置的梯度 """ linear_cache, activation_cache = cache # 线性部分的缓存,激活部分的cache # 当前层的激活函数为 relu() if activation == "relu": dZ = relu_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) # 当前层的激活函数为 sigmoid() elif activation == "sigmoid": dZ = sigmoid_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        ### START CODE HERE ### (≈ 2 lines of code)
        dZ = relu_backward(dA, cache[1])
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        ### END CODE HERE ###
    elif activation == "sigmoid":
        ### START CODE HERE ### (≈ 2 lines of code)
        dZ = sigmoid_backward(dA, cache[1])
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        ### END CODE HERE ###

    return dA_prev, dW, db


# In[ ]:

dAL, linear_activation_cache = linear_activation_backward_test_case()

dA_prev, dW, db = linear_activation_backward(dAL, linear_activation_cache, activation="sigmoid")
print("sigmoid:")
print("dA_prev = " + str(dA_prev))
print("dW = " + str(dW))
print("db = " + str(db) + "\n")
# <td> **db** </td>
# <td> [[ 0.50629448]] </td>
# </tr>
#
# </table>
#
# ### 6.2 - Linear-Activation backward
#
# Next, you will create a function that merges the two helper functions: **`linear_backward`** and the backward step for the activation **`linear_activation_backward`**.
#
# To help you implement `linear_activation_backward`, we provided two backward functions:
# - **`sigmoid_backward`**: Implements the backward propagation for the SIGMOID unit. You can call it as follows:
#
# ```python
# dZ = sigmoid_backward(dA, activation_cache)
# ```
#
# - **`relu_backward`**: Implements the backward propagation for the RELU unit. You can call it as follows:
#
# ```python
# dZ = relu_backward(dA, activation_cache)
# ```
#
# If $g(.)$ is the activation function, `sigmoid_backward` and `relu_backward` compute
# $$dZ^{[l]} = dA^{[l]} * g'(Z^{[l]}) \tag{11}$$
#
# **Exercise**: Implement the backpropagation for the *LINEAR->ACTIVATION* layer.

# In[65]:

# GRADED FUNCTION: linear_activation_backward
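# For reference only (not the graded function): a minimal sketch of what the two provided
# helpers compute, assuming `activation_cache` holds the pre-activation Z of the current layer.
# The `_sketch` names are used to avoid any suggestion that this is the library implementation.
import numpy as np

def sigmoid_backward_sketch(dA, activation_cache):
    # dZ = dA * s * (1 - s), where s = sigmoid(Z)
    Z = activation_cache
    s = 1 / (1 + np.exp(-Z))
    return dA * s * (1 - s)

def relu_backward_sketch(dA, activation_cache):
    # dZ = dA where Z > 0, and 0 elsewhere
    Z = activation_cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ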
def L_model_backward(AL, Y, cache):
    """
    :param AL: output of the forward propagation
    :param Y: true labels
    :param cache: list of caches; entries 0..L-2 come from the "relu" layers, entry L-1 from the "sigmoid" layer
    :return: grads: gradients for dA, dW and db
    """
    grads = {}
    L = len(cache)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost with respect to AL
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Last layer: SIGMOID -> LINEAR
    current_cache = cache[-1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(
        sigmoid_backward(dAL, current_cache[1]), current_cache[0])

    # Remaining layers: RELU -> LINEAR
    for layers in reversed(range(L - 1)):
        current_cache = cache[layers]
        dA_prev_temp, dW_temp, db_temp = linear_backward(
            relu_backward(grads["dA" + str(layers + 1)], current_cache[1]), current_cache[0])
        grads["dA" + str(layers)] = dA_prev_temp
        grads["dW" + str(layers + 1)] = dW_temp
        grads["db" + str(layers + 1)] = db_temp

    np.set_printoptions(suppress=True)  # print gradients without scientific notation
    return grads