def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b

    Raises:
    ValueError -- if activation is neither "relu" nor "sigmoid"
    """
    linear_cache, activation_cache = cache

    # Step 1: push the gradient back through the activation function.
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        # Fail loudly here; otherwise an unsupported name only shows up as an
        # UnboundLocalError further down.
        raise ValueError(
            'activation must be "relu" or "sigmoid", got %r' % (activation,)
        )

    # Step 2: push the gradient back through the linear part.
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def L_model_backward(AL, Y, cache):
    """
    Run backward propagation through every layer: the last layer goes through
    sigmoid_backward, all earlier layers through relu_backward.

    :param AL: output of forward propagation
    :param Y: true labels (reshaped to AL's shape before use)
    :param cache: list of per-layer caches; each entry is indexed as
        entry[0] (passed to linear_backward) and entry[1] (passed to the
        activation backward helper). Entries 0..L-2 are treated as relu
        layers, the last entry as the sigmoid layer.
    :return: grads: dict holding "dA" + str(l) for l = 0..L-1 and
        "dW" + str(l), "db" + str(l) for l = 1..L
    """
    grads = {}
    L = len(cache)
    Y = Y.reshape(AL.shape)

    # Derivative of the cost with respect to AL (the form matches the binary
    # cross-entropy loss).
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer: sigmoid followed by the linear step.
    current_cache = cache[-1]
    dZ_last = sigmoid_backward(dAL, current_cache[1])
    dA_prev, dW, db = linear_backward(dZ_last, current_cache[0])
    grads["dA" + str(L - 1)] = dA_prev
    grads["dW" + str(L)] = dW
    grads["db" + str(L)] = db

    # Remaining layers, walking from index L-2 down to 0. Each iteration
    # consumes the gradient stored by the layer above it.
    for layers in reversed(range(L - 1)):
        current_cache = cache[layers]
        dZ = relu_backward(grads["dA" + str(layers + 1)], current_cache[1])
        dA_prev_temp, dW_temp, db_temp = linear_backward(dZ, current_cache[0])
        grads["dA" + str(layers)] = dA_prev_temp
        grads["dW" + str(layers + 1)] = dW_temp
        grads["db" + str(layers + 1)] = db_temp

    # NOTE(review): this changes NumPy's global print formatting as a side
    # effect of computing gradients. Kept because callers may rely on it, but
    # it probably belongs in the calling script.
    np.set_printoptions(suppress=True)
    return grads
def linear_activation_backward(dA, cache, activation):
    '''
    Backward pass for a single LINEAR->ACTIVATION layer.

    :param dA: gradient handed to the activation backward helper
    :param cache: pair (linear_cache, activation_cache)
    :param activation: "sigmoid" or "relu"; the misspelling "sigmod" is also
        accepted so existing call sites keep working
    :return: (dA_prev, dW, db) as produced by linear_backward
    :raises ValueError: if activation is not one of the accepted names
    '''
    linear_cache, activation_cache = cache

    # The original comparison only matched the misspelled "sigmod", so the
    # correctly spelled "sigmoid" never reached sigmoid_backward.
    if activation in ("sigmoid", "sigmod"):
        dZ = dnn_utils_v2.sigmoid_backward(dA, activation_cache)
    elif activation == "relu":
        dZ = dnn_utils_v2.relu_backward(dA, activation_cache)
    else:
        raise ValueError(
            'activation must be "sigmoid" or "relu", got %r' % (activation,)
        )

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    '''
    Implement the backward propagation for LINEAR -> ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: 'relu' or 'sigmoid'

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b

    Raises:
    ValueError -- if activation is neither 'relu' nor 'sigmoid'
    '''
    # Reject unsupported names before doing any work; previously they only
    # surfaced as an UnboundLocalError on the return statement.
    if activation not in ('relu', 'sigmoid'):
        raise ValueError(
            "activation must be 'relu' or 'sigmoid', got %r" % (activation,)
        )

    linear_cache, activation_cache = cache

    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    else:
        dZ = sigmoid_backward(dA, activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def L_model_backward(AL, Y, caches):
    """
    Backward propagation through all layers: relu layers first in the list,
    a sigmoid layer last.

    Arguments:
    AL -- output of the forward pass
    Y -- labels; reshaped to AL's shape before use
    caches -- list of per-layer caches; entry[0] is passed to linear_backward
              and entry[1] to the activation backward helper. caches[L-1]
              is treated as the sigmoid layer, the rest as relu layers.

    Returns:
    grads -- dict with keys "dA" + str(l), "dW" + str(l), "db" + str(l) for
             l = 1..L. Note the key convention: "dA" + str(l) stores the
             dA_prev value returned while processing layer l.
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    # Derivative of the cost with respect to AL.
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output (sigmoid) layer.
    current_cache = caches[L - 1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(
        sigmoid_backward(dAL, current_cache[1]), current_cache[0])

    # Relu layers, from index L-2 down to 0.
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        # The gradient entering layer l+1 is the dA_prev stored by layer l+2,
        # i.e. grads["dA" + str(l + 2)]. Reusing dAL here (as the code used
        # to) feeds the output-layer gradient into every hidden layer.
        dA_prev_temp, dW_temp, db_temp = linear_backward(
            relu_backward(grads["dA" + str(l + 2)], current_cache[1]), current_cache[0])
        grads["dA" + str(l + 1)] = dA_prev_temp
        # Key is l + 1 (layer number), not l + l.
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads
def linear_activation_backward(dA, cache, activation):
    """Implement the backward propagation for the LINEAR->ACTIVATION layer

    Arguments:
        dA {np.array} -- post-activation gradient for current layer l
        cache {tuple} -- tuple of values (linear_cache, activation_cache)
        activation {str} -- activation name, "relu" or "sigmoid"

    Returns:
        dA_prev {np.array} -- gradient of the cost with respect to the activation
        dW {np.array} -- gradient of the cost with respect to the weight
        db {np.array} -- gradient of the cost with respect to the bias

    Raises:
        ValueError -- activation is neither "relu" nor "sigmoid"
    """
    linear_cache, activation_cache = cache

    # The two cases are mutually exclusive, so use a single if/elif chain and
    # give unsupported names an explicit error instead of letting the return
    # statement fail on unbound locals.
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        raise ValueError(
            'activation must be "relu" or "sigmoid", got %r' % (activation,)
        )

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b

    Raises:
    ValueError -- if activation is not "relu" or "sigmoid"
    """
    supported = ("relu", "sigmoid")
    if activation not in supported:
        # An unsupported name used to fall through both branches and crash
        # with UnboundLocalError at the return.
        raise ValueError(
            "activation must be one of %r, got %r" % (supported, activation)
        )

    linear_cache, activation_cache = cache

    # Activation part of the layer.
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    else:
        dZ = sigmoid_backward(dA, activation_cache)

    # Linear part of the layer (shared by both activations).
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation="relu"):
    """
    Purpose:
        Compute this layer's dZ from its dA, the cached Z and the activation
        function (dZ = dA * g'(Z)), then propagate dZ through the linear step.

    Arguments:
        dA - the dA value of the current layer
        cache - tuple (linear_cache, activation_cache)
        activation - name of the activation used in this layer, a string:
                     "sigmoid" | "relu" (defaults to "relu")

    Returns:
        dA_prev - gradient of the cost with respect to the activation of the
                  previous layer (l-1), same dimensions as A_prev
        dW - gradient of the cost with respect to W (current layer l), same
             dimensions as W
        db - gradient of the cost with respect to b (current layer l), same
             dimensions as b

    Raises:
        ValueError - if activation is neither "relu" nor "sigmoid"
    """
    linear_cache, activation_cache = cache

    # Backward pass through the activation function.
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        # Previously an unsupported name reached the return statement and
        # failed there with UnboundLocalError.
        raise ValueError(
            'activation must be "relu" or "sigmoid", got %r' % (activation,)
        )

    # Backward pass through the linear step.
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    """Backward pass for one layer: activation step, then linear step.

    cache is unpacked as (linear_cache, activation_cache). The string
    "sigmoid" selects sigmoid_backward; any other value selects
    relu_backward. Returns the three values produced by linear_backward,
    in the same order.
    """
    linear_part, activation_part = cache

    # Only "sigmoid" is matched explicitly; everything else goes to relu.
    activation_step = sigmoid_backward if activation == "sigmoid" else relu_backward
    pre_activation_grad = activation_step(dA, activation_part)

    prev_grad, weight_grad, bias_grad = linear_backward(pre_activation_grad, linear_part)
    return prev_grad, weight_grad, bias_grad
def linear_activation_backward(dA, cache, activation):
    """Backward pass for one LINEAR->ACTIVATION layer.

    Arguments:
    dA -- gradient passed to the activation backward helper
    cache -- pair (linear_cache, activation_cache)
    activation -- "relu" or "sigmoid"

    Returns:
    dA_prev, dW, db -- the three values produced by linear_backward

    Raises:
    ValueError -- if activation is neither "relu" nor "sigmoid"
    """
    if activation not in ("relu", "sigmoid"):
        # Without this guard dZ is never assigned and the call to
        # linear_backward fails with an UnboundLocalError.
        raise ValueError(
            'activation must be "relu" or "sigmoid", got %r' % (activation,)
        )

    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    else:
        dZ = sigmoid_backward(dA, activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def L_model_backward(AL, Y, caches):
    """
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group

    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e l = 0...L-2)
                the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ...
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ...
             Keys run over l = 1..L; "dA" + str(l) holds the dA_prev value
             returned while processing layer l.
    """
    grads = {}
    L = len(caches)  # the number of layers
    Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Initializing the backpropagation: derivative of the cost w.r.t. AL.
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Lth layer (SIGMOID -> LINEAR) gradients.
    current_cache = caches[-1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = \
        linear_activation_backward(dAL, current_cache, activation="sigmoid")

    for l in reversed(range(L - 1)):
        # (l+1)th layer: (RELU -> LINEAR) gradients.
        # Input: grads["dA" + str(l + 2)], stored by the layer above.
        # Outputs: grads["dA" + str(l + 1)], grads["dW" + str(l + 1)],
        #          grads["db" + str(l + 1)].
        current_cache = caches[l]
        dZ = relu_backward(grads["dA" + str(l + 2)], current_cache[1])
        dA_prev_temp, dW_temp, db_temp = linear_backward(dZ, current_cache[0])
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads
def back(self, i, dA, activation):
    """Backward step for the layer at index i.

    Picks relu_backward when activation is 'relu' and sigmoid_backward for
    any other value, applies it to dA and self.Z[i], then derives the weight
    and bias gradients and the gradient to hand to the layer below.

    Returns (dA, dW, dB), where dA is the dot product of self.W[i].T and dZ.
    """
    # Anything that is not 'relu' is handled as sigmoid.
    activation_step = relu_backward if activation == 'relu' else sigmoid_backward
    dZ = activation_step(dA, self.Z[i])

    # Both averages divide by self.A[i].shape[1]
    # (presumably the number of examples -- TODO confirm).
    weight_sum = np.dot(dZ, self.A[i - 1].T)
    dW = weight_sum / self.A[i].shape[1]

    bias_sum = np.sum(dZ, axis=1, keepdims=True)
    dB = bias_sum / self.A[i].shape[1]

    dA_below = np.dot(self.W[i].T, dZ)
    return dA_below, dW, dB
def linear_activation_backward(dA, cache, activation):
    """Backward pass for one LINEAR->ACTIVATION layer.

    Arguments:
    dA -- gradient passed to the activation backward helper
    cache -- pair (linear_cache, activation_cache)
    activation -- "sigmoid" or "relu"

    Returns:
    dA_prev, dW, db -- the three values produced by linear_backward

    Raises:
    ValueError -- if activation is neither "sigmoid" nor "relu"
    """
    linear_cache, activation_cache = cache

    if activation == "sigmoid":
        dZ = dnn_utils_v2.sigmoid_backward(dA, activation_cache)
    elif activation == "relu":
        dZ = dnn_utils_v2.relu_backward(dA, activation_cache)
    else:
        # dZ would otherwise be unbound and the next statement would fail
        # with an UnboundLocalError that hides the real cause.
        raise ValueError(
            'activation must be "sigmoid" or "relu", got %r' % (activation,)
        )

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    """Backward pass for one LINEAR->ACTIVATION layer.

    Arguments:
    dA -- gradient passed to the activation backward helper
    cache -- pair (linear_cache, activation_cache)
    activation -- "relu" or "sigmoid"

    Returns:
    dA_prev, dW, db -- the three values produced by linear_backward

    Raises:
    ValueError -- if activation is neither "relu" nor "sigmoid"
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        # Previously an unsupported name fell through and crashed at the
        # return with UnboundLocalError.
        raise ValueError(
            'activation must be "relu" or "sigmoid", got %r' % (activation,)
        )

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db


def L_model_backward(AL, Y, caches):
    """Backward propagation through all layers (relu layers, then one sigmoid).

    Arguments:
    AL -- output of the forward pass
    Y -- labels; reshaped to AL's shape before use
    caches -- list of per-layer caches; caches[L-1] belongs to the sigmoid
              layer, caches[0..L-2] to the relu layers

    Returns:
    grads -- dict with keys "dA" + str(l), "dW" + str(l), "db" + str(l) for
             l = 1..L; "dA" + str(l) holds the dA_prev value returned while
             processing layer l
    """
    grads = {}
    L = len(caches)

    # Reshaping Y into the shape of AL
    Y = Y.reshape(AL.shape)

    # Derivative of the cost with respect to AL.
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output (sigmoid) layer.
    current_cache = caches[L - 1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = \
        linear_activation_backward(dAL, current_cache, activation="sigmoid")

    # Relu layers, from index L-2 down to 0; each consumes the gradient
    # stored by the layer above it.
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(
            grads["dA" + str(l + 2)], current_cache, activation="relu")
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    # The bare `return` that used to be here discarded every gradient and
    # handed None back to the caller.
    return grads


def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every W/b pair.

    Arguments:
    parameters -- dict with keys "W1", "b1", ..., "WL", "bL"; the number of
                  layers is taken as len(parameters) // 2
    grads -- dict with matching keys "dW1", "db1", ..., "dWL", "dbL"
    learning_rate -- step size multiplied into each gradient

    Returns:
    parameters -- the same dict object, with each entry rebound to
                  value - learning_rate * gradient
    """
    L = len(parameters) // 2

    for layer in range(1, L + 1):
        suffix = str(layer)
        # Rebind rather than use -= so existing array objects are not
        # modified in place.
        parameters["W" + suffix] = parameters["W" + suffix] - learning_rate * grads["dW" + suffix]
        parameters["b" + suffix] = parameters["b" + suffix] - learning_rate * grads["db" + suffix]

    return parameters


def main():
    """Run update_parameters on the bundled test case and print the result."""
    parameters, grads = update_parameters_test_case()
    parameters = update_parameters(parameters, grads, 0.1)

    print("W1 = " + str(parameters["W1"]))
    print("b1 = " + str(parameters["b1"]))
    print("W2 = " + str(parameters["W2"]))
    print("b2 = " + str(parameters["b2"]))


if __name__ == "__main__":
    main()
def linear_activation_backward(dA, cache, activation):
    """Move the gradient from one layer back to the layer before it.

    cache is unpacked as (linear cache, activation cache). "relu" selects
    relu_backward; any other activation value selects sigmoid_backward.
    Returns the three values produced by linear_backward, in order.
    """
    linear_part, activation_part = cache

    # Only "relu" is matched explicitly; everything else goes to sigmoid.
    activation_step = relu_backward if activation == "relu" else sigmoid_backward
    pre_activation_grad = activation_step(dA, activation_part)

    prev_grad, weight_grad, bias_grad = linear_backward(pre_activation_grad, linear_part)
    return prev_grad, weight_grad, bias_grad
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA : np.ndarray
        post-activation gradient for current layer l
        this was calculated by running linear_activation_backward on layer l+1
    cache : tuple of linear_cache, activation_cache
        stored for computing the backward pass efficiently
        linear_cache : tuple
            a python tuple containing A[l], W[l] and b[l] stored during
            forward propagation for computing the backward pass efficiently
        activation_cache : np.ndarray
            Z[l] used to calculate A[l] (size of current layer, number of examples)
    activation : string
        the activation to be used in this layer, stored as a text string:
        "sigmoid" or "relu"

    Returns:
    dA_prev : np.ndarray
        Gradient of the cost with respect to the activation (of the previous
        layer l-1), same shape as A_prev.
        Note: the previous layer (l-1) is the next layer to be calculated
        since we are going backward.
    dW : np.ndarray
        Gradient of the cost with respect to W (current layer l), same shape as W
    db : np.ndarray vector
        Gradient of the cost with respect to b (current layer l), same shape as b

    Raises:
    ValueError
        if activation is neither "relu" nor "sigmoid"
    """
    # define some useful variables
    linear_cache, activation_cache = cache

    # Gradient with respect to the pre-activation Z
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        # Report the bad argument directly instead of crashing on unbound
        # locals at the return statement.
        raise ValueError(
            'activation must be "relu" or "sigmoid", got %r' % (activation,)
        )

    # Gradients of the linear step
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
def L_model_backward(AL, Y, caches):
    """
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group

    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e l = 0...L-2)
                the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ...   for l = 0..L-1
             grads["dW" + str(l)] = ...   for l = 1..L
             grads["db" + str(l)] = ...   for l = 1..L
    """
    grads = {}
    L = len(caches)  # the number of layers
    Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Initializing the backpropagation
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))  # derivative of cost with respect to AL

    # Lth layer (SIGMOID -> LINEAR) gradients.
    # Inputs: "dAL, current_cache". Outputs: grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)]
    current_cache = caches[L - 1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(
        sigmoid_backward(dAL, current_cache[1]), current_cache[0])

    # Loop from l=L-2 to l=0
    for l in reversed(range(L - 1)):
        # (l+1)th layer: (RELU -> LINEAR) gradients.
        # Inputs: grads["dA" + str(l + 1)], current_cache.
        # Outputs: grads["dA" + str(l)], grads["dW" + str(l + 1)], grads["db" + str(l + 1)]
        current_cache = caches[l]
        # current_cache is the per-layer pair, so it is always indexed with
        # 1 (activation cache) and 0 (linear cache) as in the output layer
        # above. Indexing it with the layer number l only worked for l == 0.
        dA_prev_temp, dW_temp, db_temp = linear_backward(
            relu_backward(grads["dA" + str(l + 1)], current_cache[1]), current_cache[0])
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the linear-activation layer.

    :param dA: post-activation gradient for current layer l
    :param cache: tuple of values (linear_cache, activation_cache)
    :param activation: the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    :return:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    :raises ValueError: if activation is neither "relu" nor "sigmoid"
    """
    if activation not in ("relu", "sigmoid"):
        # Unsupported names used to slip past both branches and fail at the
        # return with UnboundLocalError.
        raise ValueError(
            'activation must be "relu" or "sigmoid", got %r' % (activation,)
        )

    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    else:
        dZ = sigmoid_backward(dA, activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    """
    Backward propagation for one network layer, i.e. a LINEAR->ACTIVATION layer.

    :param dA: gradient of the current layer's activation values
    :param cache: tuple (linear_cache, activation_cache)
    :param activation: activation function used by the current layer, string: "sigmoid" or "relu"
    :return dA_prev: gradient of the previous layer's activation values
    :return dW: gradient of the current layer's weights
    :return db: gradient of the current layer's bias
    :raises ValueError: if activation is neither "relu" nor "sigmoid"
    """
    # Cache for the linear part, cache for the activation part.
    linear_cache, activation_cache = cache

    if activation == "relu":
        # The current layer uses relu().
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        # The current layer uses sigmoid().
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        # Make the bad argument explicit; it used to surface only as an
        # UnboundLocalError at the return statement.
        raise ValueError(
            'activation must be "relu" or "sigmoid", got %r' % (activation,)
        )

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
# ### 6.2 - Linear-Activation backward Next, you will create a function that merges the two helper functions: **`linear_backward`** and the backward step for the activation **`linear_activation_backward`**. To help you implement `linear_activation_backward`, we provided two backward functions: - **`sigmoid_backward`**: Implements the backward propagation for SIGMOID unit. You can call it as follows: ```python dZ = sigmoid_backward(dA, activation_cache) ``` - **`relu_backward`**: Implements the backward propagation for RELU unit. You can call it as follows: ```python dZ = relu_backward(dA, activation_cache) ``` If $g(.)$ is the activation function, `sigmoid_backward` and `relu_backward` compute $$dZ^{[l]} = dA^{[l]} * g'(Z^{[l]}) \tag{11}$$. **Exercise**: Implement the backpropagation for the *LINEAR->ACTIVATION* layer. # In[65]: # GRADED FUNCTION: linear_activation_backward def linear_activation_backward(dA, cache, activation): """ Implement the backward propagation for the LINEAR->ACTIVATION layer. Arguments: