def L_model_backward(AL, Y, caches):
    """
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group

    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
              every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e l = 0...L-2)
              the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ...
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ...
    """
    grads = {}
    L = len(caches)  # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Initializing the backpropagation
    ### START CODE HERE ### (1 line of code)
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    ### END CODE HERE ###

    # Lth layer (SIGMOID -> LINEAR) gradients. Inputs: "AL, Y, caches". Outputs: "grads["dAL"], grads["dWL"], grads["dbL"]
    ### START CODE HERE ### (approx. 2 lines)
    current_cache = caches[-1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(
        sigmoid_backward(dAL, current_cache[1]), current_cache[0])
    ### END CODE HERE ###

    for l in reversed(range(L - 1)):
        # lth layer: (RELU -> LINEAR) gradients.
        # Inputs: "grads["dA" + str(l + 2)], caches". Outputs: "grads["dA" + str(l + 1)] , grads["dW" + str(l + 1)] , grads["db" + str(l + 1)]
        ### START CODE HERE ### (approx. 5 lines)
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_backward(
            relu_backward(grads["dA" + str(l + 2)], current_cache[1]), current_cache[0])
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
        ### END CODE HERE ###

    return grads
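# A minimal sketch of the helper functions that the snippets in this section
# assume (relu_backward, sigmoid_backward, linear_backward), following the
# common cache convention linear_cache = (A_prev, W, b), activation_cache = Z.
# These are illustrative assumptions for reference only, not the exact helpers
# each snippet was written against.
import numpy as np

def relu_backward(dA, activation_cache):
    # Gradient passes through only where Z > 0.
    Z = activation_cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ

def sigmoid_backward(dA, activation_cache):
    # dZ = dA * s * (1 - s), with s = sigmoid(Z).
    Z = activation_cache
    s = 1 / (1 + np.exp(-Z))
    return dA * s * (1 - s)

def linear_backward(dZ, linear_cache):
    # Gradients of the linear step Z = W A_prev + b, averaged over m examples.
    A_prev, W, b = linear_cache
    m = A_prev.shape[1]
    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db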
def linear_activation_backward(dA, cache, activation="relu"): """ 实现【LINEAR -> ACTIVATION】线性+激活部分反向计算。 :param dA: 当前层l的激活后的梯度值 :param cache: 我们存储的用于有效计算反向传播的值的元组(值为linear_cache,activation_cache) :param activation: 要在此层中使用的激活函数名,字符串类型,【"sigmoid" | "relu"】 :return: dA_prev - 相对于激活(前一层l-1)的成本梯度值,与A_prev维度相同 dW - 相对于W(当前层l)的成本梯度值,与W的维度相同 db - 相对于b(当前层l)的成本梯度值,与b的维度相同 """ # linear_activation_forward()的返回值为: # cache = (linear_cache, activation_cache) # linear_cache(上层的A, 本层的W, 本层的b) # activation_cache(本层的Z) linear_cache, activation_cache = cache # 当激活函数选择ReLU时 if activation == "relu": # 使用函数relu_backward()计算反向传播dZ(外套,激活函数求导) dZ = relu_backward(dA, activation_cache) # 使用函数linear_backward()计算反向传播dA^[l-1]、dW^[l]、db^[l](内含,线性部分求导) dA_prev, dW, db = linear_backward(dZ, linear_cache) # 当激活函数选择Sigmoid时 elif activation == "sigmoid": # 使用函数sigmoid_backward()计算激活函数反向传播dZ(外套,激活函数求导) dZ = sigmoid_backward(dA, activation_cache) # 使用函数linear_backward()计算线性部分反向传播dA^[l-1]、dW^[l]、db^[l](内含,线性部分求导) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
def __linear_activation_backward(self, dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache[0], cache[1]

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        raise Exception("No such activation method: {}.".format(activation))

    dA_prev, dW, db = self.__linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
def linear_activation_backward(dA, cachee, activation):
    if activation == "relu":
        dZ = relu_backward(dA, cachee[1])
    else:
        dZ = sigmoid_backward(dA, cachee[1])
    dA_prev, dW, db = linear_backward(dZ, cachee[0])
    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache
    Z = activation_cache
    A_prev, W, b = linear_cache

    if activation == "relu":
        ### START CODE HERE ### (≈ 2 lines of code)
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        ### END CODE HERE ###
    elif activation == "sigmoid":
        ### START CODE HERE ### (≈ 2 lines of code)
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
        ### END CODE HERE ###

    return dA_prev, dW, db
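# Hypothetical usage of linear_activation_backward for a single layer with
# 3 input units, 2 output units and 4 examples. The cache layout
# ((A_prev, W, b), Z) matches what linear_activation_forward() would store;
# the shapes and random values here are illustrative only and rely on the
# helper sketch above.
import numpy as np

np.random.seed(1)
A_prev = np.random.randn(3, 4)
W = np.random.randn(2, 3)
b = np.zeros((2, 1))
Z = np.dot(W, A_prev) + b
dA = np.random.randn(2, 4)

cache = ((A_prev, W, b), Z)
dA_prev, dW, db = linear_activation_backward(dA, cache, activation="relu")
print(dA_prev.shape, dW.shape, db.shape)  # (3, 4) (2, 3) (2, 1)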
def l_model_backward(AL, Y, cache):
    """
    :param AL: output of the forward propagation
    :param Y: true labels
    :param cache: list of caches -- the first L-1 caches from the "relu" layers, plus the cache of the "sigmoid" output layer
    :return: grads: dictionary of gradients dA, dW and db
    """
    grads = {}
    L = len(cache)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)

    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer: SIGMOID -> LINEAR
    current_cache = cache[-1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(
        sigmoid_backward(dAL, current_cache[1]), current_cache[0])

    # Hidden layers: RELU -> LINEAR
    for layers in reversed(range(L - 1)):
        current_cache = cache[layers]
        dA_prev_temp, dW_temp, db_temp = linear_backward(
            relu_backward(grads["dA" + str(layers + 1)], current_cache[1]), current_cache[0])
        grads["dA" + str(layers)] = dA_prev_temp
        grads["dW" + str(layers + 1)] = dW_temp
        grads["db" + str(layers + 1)] = db_temp

    np.set_printoptions(suppress=True)
    return grads
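# Hypothetical end-to-end check of l_model_backward on a tiny 2-layer network,
# building the caches by hand in the ((A_prev, W, b), Z) layout the snippets
# assume. Shapes and values are illustrative, and the helper sketch above is
# assumed for relu_backward / sigmoid_backward / linear_backward.
import numpy as np

np.random.seed(2)
X = np.random.randn(4, 3)                 # 4 features, 3 examples
Y = np.array([[1, 0, 1]])
W1, b1 = np.random.randn(3, 4) * 0.01, np.zeros((3, 1))
W2, b2 = np.random.randn(1, 3) * 0.01, np.zeros((1, 1))

Z1 = np.dot(W1, X) + b1
A1 = np.maximum(0, Z1)                    # LINEAR -> RELU
Z2 = np.dot(W2, A1) + b2
AL = 1 / (1 + np.exp(-Z2))                # LINEAR -> SIGMOID

caches = [((X, W1, b1), Z1), ((A1, W2, b2), Z2)]
grads = l_model_backward(AL, Y, caches)
print(sorted(grads.keys()))  # ['dA0', 'dA1', 'dW1', 'dW2', 'db1', 'db2']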
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
        dA {np array} -- [post-activation gradient for current layer l]
        cache {tuple} -- [(linear_cache, activation_cache)]
        activation {string} -- the activation to be used in this layer

    Returns:
        [dA_prev] -- [gradient of the cost wrt the activation of the previous layer]
        [dW] -- [gradient of the cost wrt W of the current layer, same shape as W]
        [db] -- [gradient of the cost wrt b of the current layer, same shape as b]
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation="relu"): """ 实现LINEAR-> ACTIVATION层的后向传播。 参数: dA - 当前层l的激活后的梯度值 cache - 我们存储的用于有效计算反向传播的值的元组(值为linear_cache,activation_cache) activation - 要在此层中使用的激活函数名,字符串类型,【"sigmoid" | "relu"】 返回: dA_prev - 相对于激活(前一层l-1)的成本梯度值,与A_prev维度相同 dW - 相对于W(当前层l)的成本梯度值,与W的维度相同 db - 相对于b(当前层l)的成本梯度值,与b的维度相同 """ linear_cache, activation_cache = cache if activation == "relu": dZ = relu_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) elif activation == "softmax": dZ = softmax_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) elif activation == "sigmoid": dZ = sigmoid_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
def L_model_backward(AL, Y, cache):
    """
    Implement backprop for (LINEAR->ReLU)*(L-1) -> LINEAR->SIGMOID.

    Arguments:
    AL -- probability vector, output of the forward propagation process (L_model_forward())
    Y -- labels vector (1 if cat, 0 if non-cat)
    cache -- list of caches from the forward pass, one per layer

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ...
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ...
    """
    grads = {}
    L = len(cache)   # number of layers
    m = AL.shape[1]  # number of training examples
    Y = Y.reshape(AL.shape)

    # Initialise backpropagation
    dAL = np.divide(1 - Y, 1 - AL) - np.divide(Y, AL)

    # Lth layer (SIGMOID -> LINEAR) gradients. Inputs: "AL, Y, caches". Outputs: "grads["dAL"], grads["dWL"], grads["dbL"]
    current_cache = cache[-1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(
        sigmoid_backward(dAL, current_cache[1]), current_cache[0])

    # Hidden layers (RELU -> LINEAR) gradients
    for l in reversed(range(L - 1)):
        current_cache = cache[l]
        dA_prev_temp, dW_temp, db_temp = linear_backward(
            relu_backward(grads["dA" + str(l + 2)], current_cache[1]), current_cache[0])
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads
def linear_activation_backward(dA, cache, activation="relu"): linear_cache, activation_cache = cache if activation == "relu": dZ = relu_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) elif activation == "sigmoid": dZ = sigmoid_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation='relu'):
    linear_cache, activation_cache = cache
    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        dZ = dA
    return linear_backward(dZ, linear_cache)
def L_model_backward(AL, Y, caches):
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)

    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer: SIGMOID -> LINEAR
    current_cache = caches[-1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(
        sigmoid_backward(dAL, current_cache[1]), current_cache[0])

    # Hidden layers: RELU -> LINEAR
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_backward(
            relu_backward(grads["dA" + str(l + 2)], current_cache[1]), current_cache[0])
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads
def __linear_activation_backward(self, dA, cache, activation):
    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    elif activation == "tanh":
        dZ = tanh_backward(dA, activation_cache)
    dA_prev, dW, db = self.__linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    '''
    Implement the backward propagation for the Linear->Activation layer
    '''
    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    linear_cache, active_cache = cache

    # Compute dZ
    if activation == 'relu':
        dZ = utils.relu_backward(dA, active_cache)
    elif activation == 'sigmoid':
        dZ = utils.sigmoid_backward(dA, active_cache)

    # From dZ, compute dW, db and dA_prev
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
def linear_activation_backward(self, dA, cache, activation):
    # First differentiate the activation to get dZ
    linear_cache, activation_cache = cache  # unpack the two cached parts
    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)  # needs dA and Z
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
    # Then differentiate the linear part with respect to the parameters
    dA_prev, dW, db = self.linear_backward(
        dZ, linear_cache)  # dA_prev feeds the previous layer's activation derivative
    return dA_prev, dW, db
def L_model_backward(AL, Y, caches):
    '''
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group

    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
              every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e l = 0...L-2)
              the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ...
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ...
    '''
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)

    # Initializing the backpropagation
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Lth layer (SIGMOID -> LINEAR) gradients. Inputs: "AL, Y, caches". Outputs: "grads["dAL"], grads["dWL"], grads["dbL"]
    current_cache = caches[-1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(
        sigmoid_backward(dAL, current_cache[1]), current_cache[0])

    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dw_temp, db_temp = linear_backward(
            relu_backward(grads["dA" + str(l + 2)], current_cache[1]), current_cache[0])
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dw_temp
        grads["db" + str(l + 1)] = db_temp

    return grads
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation of a neural unit
    """
    if activation == "Relu":
        dz = relu_backward(dA, cache[1])
    elif activation == "sigmoid":
        dz = sigmoid_backward(dA, cache[1])
    dw, db, dA_pre = linear_backward(dz, cache[0])
    return dw, db, dA_pre
def L_model_backward(AL, Y, caches, lamba):
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    dAL = np.divide(1 - Y, 1 - AL) - np.divide(Y, AL)

    current_cache = caches[L - 1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(
        dAL, current_cache, lambda dA, Z: sigmoid_backward(dA, Z), lamba)

    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        grads["dA" + str(l + 1)], grads["dW" + str(l + 1)], grads["db" + str(l + 1)] = linear_activation_backward(
            grads["dA" + str(l + 2)], current_cache, lambda dA, Z: relu_backward(dA, Z), lamba)

    return grads
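# The variant above passes the activation backward step as a callable and an
# extra `lamba` argument, which suggests its linear_activation_backward applies
# L2 regularization inside the linear step. A minimal sketch of a compatible
# signature follows; the (lambd / m) * W term on dW is an assumption, not
# something stated in the original snippet.
import numpy as np

def linear_activation_backward(dA, cache, backward_activation, lambd):
    linear_cache, activation_cache = cache
    A_prev, W, b = linear_cache
    m = A_prev.shape[1]
    dZ = backward_activation(dA, activation_cache)    # e.g. relu_backward or sigmoid_backward
    dW = np.dot(dZ, A_prev.T) / m + (lambd / m) * W   # assumed L2 penalty term
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db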
def linear_activation_backward(dA, cache, activation): """ :param dA: :param cache: :param activation: :return:dA_prev, dW, db """ linear_cache, activation_cache = cache if activation == "relu": dZ = relu_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) elif activation == "sigmoid": dZ = sigmoid_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) elif activation == "softmax": dZ = softmax_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation): linear_cache, activation_cache = cache if activation == "relu": ### START CODE HERE ### (≈ 2 lines of code) dZ = relu_backward(dA, activation_cache) ### END CODE HERE ### elif activation == "sigmoid": ### START CODE HERE ### (≈ 2 lines of code) dZ = sigmoid_backward(dA, activation_cache) ### END CODE HERE ### # Shorten the code dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    '''
    Implement the backward propagation for the LINEAR -> ACTIVATION layer.

    :param dA: post-activation gradient for the current layer
    :param cache: tuple of values stored for computing backward propagation efficiently,
                  (linear_cache = (A, W, b), activation_cache = Z)
    :param activation: name of the activation used in this layer, as a string: "relu" | "sigmoid"
    :return:
        dA_prev: gradient of the cost with respect to the activation of the previous layer l-1, same shape as A_prev
        dW: gradient of the cost with respect to W of the current layer l, same shape as W
        db: gradient of the cost with respect to b of the current layer l, same shape as b
    '''
    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)  # activation_cache = Z
    if activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation): """ :param dA: 当前层l的激活后的梯度值 :param cache:我们存储的用于有效计算反向传播的值的元组(值为linear_cache,activation_cache) :param activation: 要在此层中使用的激活函数名,字符串类型,【"sigmoid" | "relu"】 :return: dA_prev - 相对于激活(前一层l-1)的成本梯度值,与A_prev维度相同 dW - 相对于W(当前层l)的成本梯度值,与W的维度相同 db - 相对于b(当前层l)的成本梯度值,与b的维度相同 """ linear_cache, activation_cache = cache if activation == "relu": dZ = relu_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) elif activation == "sigmoid": dZ = sigmoid_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation='relu'):
    '''
    Implement the backward propagation for the LINEAR -> ACTIVATION layer.

    :param dA: post-activation gradient for the current layer l
    :param cache: tuple of values stored for computing backward propagation efficiently (linear_cache, activation_cache)
    :param activation: name of the activation used in this layer, as a string: 'sigmoid' | 'relu'
    :return: dA_prev - gradient of the cost with respect to the activation of the previous layer, same shape as A_prev
             dW - gradient of the cost with respect to W of the current layer l, same shape as W
             db - gradient of the cost with respect to b of the current layer l, same shape as b
    '''
    linear_cache, activation_cache = cache
    if activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation="relu"): """ 实现linear->activation层的后向传播 参数: dA -当前层的激活后的梯度值 cache -前向过程中存储的用于计算反向传播的值的元组 activation -激活函数名称 sigmoid或者relu 返回: dA_prev -相对于激活(前一层l-1)成本梯度,与A_prev维度相同 dW -相对于W(当前层l)的成本梯度,与W维度相同 db -相对于b(当前层l)的成本梯度,与b维度相同 """ linear_cache, activation_cache = cache if activation == "relu": dZ = relu_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) if activation == "sigmoid": dZ = sigmoid_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation="relu"): """ 实现LINEAR->ACTIVATION层的后向传播 参数: dA:当前层l的激活后的梯度值 cache:我们存储的用于有效计算反向传播的值的元组(值为linear_cache,activation_cache) activation: 要在此层中使用的激活函数名,字符串类型,【"sigmoid" | "relu"】 返回: dA_prev:相对于前一层的成本梯度值,与A_prev维度相同 dW:相对于W的成本梯度值,与W的维度相同 db:相对于b的成本梯度值,与b的维度相同 """ linear_cache, activation_cache = cache if activation == "relu": dZ = relu_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) elif activation == "sigmoid": dZ = sigmoid_backward(dA, activation_cache) dA_prev, dW, db = linear_backward(dZ, linear_cache) return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation): """ Implement the backward propagation for the LINEAR->ACTIVATION layer. Arguments: dA -- post-activation gradient for current layer l cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu" Returns: dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev dW -- Gradient of the cost with respect to W (current layer l), same shape as W db -- Gradient of the cost with respect to b (current layer l), same shape as b """ A_prev, W, b, Z = cache dL_dZ = None if activation == "relu": dL_dZ = utils.relu_backward(dA, Z) elif activation == "sigmoid": dL_dZ = utils.sigmoid_backward(dA, Z) m = A_prev.shape[1] dL_dW = 1 / m * np.dot(dL_dZ, A_prev.T) # dL_dA * dA_dZ * dZ_dW = dL_dW dL_db = 1 / m * np.sum(dL_dZ, axis=1, keepdims=True) # dL_dA * dA_dZ * dZ_db = dL_db dA_prev = np.dot( W.T, dL_dZ) # think of this step as -> dA[l-1] = W[l] * dL_dZ[l] assert (dA_prev.shape == A_prev.shape) assert (dL_dW.shape == W.shape) assert (dL_db.shape == b.shape) return dL_dW, dL_db, dA_prev
    ### END CODE HERE ###

    # Lth layer (SIGMOID -> LINEAR) gradients. Inputs: "AL, Y, caches". Outputs: "grads["dAL"], grads["dWL"], grads["dbL"]
    ### START CODE HERE ### (approx. 2 lines)
    current_cache = caches[-1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(
        sigmoid_backward(dAL, current_cache[1]), current_cache[0])
    ### END CODE HERE ###

    for l in reversed(range(L - 1)):
        # lth layer: (RELU -> LINEAR) gradients.
        # Inputs: "grads["dA" + str(l + 2)], caches". Outputs: "grads["dA" + str(l + 1)] , grads["dW" + str(l + 1)] , grads["db" + str(l + 1)]
        ### START CODE HERE ### (approx. 5 lines)
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_backward(
            relu_backward(grads["dA" + str(l + 2)], current_cache[1]), current_cache[0])
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
        ### END CODE HERE ###

    return grads


X_assess, Y_assess, AL, caches = L_model_backward_test_case()
grads = L_model_backward(AL, Y_assess, caches)
print("dW1 = " + str(grads["dW1"]))
print("db1 = " + str(grads["db1"]))
print("dA1 = " + str(grads["dA1"]))


# GRADED FUNCTION: update_parameters

def update_parameters(parameters, grads, learning_rate):
    """