import numpy as np


def compute_gradients(hyperparameters, parameters, cache, l, debug_mode=False):
    activation = hyperparameters["activations"][l]
    W = parameters["W"][l]
    Z = cache["Z"][l]
    A = cache["A"][l]
    A_prev = cache["A"][l - 1]
    dA = cache["dA"][l]

    # get the gradient of Z: dZ = dA * g'(Z)
    if activation.lower() == "sigmoid":
        cache["dZ"][l] = np.multiply(dA, af.sigmoid_backward(Z=Z, A=A, debug_mode=debug_mode))
    elif activation.lower() == "tanh":
        cache["dZ"][l] = np.multiply(dA, af.tanh_backward(Z=Z, A=A, debug_mode=debug_mode))
    elif activation.lower() == "relu":
        cache["dZ"][l] = np.multiply(dA, af.relu_backward(Z=Z, A=A, debug_mode=debug_mode))
    elif activation.lower() == "leaky relu":
        cache["dZ"][l] = np.multiply(dA, af.leaky_relu_backward(Z=Z, A=A, debug_mode=debug_mode))
    else:
        if debug_mode:
            print("Error: unsupported activation function")
            print("\tStack trace: hidden_layer_propagation.compute_gradients()")
        return None
    dZ = cache["dZ"][l]

    # get the number of examples
    m = A_prev.shape[1]
    # get the gradient of W
    cache["dW"][l] = (1.0 / m) * np.dot(dZ, A_prev.T)
    # get the gradient of b
    cache["db"][l] = (1.0 / m) * np.sum(dZ, axis=1, keepdims=True)
    # get the gradient of A_prev, to be consumed by layer l - 1
    cache["dA"][l - 1] = np.dot(W.T, dZ)
    return (parameters, cache)
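# The `af` module used above is not shown. A minimal sketch of what its
# helpers are assumed to compute -- the elementwise derivative g'(Z) of each
# activation, reusing the cached forward output A where that is cheaper --
# might look like the following. The names and signatures are taken from the
# calls above; the leak slope 0.01 is an assumed default.

import numpy as np


def sigmoid_backward(Z, A, debug_mode=False):
    # sigmoid'(z) = a * (1 - a), using the cached activation A
    return A * (1.0 - A)


def tanh_backward(Z, A, debug_mode=False):
    # tanh'(z) = 1 - a^2
    return 1.0 - A ** 2


def relu_backward(Z, A, debug_mode=False):
    # relu'(z) = 1 where z > 0, else 0
    return (Z > 0).astype(Z.dtype)


def leaky_relu_backward(Z, A, debug_mode=False, slope=0.01):
    # leaky relu'(z) = 1 where z > 0, else the leak slope
    return np.where(Z > 0, 1.0, slope)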
def linear_activation_backward(dA, cache, activation, output_size):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple whose leading entries form the linear cache and whose last
             entry is the activation cache, stored for computing backward
             propagation efficiently
    activation -- the activation to be used in this layer, stored as a text
                  string: "sigmoid" or "relu"
    output_size -- bit width of output data

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the
               previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache = cache[0:-1]
    activation_cache = cache[-1]
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache, output_size)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)     # dZ = dA * relu'(Z)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)  # dZ = dA * sigmoid'(Z)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
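# Every linear_activation_backward variant here delegates the linear step to a
# linear_backward helper that is not shown. A minimal sketch under the usual
# convention -- linear_cache = (A_prev, W, b), with the m examples stored as
# columns -- would be the following, using the same gradient formulas as
# compute_gradients above:

import numpy as np


def linear_backward(dZ, linear_cache):
    A_prev, W, b = linear_cache
    m = A_prev.shape[1]                                  # number of examples
    dW = (1.0 / m) * np.dot(dZ, A_prev.T)                # gradient w.r.t. weights
    db = (1.0 / m) * np.sum(dZ, axis=1, keepdims=True)   # gradient w.r.t. bias
    dA_prev = np.dot(W.T, dZ)                            # gradient passed to layer l-1
    return dA_prev, dW, db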
def linear_activation_backward(dA, cache, activation):
    """
    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for
             computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text
                  string: "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the
               previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache
    if activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
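# Unlike the first snippet, these later variants assume helpers that take dA
# plus the activation cache and return dZ directly (the second variant
# additionally threads an output_size through its sigmoid helper). A minimal
# sketch of the two-argument convention, assuming the activation cache holds
# the pre-activation Z:

import numpy as np


def relu_backward(dA, activation_cache):
    Z = activation_cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0                   # gradient is zero where the unit was inactive
    return dZ


def sigmoid_backward(dA, activation_cache):
    Z = activation_cache
    s = 1.0 / (1.0 + np.exp(-Z))     # recompute the forward activation
    return dA * s * (1.0 - s)        # dZ = dA * sigmoid'(Z)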
def activation_backward(dA, cache, activation_way):
    """
    :param dA: Gradient of the cost J with respect to this layer's activation
               output, shape (nl, m)
    :param cache: a dictionary containing "linear_cache" and
                  "activation_cache" (itself a dictionary with "A" and "Z")
    :param activation_way: the activation used in this layer: "relu",
                           "sigmoid", or "tanh"
    :return:
        dA_prev -- the gradient of the cost J with respect to the previous layer's activation
        dW -- the gradient of the cost J with respect to this layer's W
        db -- the gradient of the cost J with respect to this layer's b
    """
    linear_cache = cache["linear_cache"]
    activation_cache = cache["activation_cache"]
    if activation_way == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation_way == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation_way == "tanh":
        dZ = tanh_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
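# A quick shape check for the last variant, wired to the hypothetical helper
# sketches above (so the activation cache is simplified to just Z rather than
# the {"A", "Z"} dictionary the docstring describes). Toy dimensions: 3 units
# fed by 4 inputs, 5 examples; the forward quantities are random stand-ins,
# not a real forward pass.

import numpy as np

np.random.seed(0)
A_prev = np.random.randn(4, 5)
W = np.random.randn(3, 4)
b = np.zeros((3, 1))
Z = np.dot(W, A_prev) + b
cache = {"linear_cache": (A_prev, W, b), "activation_cache": Z}

dA = np.random.randn(3, 5)
dA_prev, dW, db = activation_backward(dA, cache, "relu")
print(dA_prev.shape, dW.shape, db.shape)   # (4, 5) (3, 4) (3, 1)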