def backward_propagate(params, cache, funcs, X, Y):
    """Returns the gradients of the cost with respect to params, averaged over the m examples."""
    m = X.shape[1]

    # Get W1, W2 from params (W1 is unpacked for symmetry but not needed below):
    W1 = params["W1"]
    W2 = params["W2"]

    # Get A1, A2, Z1, Z2 from cache:
    A1 = cache["A1"]
    A2 = cache["A2"]
    Z1 = cache["Z1"]
    Z2 = cache["Z2"]

    # Get layer 1 & 2 activation functions:
    L1_func = funcs["L1_func"]
    L2_func = funcs["L2_func"]

    # Calculate gradients.
    # dZ2 = A2 - Y assumes a sigmoid/softmax output layer with a cross-entropy cost,
    # so the derivative of L2_func is already folded into this term.
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m
    dZ1 = np.dot(W2.T, dZ2) * derivative(f=L1_func, fx=L1_func(Z1))
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}
    return grads
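For reference, these are the update rules the two-layer version above computes, term for term (superscripts index layers, and the 1/m factor averages over the batch):

\begin{aligned}
dZ^{[2]} &= A^{[2]} - Y \\
dW^{[2]} &= \tfrac{1}{m}\, dZ^{[2]} A^{[1]T} \\
db^{[2]} &= \tfrac{1}{m} \textstyle\sum_{i=1}^{m} dZ^{[2](i)} \\
dZ^{[1]} &= W^{[2]T} dZ^{[2]} \odot g^{[1]\prime}\!\left(Z^{[1]}\right) \\
dW^{[1]} &= \tfrac{1}{m}\, dZ^{[1]} X^{T} \\
db^{[1]} &= \tfrac{1}{m} \textstyle\sum_{i=1}^{m} dZ^{[1](i)}
\end{aligned}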
def backward_propagate(self, weights, cache, funcs, X, Y):
    m = X.shape[1]
    L = len(weights) // 2
    grads = {}

    # Output layer is handled separately, since dZL = AL - Y:
    dZl = cache[f"A{L}"] - Y
    grads[f"dW{L}"] = np.dot(dZl, cache[f"A{L-1}"].T) / m
    grads[f"db{L}"] = np.sum(dZl, axis=1, keepdims=True) / m

    for l in range(L - 1, 0, -1):
        funcl = funcs[f"L{l}_func"]
        dZl = np.dot(weights[f"W{l+1}"].T, dZl) * derivative(
            f=funcl, fx=funcl(cache[f"Z{l}"]))
        # Average over the batch; note that for l == 1 this reads cache["A0"],
        # so the forward pass is assumed to have stored A0 = X in the cache.
        grads[f"dW{l}"] = np.dot(dZl, cache[f"A{l-1}"].T) / m
        grads[f"db{l}"] = np.sum(dZl, axis=1, keepdims=True) / m

    return grads
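The derivative helper that these functions call is not shown here. The sketch below is one way it could look, inferred only from the call sites: f is the layer's activation function and fx is that activation already applied to Z. The sigmoid and relu functions inside it are assumptions for illustration, not part of the original code.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def relu(z):
    return np.maximum(0, z)

def derivative(f, fx):
    """Elementwise derivative of activation f, evaluated from its cached output fx.

    Sketch only: dispatches on the function object, which is one way the
    derivative(f=funcl, fx=funcl(Z)) call sites could be satisfied.
    """
    if f is sigmoid:
        return fx * (1.0 - fx)            # sigma'(z) = sigma(z) * (1 - sigma(z))
    if f is np.tanh:
        return 1.0 - fx ** 2              # tanh'(z) = 1 - tanh(z)^2
    if f is relu:
        return (fx > 0).astype(fx.dtype)  # relu(z) > 0 exactly where z > 0
    raise ValueError(f"No derivative rule registered for {f}")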
def backward_propagate(params, cache, funcs, X, Y):
    m = X.shape[1]
    L = len(params) // 2
    grads = {}

    # Do the first backprop step separately, since the calculation for dZL is different:
    dZl = cache[f"A{L}"] - Y
    grads[f"dW{L}"] = np.dot(dZl, cache[f"A{L-1}"].T) / m
    grads[f"db{L}"] = np.sum(dZl, axis=1, keepdims=True) / m

    for l in range(L - 1, 0, -1):
        # Define the lth activation function for cleaner code:
        funcl = funcs[f"L{l}_func"]
        # Perform a backprop step:
        dZl = np.dot(params[f"W{l+1}"].T, dZl) * derivative(
            f=funcl, fx=funcl(cache[f"Z{l}"]))
        grads[f"dW{l}"] = np.dot(dZl, cache[f"A{l-1}"].T) / m
        grads[f"db{l}"] = np.sum(dZl, axis=1, keepdims=True) / m

    return grads
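A minimal way to exercise the generalized version is sketched below. The layer sizes and the inline forward pass are hypothetical, made up for illustration, and a derivative helper like the one sketched above is assumed to be in scope; the point is only to build params, cache and funcs with the key names and shapes the backward pass expects, including A0 = X in the cache.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

# Hypothetical setup: a 2-3-1 network on m = 5 examples.
rng = np.random.default_rng(0)
X = rng.standard_normal((2, 5))
Y = rng.integers(0, 2, size=(1, 5))

params = {
    "W1": rng.standard_normal((3, 2)) * 0.01, "b1": np.zeros((3, 1)),
    "W2": rng.standard_normal((1, 3)) * 0.01, "b2": np.zeros((1, 1)),
}
funcs = {"L1_func": np.tanh, "L2_func": sigmoid}

# Minimal forward pass, caching A0 = X so the backward loop can reach it:
cache = {"A0": X}
A = X
for l in (1, 2):
    Z = np.dot(params[f"W{l}"], A) + params[f"b{l}"]
    A = funcs[f"L{l}_func"](Z)
    cache[f"Z{l}"], cache[f"A{l}"] = Z, A

grads = backward_propagate(params, cache, funcs, X, Y)
for name, g in grads.items():
    print(name, g.shape)  # each gradient has the same shape as its parameter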