Code example #1
import numpy as np


def backward_propagate(params, cache, funcs, X, Y):
    """
    Return the gradients of the cost with respect to the parameters,
    averaged over the m training examples.
    """
    m = X.shape[1]

    # Get W1, W2 from params:
    W1 = params["W1"]
    W2 = params["W2"]

    # Get A1, A2, Z1, Z2 from cache:
    A1 = cache["A1"]
    A2 = cache["A2"]
    Z1 = cache["Z1"]
    Z2 = cache["Z2"]

    # Get layer 1 & 2 functions:
    L1_func = funcs["L1_func"]
    L2_func = funcs["L2_func"]

    # Calculate gradients:
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m
    # derivative(f, fx) is expected to return f'(Z1) given fx = f(Z1) (defined elsewhere):
    dZ1 = np.dot(W2.T, dZ2) * derivative(f=L1_func, fx=L1_func(Z1))
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}

    return grads
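
All three examples call a derivative(f, fx) helper and per-layer activation functions that are not shown on this page. The following is a minimal sketch of what such a helper might look like, assuming the common sigmoid / tanh / ReLU activations and that fx holds the already-computed value f(Z); these function names and the dispatch-by-identity approach are assumptions made for illustration, not part of the original code.

import numpy as np


def sigmoid(z):
    """Logistic sigmoid activation."""
    return 1.0 / (1.0 + np.exp(-z))


def tanh(z):
    """Hyperbolic tangent (thin wrapper so identity checks like `f is tanh` work)."""
    return np.tanh(z)


def relu(z):
    """Rectified linear unit."""
    return np.maximum(0.0, z)


def derivative(f, fx):
    """Return f'(z) for activation f, reusing the already-computed fx = f(z)."""
    if f is sigmoid:
        return fx * (1.0 - fx)              # sigmoid'(z) = s(z) * (1 - s(z))
    if f is tanh:
        return 1.0 - fx ** 2                # tanh'(z) = 1 - tanh(z)^2
    if f is relu:
        return (fx > 0).astype(fx.dtype)    # relu'(z) = 1 where z > 0, else 0
    raise ValueError(f"no derivative rule for {f}")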
Code example #2
    def backward_propagate(self, weights, cache, funcs, X, Y):
        m = X.shape[1]
        L = len(weights) // 2
        grads = {}

        # Output layer: dZ^[L] is simply A^[L] - Y.
        dZl = cache[f"A{L}"] - Y
        grads[f"dW{L}"] = np.dot(dZl, cache[f"A{L-1}"].T) / m
        grads[f"db{L}"] = np.sum(dZl, axis=1, keepdims=True) / m

        # Hidden layers, walking backwards from L-1 down to 1:
        for l in range(L - 1, 0, -1):
            funcl = funcs[f"L{l}_func"]
            dZl = np.dot(weights[f"W{l+1}"].T, dZl) * derivative(
                f=funcl, fx=funcl(cache[f"Z{l}"]))
            grads[f"dW{l}"] = np.dot(dZl, cache[f"A{l-1}"].T) / m
            grads[f"db{l}"] = np.sum(dZl, axis=1, keepdims=True) / m
        return grads
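
For reference, these are the recurrences that both loop-based versions implement; the output-layer shortcut dZ^[L] = A^[L] - Y holds under the usual assumption of a sigmoid (or softmax) output unit trained with cross-entropy loss, which this page does not show:

\begin{aligned}
dZ^{[L]} &= A^{[L]} - Y \\
dZ^{[l]} &= \left(W^{[l+1]}\right)^{\top} dZ^{[l+1]} \odot g^{[l]\prime}\!\left(Z^{[l]}\right), \qquad l = L-1, \dots, 1 \\
dW^{[l]} &= \tfrac{1}{m}\, dZ^{[l]} \left(A^{[l-1]}\right)^{\top} \\
db^{[l]} &= \tfrac{1}{m} \sum_{i=1}^{m} dZ^{[l](i)}
\end{aligned}

Here A^{[0]} = X and g^{[l]} is the layer-l activation, i.e. funcs[f"L{l}_func"] in the code.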
Code example #3
import numpy as np


def backward_propagate(params, cache, funcs, X, Y):
    m = X.shape[1]
    L = len(params) // 2

    grads = {}

    # Do the first backprop step separately, since the calculation of dZL is different:
    dZl = cache[f"A{L}"] - Y
    grads[f"dW{L}"] = np.dot(dZl, cache[f"A{L-1}"].T) / m
    grads[f"db{L}"] = np.sum(dZl, axis=1, keepdims=True) / m

    for l in range(L - 1, 0, -1):
        # Define the l-th layer's activation function for cleaner code:
        funcl = funcs[f"L{l}_func"]

        # Perform one backprop step through layer l:
        dZl = np.dot(params[f"W{l+1}"].T, dZl) * derivative(
            f=funcl, fx=funcl(cache[f"Z{l}"]))
        grads[f"dW{l}"] = np.dot(dZl, cache[f"A{l-1}"].T) / m
        grads[f"db{l}"] = np.sum(dZl, axis=1, keepdims=True) / m

    return grads
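
A quick smoke test of the loop-based version above, reusing the hypothetical sigmoid/tanh/derivative helpers sketched after code example #1. The layer sizes, the hand-rolled forward pass, and the convention that the input is stored under cache["A0"] are assumptions made only for this illustration; the page does not show the forward pass that would normally build the cache.

import numpy as np

np.random.seed(0)
X = np.random.randn(2, 5)                       # 2 features, 5 examples
Y = (np.random.rand(1, 5) > 0.5).astype(float)  # binary labels, shape (1, 5)

params = {
    "W1": np.random.randn(3, 2) * 0.01, "b1": np.zeros((3, 1)),
    "W2": np.random.randn(1, 3) * 0.01, "b2": np.zeros((1, 1)),
}

# Hand-rolled forward pass for a 2-3-1 network:
Z1 = np.dot(params["W1"], X) + params["b1"]
A1 = tanh(Z1)
Z2 = np.dot(params["W2"], A1) + params["b2"]
A2 = sigmoid(Z2)

# The loop indexes cache["A0"], so the input itself lives in the cache:
cache = {"A0": X, "Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
funcs = {"L1_func": tanh}  # only the hidden layer's activation is looked up

grads = backward_propagate(params, cache, funcs, X, Y)
print({k: v.shape for k, v in grads.items()})
# {'dW2': (1, 3), 'db2': (1, 1), 'dW1': (3, 2), 'db1': (3, 1)}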