Example #1
def _check_gradients(X: Tensor2D, Y: Tensor2D, parameters: Parameters,
                     gradients: Parameters, lamb: float):
    epsilon = 1e-7
    parameters_ = deepcopy(parameters)
    numerical_gradients = {}
    for param_name, param_values in parameters_.items():
        print("Calculating numeric gradients for {}".format(param_name))
        param_shape = shape(param_values)
        numerical_gradients[param_name] = zeros(*param_shape)
        for i in range(param_shape[0]):
            for j in range(param_shape[1]):
                numerical_gradients[param_name][i][j] = (
                    _single_param_numerical_gradient(
                        X, Y, parameters_, lamb, param_name, i, j, epsilon))

    gradients_vector = _params_to_single_vector(gradients)
    numerical_gradients_vector = _params_to_single_vector(numerical_gradients)

    assert shape(gradients_vector) == shape(numerical_gradients_vector)

    delta = l2_norm(minus(numerical_gradients_vector, gradients_vector)) / (
        l2_norm(numerical_gradients_vector) + l2_norm(gradients_vector))

    if delta > epsilon:
        print("Gradient check failed delta={} > {} !!!!!".format(
            delta, epsilon))
    else:
        print("Gradient check passed delta={}".format(delta))
Example #2
def _update_parameters(parameters: Parameters, gradients: Parameters,
                       learning_rate: float) -> Parameters:
    updated_parameters = {}
    for param in ("W1", "B1", "W2", "B2", "W3", "B3"):
        updated_parameters[param] = minus(
            parameters[param],
            element_multiply([[learning_rate]], gradients["d" + param]))
    return updated_parameters
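Each parameter is moved one step against its gradient, i.e. the plain gradient-descent update

\theta \leftarrow \theta - \alpha \, d\theta, \qquad \theta \in \{W1, B1, W2, B2, W3, B3\}

where \alpha is learning_rate and element_multiply([[learning_rate]], ...) plays the role of the scalar multiplication.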
Example #3
def softmax(Z: Tensor2D, stable: bool = True) -> Tensor2D:
    Z_shape = shape(Z)

    if stable:
        # stable softmax via https://eli.thegreenplace.net/2016/the-softmax-function-and-its-derivative/
        # subtract the global max so the exponentials cannot overflow
        Z_max = max(max(row) for row in Z)
        Z_minus_max = minus(Z, [[Z_max]])
        Z_exp = element_exp(Z_minus_max)
    else:
        Z_exp = element_exp(Z)

    Z_exp_col_sum = zeros(1, Z_shape[1])

    for i in range(Z_shape[0]):
        for j in range(Z_shape[1]):
            Z_exp_col_sum[0][j] += Z_exp[i][j]

    Z_softmax = zeros(*Z_shape)
    for i in range(Z_shape[0]):
        for j in range(Z_shape[1]):
            Z_softmax[i][j] = Z_exp[i][j] / Z_exp_col_sum[0][j]

    return Z_softmax
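Column j of the result is the softmax of column j of Z. Subtracting the maximum before exponentiating leaves the output unchanged, because the common factor e^{-Z_{\max}} cancels between numerator and denominator:

\mathrm{softmax}(Z)_{ij} = \frac{e^{Z_{ij} - Z_{\max}}}{\sum_k e^{Z_{kj} - Z_{\max}}} = \frac{e^{Z_{ij}}}{\sum_k e^{Z_{kj}}}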
Example #4
def test_minus(A, B, expected_C):
    assert minus(A, B) == expected_C
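The three fixture arguments suggest the test is driven by pytest.mark.parametrize. Below is a minimal, self-contained sketch of such a setup; the inline minus and the concrete cases are illustrative placeholders that only mirror the element-wise subtraction seen in the other examples, not the project's own helper or test data.

import pytest


def minus(A, B):
    # illustrative stand-in for the project's element-wise matrix subtraction
    return [[a - b for a, b in zip(row_a, row_b)] for row_a, row_b in zip(A, B)]


@pytest.mark.parametrize("A, B, expected_C", [
    ([[3.0]], [[1.0]], [[2.0]]),
    ([[1.0, 2.0], [3.0, 4.0]], [[1.0, 1.0], [1.0, 1.0]], [[0.0, 1.0], [2.0, 3.0]]),
])
def test_minus(A, B, expected_C):
    assert minus(A, B) == expected_C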
Example #5
def _backward_propagation(X: Tensor2D, Y: Tensor2D, parameters: Parameters,
                          lamb: float, cache: Parameters) -> Parameters:
    X_shape = shape(X)

    batch_size = X_shape[1]

    W1 = parameters["W1"]
    B1 = parameters["B1"]
    W2 = parameters["W2"]
    B2 = parameters["B2"]
    W3 = parameters["W3"]
    B3 = parameters["B3"]

    A0 = X
    Z1 = cache["Z1"]
    A1 = cache["A1"]
    Z2 = cache["Z2"]
    A2 = cache["A2"]
    Z3 = cache["Z3"]
    A3 = cache["A3"]
    Y_hat = A3

    # Layer 3 (output) derivatives
    dZ3 = minus(Y_hat, Y)
    assert shape(dZ3) == shape(Z3)
    dW3 = element_multiply([[1. / batch_size]],
                           matrix_multiply(dZ3, transpose(A2)))
    if lamb != 0.:
        dW3 = add(dW3, _regularization_gradient(lamb, batch_size, W3))
    assert shape(dW3) == shape(W3)
    dB3 = element_multiply([[1. / batch_size]], sum_rows(dZ3))
    assert shape(dB3) == shape(B3)

    # Layer 2 (hidden) derivatives
    dZ2 = element_multiply(matrix_multiply(transpose(W3), dZ3),
                           relu.relu_derivative(Z2))
    assert shape(dZ2) == shape(Z2)
    dW2 = element_multiply([[1. / batch_size]],
                           matrix_multiply(dZ2, transpose(A1)))
    if lamb != 0.:
        dW2 = add(dW2, _regularization_gradient(lamb, batch_size, W2))
    assert shape(dW2) == shape(W2)
    dB2 = element_multiply([[1. / batch_size]], sum_rows(dZ2))
    assert shape(dB2) == shape(B2)

    # Layer 1 (hidden) derivatives
    dZ1 = element_multiply(matrix_multiply(transpose(W2), dZ2),
                           relu.relu_derivative(Z1))
    assert shape(dZ1) == shape(Z1)
    dW1 = element_multiply([[1. / batch_size]],
                           matrix_multiply(dZ1, transpose(A0)))
    if lamb != 0.:
        dW1 = add(dW1, _regularization_gradient(lamb, batch_size, W1))
    assert shape(dW1) == shape(W1)
    dB1 = element_multiply([[1. / batch_size]], sum_rows(dZ1))
    assert shape(dB1) == shape(B1)

    # return the weight and bias gradients for each layer
    gradients = {
        "dW1": dW1,
        "dB1": dB1,
        "dW2": dW2,
        "dB2": dB2,
        "dW3": dW3,
        "dB3": dB3,
    }

    return gradients
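Written in matrix form, with m the batch size, \odot element-wise multiplication, and assuming _regularization_gradient(lamb, batch_size, W) returns the usual L2 term \frac{\lambda}{m} W, the gradients above are

dZ^{[3]} = A^{[3]} - Y

dW^{[l]} = \frac{1}{m} \, dZ^{[l]} \, (A^{[l-1]})^{\top} + \frac{\lambda}{m} W^{[l]}

dB^{[l]} = \frac{1}{m} \sum_{\text{batch}} dZ^{[l]}

dZ^{[l]} = \big( (W^{[l+1]})^{\top} dZ^{[l+1]} \big) \odot \mathrm{ReLU}'(Z^{[l]}), \quad l = 2, 1

which is the standard backward pass for a three-layer network with ReLU hidden activations; the simple form dZ^{[3]} = A^{[3]} - Y is consistent with a softmax output trained with a cross-entropy loss (see the softmax example above).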