Example #1
import numpy

# `core` and `utils` are the project's own helper modules (cost functions and
# weight <-> vector reshaping); `nn` refers to the project's neural-network object.
def activation_cost_function(X, Y, weights_vector):
    weights = utils.vector_to_nn_weights(weights_vector, nn.layers)
    activations, zs = nn.forward_propagate(X, weights)
    gradients = nn.backward_propagate(X, activations, zs, Y, weights)
    gradients = utils.nn_weights_to_vector(gradients)
    j, _, _ = core.logistic_cost_function(X, activations[-1], Y)
    return j, gradients
def activation_cost_function(X, Y, weights):
    W, b = utils.vector_to_weights(weights, n, 1) # expand weights
    z = core.calculate_z(X, W, b)
    a = core.sigmoid(z)
    j, dW, db = core.logistic_cost_function(X, a, Y)
    """
    Flatten gradients
    Transposing them because I changed the minimization functions to work with NNs
    """
    gradients = utils.weights_to_vector(dW.T, db.T)
    return j, gradients
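Both versions of activation_cost_function above return the cost j together with the gradients flattened into a single vector, which is the kind of interface the project's (unshown) minimization functions apparently expect. The sketch below illustrates that interface only; the gradient_descent loop and the quadratic stand-in cost are placeholders, not part of the project, and in the real code X and Y would be closed over or passed alongside the weight vector:

import numpy

def gradient_descent(cost_function, weights, learning_rate=0.1, iterations=100):
    # Generic loop over any function returning (cost, flat_gradient_vector)
    for _ in range(iterations):
        j, gradients = cost_function(weights)
        weights = weights - learning_rate * gradients
    return weights, j

def quadratic_cost(weights):
    # Toy stand-in with the same return shape as activation_cost_function:
    # a scalar cost plus a flat gradient vector
    j = numpy.sum(weights ** 2)
    gradients = 2.0 * weights
    return j, gradients

weights, j = gradient_descent(quadratic_cost, numpy.ones(5))
print(j)  # shrinks toward 0 as the weights approach the minimum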
def gradient_check_nn(nn, X, weights, Y, epsilon=1e-7):
    weight_vector = utils.nn_weights_to_vector(weights)

    # Numerically computed gradients
    ncg_vector = numpy.zeros(weight_vector.shape)

    """
    Compute the gradients numerically
    """
    for i in range(0, weight_vector.shape[0]):
        e = numpy.zeros(weight_vector.shape[0])
        e[i] = epsilon
        thetaplus = weight_vector + e
        thetaminus = weight_vector - e
        _weights = utils.vector_to_nn_weights(thetaplus, nn.layers)
        activations, _ = nn.forward_propagate(X, _weights)
        jplus, _, _ = core.logistic_cost_function(X, activations[-1], Y)
        _weights = utils.vector_to_nn_weights(thetaminus, nn.layers)
        activations, _ = nn.forward_propagate(X, _weights)
        jminus, _, _ = core.logistic_cost_function(X, activations[-1], Y)
        ncg_vector[i] = (jplus - jminus) / (2.0 * epsilon)

    """
    Compute the gradients using differentiation
    """
    activations, zs = nn.forward_propagate(X, weights)
    # Differentiated gradients
    gradients = nn.backward_propagate(X, activations, zs, Y, weights)
    dg_vector = utils.nn_weights_to_vector(gradients)
    # Debug output: per-layer difference between the numerical and the
    # differentiated gradients
    print(utils.vector_to_nn_weights(ncg_vector - dg_vector, nn.layers))

    diff = (
        numpy.linalg.norm(dg_vector - ncg_vector) /
        (numpy.linalg.norm(dg_vector) + numpy.linalg.norm(ncg_vector))
    )
    return diff
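A hedged usage sketch for the function above, assuming nn, X, Y and weights have already been built the way the rest of this project builds them; the thresholds follow the usual rule of thumb for this relative-difference metric (around 1e-7 is excellent, while anything near 1e-3 or above usually indicates a bug in the backward pass):

diff = gradient_check_nn(nn, X, weights, Y)
if diff < 1e-7:
    print("backward_propagate gradients look correct")
elif diff < 1e-3:
    print("small difference; possibly numerical noise, worth a closer look")
else:
    print("large difference; check backward_propagate")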
def activation_cost_function(X, Y, W, b):
    z = core.calculate_z(X, W, b)
    a = core.sigmoid(z)
    j, dW, db = core.logistic_cost_function(X, a, Y)
    return j, dW.T, db.T  # Transposing the gradients because I changed the minimization functions to work with NNs
def gradient_check_simple_logistic(X, Y, W, b, epsilon=1e-7):
    """

    :param X:
    :param Y:
    :param W:
    :param b:
    :param epsilon: The bump value used when numerical
     computing each derivative
    :return: dW, db
    """

    input_size = W.shape[0]
    output_size = W.shape[1]
    weight_vector = utils.weights_to_vector(W, b)
    """
    Compute the gradients numerically
    
    https://www.coursera.org/learn/deep-neural-network/lecture/XzSSa/numerical-approximation-of-gradients
    http://ufldl.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization
    
    Also worth checking out the steps for gradient checking in the Jupyter notebook
    """
    def activate(X, W, b):
        z = core.calculate_z(X, W, b)
        a = core.sigmoid(z)
        return a

    # Numerically computed gradients
    ncg_vector = numpy.zeros(weight_vector.shape)

    for i in range(0, weight_vector.shape[0]):
        e = numpy.zeros(weight_vector.shape[0])
        e[i] = epsilon
        thetaplus = weight_vector + e
        _W, _b = utils.vector_to_weights(thetaplus, input_size, output_size)
        a = activate(X, _W, _b)
        jplus, _, _ = core.logistic_cost_function(X, a, Y)
        thetaminus = weight_vector - e
        _W, _b = utils.vector_to_weights(thetaminus, input_size, output_size)
        a = activate(X, _W, _b)
        jminus, _, _ = core.logistic_cost_function(X, a, Y)
        ncg_vector[i] = (jplus - jminus) / (2.0 * epsilon)

    """
    Compute the gradients using differentiation
    """
    def activation_cost_function(X, Y, W, b):
        z = core.calculate_z(X, W, b)
        a = core.sigmoid(z)
        j, dW, db = core.logistic_cost_function(X, a, Y)
        return j, dW.T, db.T  # Transposing the gradients because I changed the minimization functions to work with NNs

    # Differentiated gradients
    j, dW, db = activation_cost_function(X, Y, W, b)
    dg_vector = utils.weights_to_vector(dW, db)

    diff = (
        numpy.linalg.norm(dg_vector - ncg_vector) /
        (numpy.linalg.norm(dg_vector) + numpy.linalg.norm(ncg_vector))
    )

    return diff
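For reference, the same central-difference check can be reproduced without the project's core and utils modules. The self-contained sketch below uses an inline logistic-regression cost whose analytical gradient is known; every name in it is a placeholder for illustration, not part of the project. It builds the numerical gradient exactly the way gradient_check_simple_logistic does and reports the same relative-difference metric:

import numpy

def demo_gradient_check(epsilon=1e-7):
    # Self-contained stand-in: a tiny logistic-regression cost with a known
    # analytical gradient, checked the same way gradient_check_simple_logistic
    # checks the project's cost function
    rng = numpy.random.default_rng(0)
    X = rng.normal(size=(5, 3))
    Y = rng.integers(0, 2, size=5).astype(float)

    def cost(theta):
        w, b = theta[:-1], theta[-1]
        a = 1.0 / (1.0 + numpy.exp(-(X @ w + b)))
        return -numpy.mean(Y * numpy.log(a) + (1 - Y) * numpy.log(1 - a))

    def analytic_gradient(theta):
        w, b = theta[:-1], theta[-1]
        a = 1.0 / (1.0 + numpy.exp(-(X @ w + b)))
        dw = X.T @ (a - Y) / X.shape[0]
        db = numpy.mean(a - Y)
        return numpy.append(dw, db)

    theta = rng.normal(size=4)
    numeric = numpy.zeros_like(theta)
    for i in range(theta.shape[0]):
        e = numpy.zeros_like(theta)
        e[i] = epsilon
        # Central difference, exactly as in the loops above
        numeric[i] = (cost(theta + e) - cost(theta - e)) / (2.0 * epsilon)

    analytic = analytic_gradient(theta)
    return (numpy.linalg.norm(analytic - numeric) /
            (numpy.linalg.norm(analytic) + numpy.linalg.norm(numeric)))

print(demo_gradient_check())  # should be on the order of 1e-9 or smaller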
Example #6
def test_cost(self):
    z = calculate_z(self.X, self.W, self.b)
    a = sigmoid(z)
    j, dW, db = logistic_cost_function(self.X, a, self.Y)
    expected = 0.048587
    numpy.testing.assert_almost_equal(j, expected, 5)
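logistic_cost_function itself is not shown in these examples, but from the call above it returns the cost j along with gradients dW and db for the predictions a against the labels Y. As an assumption about what that cost most likely is (a mean binary cross-entropy; whether this exact form reproduces the 0.048587 fixture depends on the project's implementation), a minimal sketch:

import numpy

def cross_entropy_cost(a, Y):
    # Assumed form: mean binary cross-entropy between predictions a and labels Y.
    # The gradients dW and db returned by the real logistic_cost_function would
    # additionally need X and the project's shape conventions.
    return -numpy.mean(Y * numpy.log(a) + (1 - Y) * numpy.log(1 - a))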