Example #1
import numpy as np


def check_nn_gradients(check_nn_lambda=0):
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5
    #  We generate some 'random' test data
    theta1 = debug_initialize_weights(hidden_layer_size, input_layer_size)
    theta2 = debug_initialize_weights(num_labels, hidden_layer_size)
    # Reusing debugInitializeWeights to generate X
    x = debug_initialize_weights(m, input_layer_size - 1)
    y = 1 + np.mod(np.arange(m) + 1, num_labels)
    # Unroll parameters
    nn_params = np.append(np.ravel(theta1, order='F'),
                          np.ravel(theta2, order='F'))
    cost, grad = nn_cost_function(nn_params, input_layer_size,
                                  hidden_layer_size, num_labels, x, y,
                                  check_nn_lambda)
    numgrad = compute_numerical_gradient(nn_cost_function, nn_params,
                                         input_layer_size, hidden_layer_size,
                                         num_labels, x, y, check_nn_lambda)
    # Visually examine the two gradient computations.  The two columns
    # you get should be very similar.
    print(
        np.append(numgrad.reshape(numgrad.size, 1),
                  grad.reshape(grad.size, 1),
                  axis=1))
    print('The above two columns you get should be very similar.\n' +
          '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')
    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print('If your backpropagation implementation is correct, then \n' +
          'the relative difference will be small (less than 1e-9). \n' +
          '\nRelative Difference: %e\n' % diff)
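All four examples on this page call a compute_numerical_gradient helper that is not shown. A minimal sketch of such a helper, assuming a central-difference approximation with eps = 1e-4 and a cost function that returns a (cost, grad) tuple, as nn_cost_function does above:

import numpy as np


def compute_numerical_gradient(cost_func, theta, *args):
    # Central-difference approximation of the gradient of cost_func at theta.
    # Only the cost value (index 0 of the returned tuple) is used; the
    # analytical gradient returned by cost_func is deliberately ignored here.
    numgrad = np.zeros(theta.size)
    perturb = np.zeros(theta.size)
    eps = 1e-4
    for p in range(theta.size):
        perturb[p] = eps
        loss1 = cost_func(theta - perturb, *args)[0]
        loss2 = cost_func(theta + perturb, *args)[0]
        numgrad[p] = (loss2 - loss1) / (2 * eps)
        perturb[p] = 0
    return numgrad

Each perturbed evaluation changes a single parameter, so the loop costs two cost evaluations per parameter; this is why gradient checking is run on small debug networks like the one above rather than on full-size models.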
Example #2
import numpy as np


def check_gradients(check_nn_lambda=0):
    # Create small problem
    x_t = np.random.rand(4, 3)
    theta_t = np.random.rand(5, 3)

    # Zap out most entries
    y = np.dot(x_t, theta_t.T)
    rand_x_axis, rand_y_axis = np.where(
        np.random.rand(y.shape[0], y.shape[1]) > 0.5)
    y[rand_x_axis, rand_y_axis] = 0

    # R marks which ratings are observed: 1 where Y is non-zero, 0 elsewhere
    y_zero_x_axis, y_zero_y_axis = np.where(y == 0)
    r = np.ones(y.shape)
    r[y_zero_x_axis, y_zero_y_axis] = 0

    # Run Gradient Checking
    x = np.random.randn(x_t.shape[0], x_t.shape[1])
    theta = np.random.randn(theta_t.shape[0], theta_t.shape[1])
    num_users = y.shape[1]
    num_movies = y.shape[0]
    num_features = theta_t.shape[1]

    numgrad = compute_numerical_gradient(
        cofi_cost_func,
        np.hstack((np.ravel(x, order='F'), np.ravel(theta, order='F'))), y, r,
        num_users, num_movies, num_features, check_nn_lambda)
    cost, grad = cofi_cost_func(
        np.hstack((np.ravel(x, order='F'), np.ravel(theta, order='F'))), y, r,
        num_users, num_movies, num_features, check_nn_lambda)
    print(np.column_stack((numgrad, grad)))
    print('The above two columns you get should be very similar.\n'
          '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print('If your cost function implementation is correct, then \n'
          'the relative difference will be small (less than 1e-9). \n\n'
          'Relative Difference: %s\n' % diff)
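A possible usage sketch, assuming the two functions above are importable; the regularization value 1.5 is illustrative only:

if __name__ == '__main__':
    check_nn_gradients()       # unregularized neural-network gradients
    check_nn_gradients(1.5)    # neural-network gradients with regularization
    check_gradients(1.5)       # regularized collaborative-filtering gradients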
Example #3
def check_numerical_gradient(theta, X, levels=(64, 25, 64)):
    grad = J(theta, X, X, levels)[1]
    numeric_grad = compute_numerical_gradient(J, theta, X, X, levels)
    for i, g in enumerate(grad):
        print(g, numeric_grad[i])
Example #4
def check_numerical_gradient(theta, X, Y, num_classes, lbda=1e-4):
    args = (X, Y, lbda, num_classes)
    grad = J(theta, args)[1]
    numeric_grad = compute_numerical_gradient(J, theta, args)
    for i, g in enumerate(grad):
        print(g, numeric_grad[i])
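For larger parameter vectors the element-wise print in Examples #3 and #4 is hard to scan. A hypothetical variant (the name and the summary step are not part of the original code) that condenses the comparison into the same norm-based relative difference used in Examples #1 and #2:

import numpy as np


def check_numerical_gradient_summary(theta, X, Y, num_classes, lbda=1e-4):
    # Same comparison as Example #4, reported as a single number:
    # ||numgrad - grad|| / ||numgrad + grad|| should be very small
    # (on the order of 1e-9) when the analytical gradient is correct.
    args = (X, Y, lbda, num_classes)
    grad = J(theta, args)[1]
    numeric_grad = compute_numerical_gradient(J, theta, args)
    diff = np.linalg.norm(numeric_grad - grad) / np.linalg.norm(numeric_grad + grad)
    print('Relative Difference: %e' % diff)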