import numpy as np


def check_nn_gradients(check_nn_lambda=0):
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5

    # We generate some 'random' test data
    theta1 = debug_initialize_weights(hidden_layer_size, input_layer_size)
    theta2 = debug_initialize_weights(num_labels, hidden_layer_size)
    # Reusing debug_initialize_weights to generate X
    x = debug_initialize_weights(m, input_layer_size - 1)
    y = 1 + np.mod(np.arange(m) + 1, num_labels)

    # Unroll parameters
    nn_params = np.append(np.ravel(theta1, order='F'),
                          np.ravel(theta2, order='F'))

    cost, grad = nn_cost_function(nn_params, input_layer_size,
                                  hidden_layer_size, num_labels, x, y,
                                  check_nn_lambda)
    numgrad = compute_numerical_gradient(nn_cost_function, nn_params,
                                         input_layer_size, hidden_layer_size,
                                         num_labels, x, y, check_nn_lambda)

    # Visually examine the two gradient computations. The two columns
    # you get should be very similar.
    print(np.append(numgrad.reshape(numgrad.size, 1),
                    grad.reshape(grad.size, 1), axis=1))
    print('The above two columns you get should be very similar.\n'
          '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print('If your backpropagation implementation is correct, then \n'
          'the relative difference will be small (less than 1e-9). \n'
          '\nRelative Difference: %e\n' % diff)
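# The debug_initialize_weights helper used above is not defined in this
# section. A minimal sketch, assuming the deterministic sine-based
# initialization of the original Coursera exercise's debugInitializeWeights
# (an assumption, since the source does not show the implementation):
def debug_initialize_weights(fan_out, fan_in):
    """Initialize a (fan_out, 1 + fan_in) weight matrix deterministically.

    Using sin() instead of random numbers keeps values in a small range and
    makes the gradient check reproducible. The extra column accounts for the
    bias term, which is why X is generated with input_layer_size - 1 above.
    """
    w = np.zeros((fan_out, 1 + fan_in))
    return np.sin(np.arange(1, w.size + 1)).reshape(w.shape, order='F') / 10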
def check_gradients(check_nn_lambda=0):
    # Create small problem
    x_t = np.random.rand(4, 3)
    theta_t = np.random.rand(5, 3)

    # Zap out most entries
    y = np.dot(x_t, theta_t.T)
    y[np.random.rand(*y.shape) > 0.5] = 0
    # R marks which entries of Y were kept (1 = rated, 0 = zapped out)
    r = np.where(y != 0, 1.0, 0.0)

    # Run Gradient Checking
    x = np.random.randn(*x_t.shape)
    theta = np.random.randn(*theta_t.shape)
    num_users = y.shape[1]
    num_movies = y.shape[0]
    num_features = theta_t.shape[1]
    params = np.hstack((np.ravel(x, order='F'), np.ravel(theta, order='F')))

    numgrad = compute_numerical_gradient(cofi_cost_func, params, y, r,
                                         num_users, num_movies, num_features,
                                         check_nn_lambda)
    cost, grad = cofi_cost_func(params, y, r, num_users, num_movies,
                                num_features, check_nn_lambda)

    print(np.column_stack((numgrad, grad)))
    print('The above two columns you get should be very similar.\n'
          '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print('If your cost function implementation is correct, then \n'
          'the relative difference will be small (less than 1e-9). \n\n'
          'Relative Difference: %s\n' % diff)
def check_numerical_gradient(theta, X, levels=[64, 25, 64]):
    # Compare the analytical gradient from J against a numerical estimate
    grad = J(theta, X, X, levels)[1]
    numeric_grad = compute_numerical_gradient(J, theta, X, X, levels)
    for i, g in enumerate(grad):
        print(g, numeric_grad[i])
def check_numerical_gradient(theta, X, Y, num_classes, lbda=1e-4):
    args = (X, Y, lbda, num_classes)
    # Compare the analytical gradient from J against a numerical estimate
    grad = J(theta, args)[1]
    numeric_grad = compute_numerical_gradient(J, theta, args)
    for i, g in enumerate(grad):
        print(g, numeric_grad[i])
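# All of the checkers above rely on a compute_numerical_gradient helper that
# is not shown in this section. A minimal central-difference sketch, assuming
# the cost function returns a (cost, grad) tuple and takes the unrolled
# parameter vector as its first argument (the 1e-4 perturbation size is the
# conventional choice, not taken from this section):
def compute_numerical_gradient(cost_func, theta, *args):
    """Approximate the gradient of cost_func at theta by central differences."""
    numgrad = np.zeros(theta.size)
    perturb = np.zeros(theta.size)
    eps = 1e-4
    for i in range(theta.size):
        # Perturb one parameter at a time and evaluate the cost on both sides
        perturb[i] = eps
        loss1 = cost_func(theta - perturb, *args)[0]
        loss2 = cost_func(theta + perturb, *args)[0]
        numgrad[i] = (loss2 - loss1) / (2 * eps)
        perturb[i] = 0
    return numgrad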