def check_stacked_autoencoder():
    """
    Check the gradients for the stacked autoencoder.

    In general, we recommend writing a gradient check like this
    whenever you implement a new cost function.
    """
    ## Setup random data / small model
    input_size = 64
    hidden_size_L1 = 36
    hidden_size_L2 = 25
    lambda_ = 0.01
    data = np.random.randn(input_size, 10)
    labels = np.random.randint(4, size=10)
    num_classes = 4

    # Random initial weights for the two autoencoder layers
    stack = [dict() for i in range(2)]
    stack[0]['w'] = 0.1 * np.random.randn(hidden_size_L1, input_size)
    stack[0]['b'] = np.random.randn(hidden_size_L1)
    stack[1]['w'] = 0.1 * np.random.randn(hidden_size_L2, hidden_size_L1)
    stack[1]['b'] = np.random.randn(hidden_size_L2)
    softmax_theta = 0.005 * np.random.randn(hidden_size_L2 * num_classes)

    params, net_config = stacked_autoencoder.stack2params(stack)
    stacked_theta = np.concatenate((softmax_theta, params))

    cost, grad = stacked_autoencoder.stacked_autoencoder_cost(stacked_theta, input_size,
                                                              hidden_size_L2, num_classes,
                                                              net_config, lambda_, data, labels)

    # Check that the numerical and analytic gradients are the same
    J = lambda x: stacked_autoencoder.stacked_autoencoder_cost(x, input_size, hidden_size_L2,
                                                               num_classes, net_config,
                                                               lambda_, data, labels)
    num_grad = compute_gradient(J, stacked_theta)

    print num_grad, grad
    print "The above two columns you get should be very similar.\n" \
          "(Left-Your Numerical Gradient, Right-Analytical Gradient)\n"

    diff = np.linalg.norm(num_grad - grad) / np.linalg.norm(num_grad + grad)
    print diff
    print "Norm of the difference between numerical and analytical gradients (should be < 1e-9)\n"
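
# For reference, a minimal sketch of what `compute_gradient` (used above) is
# assumed to do: estimate the gradient by central differences, perturbing one
# coordinate of theta at a time. This is an illustration under that
# assumption, not necessarily the project's actual implementation. Note that
# `J` returns a (cost, grad) pair, so only the cost is used here.
def compute_gradient_sketch(J, theta, epsilon=1e-4):
    num_grad = np.zeros_like(theta)
    for i in range(theta.size):
        offset = np.zeros_like(theta)
        offset[i] = epsilon
        cost_plus, _ = J(theta + offset)    # J(theta + eps * e_i)
        cost_minus, _ = J(theta - offset)   # J(theta - eps * e_i)
        num_grad[i] = (cost_plus - cost_minus) / (2 * epsilon)
    return num_grad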
# Initialize the stack using the parameters learned by the two autoencoders.
# Each sae*_opt_theta is flattened as (W1, W2, b1, b2), so W1 occupies the
# first hidden*visible entries and b1 starts at offset 2*hidden*visible.
stack = [dict() for i in range(2)]
stack[0]['w'] = sae1_opt_theta[0:hidden_size_L1 * input_size].reshape(hidden_size_L1, input_size)
stack[0]['b'] = sae1_opt_theta[2 * hidden_size_L1 * input_size:
                               2 * hidden_size_L1 * input_size + hidden_size_L1]
stack[1]['w'] = sae2_opt_theta[0:hidden_size_L1 * hidden_size_L2].reshape(hidden_size_L2, hidden_size_L1)
stack[1]['b'] = sae2_opt_theta[2 * hidden_size_L1 * hidden_size_L2:
                               2 * hidden_size_L1 * hidden_size_L2 + hidden_size_L2]

# Initialize the parameters for the deep model
stack_params, net_config = stacked_autoencoder.stack2params(stack)
stacked_autoencoder_theta = np.concatenate((softmax_theta.flatten(), stack_params))

# Fine-tune the whole network by minimizing the stacked autoencoder cost
# on the labeled training set
J = lambda x: stacked_autoencoder.stacked_autoencoder_cost(x, input_size, hidden_size_L2,
                                                           num_classes, net_config,
                                                           lambda_, train_images, train_labels)

options_ = {'maxiter': 400, 'disp': True}
result = scipy.optimize.minimize(J, stacked_autoencoder_theta,
                                 method='L-BFGS-B', jac=True, options=options_)
stacked_autoencoder_opt_theta = result.x
print result

# ======================================================================
# STEP 6: Test
test_images = load_MNIST.load_MNIST_images('t10k-images.idx3-ubyte')
test_labels = load_MNIST.load_MNIST_labels('t10k-labels.idx1-ubyte')

# Two autoencoders without fine-tuning
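
# A hedged sketch of the comparison this step leads into: evaluate test
# accuracy before and after fine-tuning. `stacked_autoencoder_predict` is a
# hypothetical helper (mirroring the cost function's signature) assumed to
# return the predicted class label for each column of `test_images`.
pred_before = stacked_autoencoder.stacked_autoencoder_predict(
    stacked_autoencoder_theta, input_size, hidden_size_L2,
    num_classes, net_config, test_images)
pred_after = stacked_autoencoder.stacked_autoencoder_predict(
    stacked_autoencoder_opt_theta, input_size, hidden_size_L2,
    num_classes, net_config, test_images)

print "Before fine-tuning test accuracy: %.2f%%" % (100 * np.mean(pred_before == test_labels))
print "After fine-tuning test accuracy:  %.2f%%" % (100 * np.mean(pred_after == test_labels))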