def check_stacked_autoencoder():
    """Check the analytic gradient of the stacked autoencoder cost numerically.

    Builds a small random two-layer stack (64 -> 36 -> 25 units) with a
    4-class softmax parameter vector and 10 random examples, evaluates
    ``stacked_autoencoder.stacked_autoencoder_cost`` once to obtain the
    analytic gradient, and compares it with the gradient produced by
    ``compute_gradient`` for the same cost function.

    The two gradients are printed side by side (numerical on the left,
    analytical on the right), followed by their relative difference.
    Nothing is returned.

    Writing a checker like this is recommended whenever a new cost function
    is implemented.
    """
    # Setup random data / small model
    input_size = 64
    hidden_size_L1 = 36
    hidden_size_L2 = 25
    lambda_ = 0.01
    num_classes = 4
    num_examples = 10

    data = np.random.randn(input_size, num_examples)
    # Labels are drawn from the same class count that is passed to the cost.
    labels = np.random.randint(num_classes, size=num_examples)

    stack = [dict() for _ in range(2)]
    stack[0]['w'] = 0.1 * np.random.randn(hidden_size_L1, input_size)
    stack[0]['b'] = np.random.randn(hidden_size_L1)
    stack[1]['w'] = 0.1 * np.random.randn(hidden_size_L2, hidden_size_L1)
    stack[1]['b'] = np.random.randn(hidden_size_L2)
    softmax_theta = 0.005 * np.random.randn(hidden_size_L2 * num_classes)

    params, net_config = stacked_autoencoder.stack2params(stack)
    stacked_theta = np.concatenate((softmax_theta, params))

    def J(x):
        # Same cost call for the analytic and the numerical gradient, so both
        # are evaluated against identical data and hyper-parameters.
        return stacked_autoencoder.stacked_autoencoder_cost(
            x, input_size, hidden_size_L2, num_classes,
            net_config, lambda_, data, labels)

    # Only the gradient is needed here; the cost value itself is discarded.
    _, grad = J(stacked_theta)

    # Check that the numerical and analytic gradients are the same.
    # NOTE(review): compute_gradient is handed a J that returns the
    # (cost, grad) pair -- presumably it uses the cost element; confirm.
    num_grad = compute_gradient(J, stacked_theta)

    # Show the gradients as two aligned columns, which is what the message
    # below describes.  Assumes both are 1-D arrays of equal length, as the
    # difference computed further down already requires.
    # Single-argument parenthesised prints behave the same as a Python 2
    # print statement and as the Python 3 print function.
    print(np.column_stack((num_grad, grad)))
    print("The above two columns you get should be very similar.\n"
          "(Left-Your Numerical Gradient, Right-Analytical Gradient)\n")

    diff = np.linalg.norm(num_grad - grad) / np.linalg.norm(num_grad + grad)
    print(diff)
    print("Norm of the difference between numerical and analytical num_grad (should be < 1e-9)\n")
# ======================================================================
# STEP 5: Finetune softmax model
#  Implement the stacked_autoencoder_cost to give the combined cost of the
#  whole model, then run this cell.

# Number of weight entries at the front of each flat autoencoder theta.
# The bias slice for each layer starts at twice that count (presumably after
# both the encoder and decoder weights -- confirm against the autoencoder's
# parameter layout).
sae1_weight_count = hidden_size_L1 * input_size
sae2_weight_count = hidden_size_L1 * hidden_size_L2

# Initialize the stack using the parameters learned
stack = [
    {
        'w': sae1_opt_theta[:sae1_weight_count].reshape(hidden_size_L1,
                                                        input_size),
        'b': sae1_opt_theta[2 * sae1_weight_count:
                            2 * sae1_weight_count + hidden_size_L1],
    },
    {
        'w': sae2_opt_theta[:sae2_weight_count].reshape(hidden_size_L2,
                                                        hidden_size_L1),
        'b': sae2_opt_theta[2 * sae2_weight_count:
                            2 * sae2_weight_count + hidden_size_L2],
    },
]

# Initialize the parameters for the deep model: softmax parameters first,
# followed by the flattened stack.
stack_params, net_config = stacked_autoencoder.stack2params(stack)
stacked_autoencoder_theta = np.concatenate(
    (softmax_theta.flatten(), stack_params))


def J(x):
    # Objective handed to the optimizer; jac=True below means its return
    # value is treated as a (cost, gradient) pair.
    return stacked_autoencoder.stacked_autoencoder_cost(
        x, input_size, hidden_size_L2, num_classes,
        net_config, lambda_, train_images, train_labels)


options_ = dict(maxiter=400, disp=True)
result = scipy.optimize.minimize(
    J,
    stacked_autoencoder_theta,
    method='L-BFGS-B',
    jac=True,
    options=options_,
)
stacked_autoencoder_opt_theta = result.x

print(result)

# ======================================================================
# STEP 6: Test
# Implement the stacked_ae_cost to give the combined cost of the whole model,
# then run this cell.


def _unpack_layer(opt_theta, n_out, n_in):
    """Slice one layer's weights and bias out of a flat autoencoder theta.

    The weights are the first ``n_out * n_in`` entries, reshaped to
    ``(n_out, n_in)``; the bias is the ``n_out`` entries starting at twice
    that count.
    """
    n_weights = n_out * n_in
    bias_start = 2 * n_weights
    return {
        'w': opt_theta[0:n_weights].reshape((n_out, n_in)),
        'b': opt_theta[bias_start:bias_start + n_out],
    }


# Initialize the stack using the parameters learned
n_stack = 2  # Two layers
stack = [
    _unpack_layer(sae1_opt_theta, hidden_size_L1, input_size),
    _unpack_layer(sae2_opt_theta, hidden_size_L2, hidden_size_L1),
]

# Initialize the parameters for the deep model: the optimised softmax
# parameters followed by the flattened stack.
stack_params, net_config = stack2params(stack)
stacked_ae_theta = np.concatenate((softmax_opt_theta, stack_params))


# Instructions: Train the deep network.  The hidden size passed to the cost
#               is the dimension of the input to the classifier, which
#               corresponds to "hidden_size_L2".
def J(theta):
    return stacked_ae_cost(theta, input_size, hidden_size_L2, n_classes,
                           net_config, lambda_, train_data, train_labels)


# To verify the correctness of the cost first, uncomment:
# check_stacked_ae_cost()

# Find out the optimal theta
options = dict(maxiter=maxiter, disp=True)
results = scipy.optimize.minimize(
    J,
    stacked_ae_theta,
    method='L-BFGS-B',
    jac=True,
    options=options,
)
stacked_ae_opt_theta = results.x
# NOTE(review): this fragment starts mid-script.  The softmax optimisation
# below may belong to a branch whose header is not visible here -- confirm
# the intended indentation when splicing it back into the file.
result = optimize.minimize(
    softmaxCostCallback,
    thetaParam,
    method='L-BFGS-B',
    jac=True,
    options=softmax_options,
)
# Keep only the leading numClasses * hiddenSizeL2 entries of the solution.
softmaxThetaSize = numClasses * hiddenSizeL2
saeSoftmaxOptTheta = result.x[:softmaxThetaSize]
save(saeSoftmaxOptThetaFilename, saeSoftmaxOptTheta)

# Finetune softmax model
# Each layer's weights are the first block of its flat theta; its bias starts
# at twice the weight count.
sae1WeightCount = hiddenSizeL1 * inputSize
sae2WeightCount = hiddenSizeL2 * hiddenSizeL1

firstLayer = stacked_autoencoder.Layer(1)
secondLayer = stacked_autoencoder.Layer(2)
stack = [firstLayer, secondLayer]

firstLayer.W = sae1OptTheta[:sae1WeightCount].reshape(hiddenSizeL1, inputSize)
firstLayer.b = sae1OptTheta[2 * sae1WeightCount:
                            2 * sae1WeightCount + hiddenSizeL1]
secondLayer.W = sae2OptTheta[:sae2WeightCount].reshape(hiddenSizeL2,
                                                       hiddenSizeL1)
secondLayer.b = sae2OptTheta[2 * sae2WeightCount:
                             2 * sae2WeightCount + hiddenSizeL2]

stackParams, netConfig = stacked_autoencoder.stack2params(stack)
stackedAETheta = concatenate((saeSoftmaxOptTheta, stackParams))

# Reuse a previously saved fine-tuned theta when the file exists; otherwise
# run the optimisation and save its result.
saeOptThetaFilename = results_dir + 'saeOptTheta.npy'

if not os.path.exists(saeOptThetaFilename):
    def stackedAutoencoderCostCallback(x):
        # Objective for the optimizer; with jac=True its return value is
        # treated as a (cost, gradient) pair.
        return stacked_autoencoder.cost(
            x, inputSize, hiddenSizeL2, numClasses, netConfig,
            lambdaParam, trainData, trainLabels, corruptionLevel)

    result = optimize.minimize(
        stackedAutoencoderCostCallback,
        stackedAETheta,
        method='L-BFGS-B',
        jac=True,
        options=options,
    )
    stackedAEOptTheta = result.x
    save(saeOptThetaFilename, stackedAEOptTheta)
else:
    stackedAEOptTheta = load(saeOptThetaFilename)