# Load MNIST database files
train_data = load_MNIST_images('data/mnist/train-images-idx3-ubyte')
train_labels = load_MNIST_labels('data/mnist/train-labels-idx1-ubyte')

"""
STEP 2: Train the first sparse autoencoder

This trains the first sparse autoencoder on the unlabeled training images.
If you've correctly implemented sparse_autoencoder_cost, you don't need to
change anything here.
"""

# Randomly initialize the parameters
sae1_theta = initialize_parameters(hidden_size_L1, input_size)

# Instructions: Train the first-layer sparse autoencoder; this layer has
# a hidden size of "hidden_size_L1".
# You should store the optimal parameters in sae1_opt_theta.
J = lambda theta: sparse_autoencoder_cost(theta, input_size, hidden_size_L1,
                                          lambda_, sparsity_param, beta,
                                          train_data)

options = {'maxiter': maxiter, 'disp': True}
results = scipy.optimize.minimize(J, sae1_theta, method='L-BFGS-B',
                                  jac=True, options=options)
sae1_opt_theta = results['x']

print("Optimization results:\n")
print(results)
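# After this first layer is trained, the stacked-autoencoder pipeline feeds
# the training data forward through it to obtain the inputs for the second
# layer. A minimal sketch of that forward pass follows; the helper name
# feedforward_autoencoder and the exact parameter packing (W1 first, then W2,
# then b1, b2) are assumptions here, not something this script defines.
def feedforward_autoencoder(theta, hidden_size, visible_size, data):
    """Hidden-layer activations a2 = sigmoid(W1 @ data + b1)."""
    W1 = theta[0:hidden_size * visible_size].reshape(hidden_size, visible_size)
    b1 = theta[2 * hidden_size * visible_size:
               2 * hidden_size * visible_size + hidden_size]
    return 1.0 / (1.0 + np.exp(-(W1 @ data + b1[:, np.newaxis])))

# Hypothetical usage:
# sae1_features = feedforward_autoencoder(sae1_opt_theta, hidden_size_L1,
#                                         input_size, train_data)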
# Randomly sample 200 patches and save them as an image file
image = display_network(
    patches[:, [np.random.randint(n_patches) for i in range(200)]])
plt.figure()
plt.imsave('sparse_autoencoder_minist_patches.png', image, cmap=plt.cm.gray)
plt.imshow(image, cmap=plt.cm.gray)

visible_size = patches.shape[0]  # Number of input units
hidden_size = 196                # Number of hidden units
weight_decay_param = 3e-3        # Weight decay parameter (the lambda in the lecture notes)
beta = 3                         # Weight of the sparsity penalty term
sparsity_param = 0.1             # Desired average activation of the hidden units

# Randomly initialize the fitting parameters
theta = initialize_parameters(hidden_size, visible_size)

J = lambda theta: sparse_autoencoder_cost(theta, visible_size, hidden_size,
                                          weight_decay_param, sparsity_param,
                                          beta, patches)

# The maximum number of iterations is set to 400,
# which is enough to get reasonable results.
options = {'maxiter': 400, 'disp': True, 'gtol': 1e-5, 'ftol': 2e-9}
results = scipy.optimize.minimize(J, theta, method='L-BFGS-B',
                                  jac=True, options=options)
opt_theta = results['x']

print("Optimization results:\n")
print(results)

# Visualize the learned weights
W1 = opt_theta[0:hidden_size*visible_size].reshape((hidden_size, visible_size))
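# For reference, the sparsity term that sparse_autoencoder_cost adds on top of
# the reconstruction and weight-decay costs is normally the KL divergence
# between the target activation sparsity_param (rho) and the mean hidden
# activation rho_hat. A minimal sketch of that penalty, assuming sigmoid
# hidden units and a (hidden_size, n_examples) activation matrix a2; this
# illustrates the standard formulation, not necessarily the exact code inside
# sparse_autoencoder_cost:
def kl_sparsity_penalty(a2, rho, beta):
    """beta * sum_j KL(rho || rho_hat_j), rho_hat_j = mean activation of unit j."""
    rho_hat = np.mean(a2, axis=1)
    kl = rho * np.log(rho / rho_hat) + (1 - rho) * np.log((1 - rho) / (1 - rho_hat))
    return beta * np.sum(kl)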
beta = 5       # Weight of the sparsity penalty term
epsilon = 0.1  # Epsilon for ZCA whitening

"""
STEP 1: Create and modify sparse_autoencoder_linear_cost to use a linear
        decoder, and check gradients
"""

# To speed up gradient checking, we will use a reduced network and some
# dummy patches
debug = False
if debug:
    debug_hidden_size = 5
    debug_visible_size = 8
    patches = np.random.rand(8, 10)
    theta = initialize_parameters(debug_hidden_size, debug_visible_size)

    cost, grad = sparse_autoencoder_linear_cost(theta, debug_visible_size,
                                                debug_hidden_size, lambda_,
                                                sparsity_param, beta, patches)

    # Check that the numerical and analytic gradients are the same
    J = lambda theta: sparse_autoencoder_linear_cost(theta, debug_visible_size,
                                                     debug_hidden_size, lambda_,
                                                     sparsity_param, beta,
                                                     patches)[0]
    nume_grad = compute_numerical_gradient(J, theta)

    # Use this to visually compare the gradients side by side
    for i in range(grad.size):
        print("{0:20.12f} {1:20.12f}".format(nume_grad[i], grad[i]))
    print('The two columns above should be very similar.\n'
          '(Left: numerical gradient, Right: analytical gradient)\n')
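# compute_numerical_gradient is called above but its body is not shown here.
# A central-difference checker of this kind typically perturbs one coordinate
# of theta at a time; the sketch below illustrates that technique and is not
# necessarily this repository's implementation.
def numerical_gradient_sketch(J, theta, eps=1e-4):
    """Approximate dJ/dtheta with central differences, one coordinate at a time."""
    grad = np.zeros_like(theta)
    for i in range(theta.size):
        step = np.zeros_like(theta)
        step[i] = eps
        grad[i] = (J(theta + step) - J(theta - step)) / (2 * eps)
    return grad

# A single relative-error number is often easier to read than the two columns:
# np.linalg.norm(nume_grad - grad) / np.linalg.norm(nume_grad + grad)
# should be very small when the analytic gradient is correct.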
test_labels = mnist_labels[test_set]
unlabeled_data = mnist_data[:, unlabeled_set]

# Output some statistics
print('# examples in unlabeled set: {}'.format(unlabeled_data.shape[1]))
print('# examples in supervised training set: {}'.format(train_data.shape[1]))
print('# examples in supervised testing set: {}\n'.format(test_data.shape[1]))

"""
STEP 2: Train the sparse autoencoder

This trains the sparse autoencoder on the unlabeled training images.
"""

# Randomly initialize the parameters
theta = initialize_parameters(hidden_size, input_size)

# Find the optimal theta by running the sparse autoencoder on the
# unlabeled training images
J = lambda theta: sparse_autoencoder_cost(theta, input_size, hidden_size,
                                          lambda_, sparsity_param, beta,
                                          unlabeled_data)

options = {'maxiter': maxiter, 'disp': True}
results = scipy.optimize.minimize(J, theta, method='L-BFGS-B',
                                  jac=True, options=options)
opt_theta = results['x']

print("Optimization results:\n")
print(results)

# Visualize the learned weights
W1 = opt_theta[0:hidden_size*input_size].reshape((hidden_size, input_size))
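# The index arrays unlabeled_set, train_set, and test_set are assumed to be
# built earlier in the script. For reference, the usual self-taught-learning
# split takes one group of digit classes as the unlabeled pool and divides the
# remaining labeled digits in half for training and testing; the sketch below
# shows that convention (digits 5-9 unlabeled, 0-4 labeled) as an assumption,
# not as this script's actual split.
labeled_idx = np.flatnonzero(mnist_labels <= 4)
unlabeled_idx = np.flatnonzero(mnist_labels >= 5)
half = labeled_idx.size // 2
train_idx, test_idx = labeled_idx[:half], labeled_idx[half:]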