# Read the MNIST training images and their labels from the IDX files
train_data = load_MNIST_images('data/mnist/train-images-idx3-ubyte')
train_labels = load_MNIST_labels('data/mnist/train-labels-idx1-ubyte')


"""
STEP 2: Train the first sparse autoencoder

  Fits the first sparse autoencoder to the unlabelled MNIST training images
  loaded above. Provided sparse_autoencoder_cost is implemented correctly,
  nothing in this step needs to be edited.
"""

# Start from a random parameter vector for the first layer
sae1_theta = initialize_parameters(hidden_size_L1, input_size)


# Objective for the first layer, whose hidden size is "hidden_size_L1".
# It is handed to the optimizer with jac=True, so sparse_autoencoder_cost is
# expected to return the cost together with its gradient.
def J(theta):
    return sparse_autoencoder_cost(
        theta,
        input_size,
        hidden_size_L1,
        lambda_,
        sparsity_param,
        beta,
        train_data,
    )


options = {'maxiter': maxiter, 'disp': True}

# Minimize with L-BFGS-B; the optimal parameters are kept in sae1_opt_theta
results = scipy.optimize.minimize(
    J,
    sae1_theta,
    method='L-BFGS-B',
    jac=True,
    options=options,
)
sae1_opt_theta = results['x']

print("Show the results of optimization as following.\n")
print(results)
# Example #2
0
# Draw 200 patch columns at random (independent draws, so repeats are
# possible) and let display_network arrange them into a single image
image = display_network(
    patches[:, [np.random.randint(n_patches) for _ in range(200)]]
)

# Write the image to disk and also show it in a fresh figure
plt.figure()
plt.imsave('sparse_autoencoder_minist_patches.png', image, cmap=plt.cm.gray)
plt.imshow(image, cmap=plt.cm.gray)

# Network dimensions
visible_size = patches.shape[0]  # Number of input units (rows of patches)
hidden_size = 196  # Number of hidden units

# Hyperparameters of the sparse autoencoder cost
weight_decay_param = 3e-3  # Weight decay parameter (lambda in the lecture notes)
beta = 3  # Weight of the sparsity penalty term
sparsity_param = 0.1  # Desired average activation of the hidden units

# Random starting point for the optimizer
theta = initialize_parameters(hidden_size, visible_size)


# Objective passed to the optimizer. With jac=True below,
# sparse_autoencoder_cost is expected to return (cost, gradient).
def J(theta):
    return sparse_autoencoder_cost(
        theta,
        visible_size,
        hidden_size,
        weight_decay_param,
        sparsity_param,
        beta,
        patches,
    )


# The iteration cap of 400 is described by the original author as enough
# to reach reasonable results.
options = {
    'maxiter': 400,
    'disp': True,
    'gtol': 1e-5,
    'ftol': 2e-9,
}
results = scipy.optimize.minimize(
    J, theta, method='L-BFGS-B', jac=True, options=options
)
opt_theta = results['x']
beta = 5  # Weight of the sparsity penalty term

epsilon = 0.1  # Epsilon for ZCA whitening

"""
  STEP 1: Create and modify sparse_autoencoder_linear_cost to use a linear decoder,
          and check gradients
"""

# Gradient checking is slow, so it is run on a reduced network with random
# dummy patches. Set `debug` to True to enable the check.
debug = False
if debug:
    debug_hidden_size = 5
    debug_visible_size = 8
    # NOTE: this overwrites the module-level `patches` and `theta`.
    # The dummy data must have one row per visible unit, so the row count is
    # tied to debug_visible_size instead of repeating the literal 8.
    patches = np.random.rand(debug_visible_size, 10)
    theta = initialize_parameters(debug_hidden_size, debug_visible_size)

    # Analytic cost and gradient at the random starting point
    cost, grad = sparse_autoencoder_linear_cost(
        theta,
        debug_visible_size,
        debug_hidden_size,
        lambda_,
        sparsity_param,
        beta,
        patches,
    )

    # Cost-only wrapper ([0] drops the gradient) used to estimate the
    # gradient numerically for comparison with the analytic one.
    def J(theta):
        return sparse_autoencoder_linear_cost(
            theta,
            debug_visible_size,
            debug_hidden_size,
            lambda_,
            sparsity_param,
            beta,
            patches,
        )[0]

    nume_grad = compute_numerical_gradient(J, theta)

    # Print both gradients side by side (numerical left, analytic right)
    for i in range(grad.size):
        print("{0:20.12f} {1:20.12f}".format(nume_grad[i], grad[i]))
# Example #4
0
# Columns of mnist_data selected by unlabeled_set form the unlabeled set
unlabeled_data = mnist_data[:, unlabeled_set]

# Report how many examples (columns) each data set holds
print('# examples in unlabeled set: {}'.format(unlabeled_data.shape[1]))
print('# examples in supervised training set: {}'.format(train_data.shape[1]))
print('# examples in supervised testing set: {}\n'.format(test_data.shape[1]))


"""
STEP 2: Train the sparse autoencoder

  Fits the sparse autoencoder to the unlabeled training images.
"""

# Random starting point for the optimizer
theta = initialize_parameters(hidden_size, input_size)


# Objective evaluated on the unlabeled images. With jac=True below,
# sparse_autoencoder_cost is expected to return (cost, gradient).
def J(theta):
    return sparse_autoencoder_cost(
        theta,
        input_size,
        hidden_size,
        lambda_,
        sparsity_param,
        beta,
        unlabeled_data,
    )


# Search for the optimal theta with L-BFGS-B
options = {'maxiter': maxiter, 'disp': True}
results = scipy.optimize.minimize(
    J,
    theta,
    method='L-BFGS-B',
    jac=True,
    options=options,
)
opt_theta = results['x']

print("Show the results of optimization as following.\n")
print(results)

# Recover W1 for visualization: the first hidden_size * input_size entries of
# the flat parameter vector, viewed as a (hidden_size, input_size) matrix
W1 = opt_theta[:hidden_size * input_size].reshape(hidden_size, input_size)
# Labels of the examples selected by test_set
test_labels = mnist_labels[test_set]

# Columns of mnist_data selected by unlabeled_set form the unlabeled set
unlabeled_data = mnist_data[:, unlabeled_set]

# Report how many examples (columns) each data set holds
print('# examples in unlabeled set: {}'.format(unlabeled_data.shape[1]))
print('# examples in supervised training set: {}'.format(train_data.shape[1]))
print('# examples in supervised testing set: {}\n'.format(test_data.shape[1]))

"""
STEP 2: Train the sparse autoencoder

  Fits the sparse autoencoder to the unlabeled training images.
"""

# Random starting point for the optimizer
theta = initialize_parameters(hidden_size, input_size)


# Objective evaluated on the unlabeled images. With jac=True below,
# sparse_autoencoder_cost is expected to return (cost, gradient).
def J(theta):
    return sparse_autoencoder_cost(
        theta,
        input_size,
        hidden_size,
        lambda_,
        sparsity_param,
        beta,
        unlabeled_data,
    )


# Search for the optimal theta with L-BFGS-B
options = {'maxiter': maxiter, 'disp': True}
results = scipy.optimize.minimize(
    J, theta, method='L-BFGS-B', jac=True, options=options
)
opt_theta = results['x']
epsilon = 0.1  # Epsilon for ZCA whitening

"""
  STEP 1: Create and modify sparse_autoencoder_linear_cost to use a linear decoder,
          and check gradients
"""

# Gradient checking is slow, so it is run on a reduced network with random
# dummy patches. Set `debug` to True to enable the check.
debug = False
if debug:
    debug_hidden_size = 5
    debug_visible_size = 8
    # NOTE: this overwrites the module-level `patches` and `theta`.
    # The dummy data must have one row per visible unit, so the row count is
    # tied to debug_visible_size instead of repeating the literal 8.
    patches = np.random.rand(debug_visible_size, 10)
    theta = initialize_parameters(debug_hidden_size, debug_visible_size)

    # Analytic cost and gradient at the random starting point
    cost, grad = sparse_autoencoder_linear_cost(
        theta,
        debug_visible_size,
        debug_hidden_size,
        lambda_,
        sparsity_param,
        beta,
        patches,
    )

    # Cost-only wrapper ([0] drops the gradient) used to estimate the
    # gradient numerically for comparison with the analytic one.
    def J(theta):
        return sparse_autoencoder_linear_cost(
            theta,
            debug_visible_size,
            debug_hidden_size,
            lambda_,
            sparsity_param,
            beta,
            patches,
        )[0]

    nume_grad = compute_numerical_gradient(J, theta)

    # Print both gradients side by side (numerical left, analytic right)
    for i in range(grad.size):
        print("{0:20.12f} {1:20.12f}".format(nume_grad[i], grad[i]))
    print('The above two columns you get should be very similar.\n(Left-Your Numerical Gradient, Right-Analytical Gradient)\n')
# Example #7
0
# Draw 200 patch columns at random (independent draws, so repeats are
# possible), arrange them with display_network and save the result as an image
image = display_network(
    patches[:, [np.random.randint(n_patches) for _ in range(200)]]
)

# Write the image to disk and also show it in a fresh figure
plt.figure()
plt.imsave('sparse_autoencoder_minist_patches.png', image, cmap=plt.cm.gray)
plt.imshow(image, cmap=plt.cm.gray)

# Network dimensions
visible_size = patches.shape[0]  # Number of input units (rows of patches)
hidden_size = 196  # Number of hidden units

# Hyperparameters of the sparse autoencoder cost
weight_decay_param = 3e-3  # Weight decay parameter (lambda in the lecture notes)
beta = 3  # Weight of the sparsity penalty term
sparsity_param = 0.1  # Desired average activation of the hidden units

# Random starting point for the optimizer
theta = initialize_parameters(hidden_size, visible_size)


# Objective passed to the optimizer. With jac=True below,
# sparse_autoencoder_cost is expected to return (cost, gradient).
def J(theta):
    return sparse_autoencoder_cost(
        theta,
        visible_size,
        hidden_size,
        weight_decay_param,
        sparsity_param,
        beta,
        patches,
    )


# The iteration cap of 400 is described by the original author as enough
# to reach reasonable results.
options = {
    'maxiter': 400,
    'disp': True,
    'gtol': 1e-5,
    'ftol': 2e-9,
}
results = scipy.optimize.minimize(
    J,
    theta,
    method='L-BFGS-B',
    jac=True,
    options=options,
)
opt_theta = results['x']

print("Show the results of optimization as following.\n")
print(results)

# Recover W1 for visualization: the first hidden_size * visible_size entries
# of the flat parameter vector, viewed as a (hidden_size, visible_size) matrix
W1 = opt_theta[:hidden_size * visible_size].reshape(hidden_size, visible_size)