Example no. 1
0
def check_stacked_autoencoder():
    """
    # Check the gradients for the stacked autoencoder
    #
    # In general, we recommend that the creation of such files for checking
    # gradients when you write new cost functions.
    #

    :return:
    """
    ## Setup random data / small model

    input_size = 64
    hidden_size_L1 = 36
    hidden_size_L2 = 25
    lambda_ = 0.01
    data = np.random.randn(input_size, 10)
    labels = np.random.randint(4, size=10)
    num_classes = 4

    stack = [dict() for i in range(2)]
    stack[0]['w'] = 0.1 * np.random.randn(hidden_size_L1, input_size)
    stack[0]['b'] = np.random.randn(hidden_size_L1)
    stack[1]['w'] = 0.1 * np.random.randn(hidden_size_L2, hidden_size_L1)
    stack[1]['b'] = np.random.randn(hidden_size_L2)
    softmax_theta = 0.005 * np.random.randn(hidden_size_L2 * num_classes)

    params, net_config = stacked_autoencoder.stack2params(stack)

    stacked_theta = np.concatenate((softmax_theta, params))

    cost, grad = stacked_autoencoder.stacked_autoencoder_cost(stacked_theta, input_size,
                                                              hidden_size_L2, num_classes,
                                                              net_config, lambda_, data, labels)

    # Check that the numerical and analytic gradients are the same
    J = lambda x: stacked_autoencoder.stacked_autoencoder_cost(x, input_size, hidden_size_L2,
                                                               num_classes, net_config, lambda_,
                                                               data, labels)
    num_grad = compute_gradient(J, stacked_theta)

    print num_grad, grad
    print "The above two columns you get should be very similar.\n" \
          "(Left-Your Numerical Gradient, Right-Analytical Gradient)\n"

    diff = np.linalg.norm(num_grad - grad) / np.linalg.norm(num_grad + grad)
    print diff
    print "Norm of the difference between numerical and analytical num_grad (should be < 1e-9)\n"
Example no. 2
0

# Initialize the stack using the parameters learned
# NOTE(review): the slicing assumes each pretrained theta is laid out as
# the flattened [W1, W2, b1, b2] of a sparse autoencoder, so b1 starts at
# offset 2 * hidden * visible — confirm against the autoencoder's
# parameterization.
stack = [dict() for i in range(2)]
stack[0]['w'] = sae1_opt_theta[0:hidden_size_L1 * input_size].reshape(hidden_size_L1, input_size)
stack[0]['b'] = sae1_opt_theta[2 * hidden_size_L1 * input_size:2 * hidden_size_L1 * input_size + hidden_size_L1]
stack[1]['w'] = sae2_opt_theta[0:hidden_size_L1 * hidden_size_L2].reshape(hidden_size_L2, hidden_size_L1)
stack[1]['b'] = sae2_opt_theta[2 * hidden_size_L1 * hidden_size_L2:2 * hidden_size_L1 * hidden_size_L2 + hidden_size_L2]

# Initialize the parameters for the deep model: flatten the stack and
# prepend the softmax classifier weights.
(stack_params, net_config) = stacked_autoencoder.stack2params(stack)

stacked_autoencoder_theta = np.concatenate((softmax_theta.flatten(), stack_params))

# Objective for fine-tuning the whole network on the labeled training set.
# stacked_autoencoder_cost is expected to return (cost, grad) so it can be
# used with jac=True below.
J = lambda x: stacked_autoencoder.stacked_autoencoder_cost(x, input_size, hidden_size_L2,
                                                           num_classes, net_config, lambda_,
                                                           train_images, train_labels)

# Fine-tune with L-BFGS-B; jac=True tells minimize that J returns the
# gradient alongside the cost.
options_ = {'maxiter': 400, 'disp': True}
result = scipy.optimize.minimize(J, stacked_autoencoder_theta, method='L-BFGS-B', jac=True, options=options_)
stacked_autoencoder_opt_theta = result.x

print result

# ======================================================================
# STEP 6: Test

# Load the MNIST test set for evaluation.
test_images = load_MNIST.load_MNIST_images('t10k-images.idx3-ubyte')
test_labels = load_MNIST.load_MNIST_labels('t10k-labels.idx1-ubyte')

# Two auto encoders without fine tuning