Example #1
def test_sae_cost():
    threshold = 1e-9 * (num_samples / 50.0)
    theta = sparse_autoencoder.initialize_params(hidden_size, visible_size)

    sae_cost = partial(base_sae_cost, weight_decay=weight_decay, beta=beta)
    cost, grad, ngrad = check_grad(sae_cost, theta, threshold)

    # test that if gradient is wrong, we fail
    bad_grad = np.array(grad)
    bad_grad[2] = 1000
    assert diff_grad(ngrad, bad_grad) > threshold
    bad_grad2 = 2 * np.array(grad)
    assert diff_grad(ngrad, bad_grad2) > threshold

    # test that weight params actually do something
    if weight_decay > 0:
        noweight_sae_cost = partial(base_sae_cost, weight_decay=0, beta=beta)
        noweight_cost, noweight_grad, _ = check_grad(noweight_sae_cost, theta, threshold)
        print("noweight cost:", noweight_cost)
        diff = diff_grad(grad, noweight_grad)
        print("noweight diff:", diff)
        assert diff > threshold

    # test that sparsity works
    if beta > 0:
        nosparsity_sae_cost = partial(base_sae_cost, weight_decay=weight_decay, beta=0)
        nosparsity_cost, nosparsity_grad, _ = check_grad(nosparsity_sae_cost, theta, threshold)
        print("nosparsity cost:", nosparsity_cost)
        diff = diff_grad(grad, nosparsity_grad)
        print("nosparsity diff:", diff)
        assert diff > threshold
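The test above relies on two helpers, check_grad and diff_grad, that are not shown in this excerpt. A minimal sketch of what they might look like, assuming the cost function returns a (cost, gradient) pair and the comparison is a normalized difference between gradient vectors (the real helpers may differ):

import numpy as np

def numerical_gradient(cost_fn, theta, eps=1e-4):
    # Central-difference approximation of the gradient at theta
    ngrad = np.zeros_like(theta)
    for i in range(theta.size):
        e = np.zeros_like(theta)
        e[i] = eps
        ngrad[i] = (cost_fn(theta + e)[0] - cost_fn(theta - e)[0]) / (2.0 * eps)
    return ngrad

def diff_grad(a, b):
    # Normalized distance between two gradient vectors
    return np.linalg.norm(a - b) / np.linalg.norm(a + b)

def check_grad(cost_fn, theta, threshold):
    # Compare the analytic gradient against the numerical approximation
    cost, grad = cost_fn(theta)
    ngrad = numerical_gradient(cost_fn, theta)
    assert diff_grad(ngrad, grad) < threshold
    return cost, grad, ngrad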
Example #2
    # Network Architecture 
    visible_size = data.shape[1]
    hidden_size = 300
    
    # Training params
    weight_decay = 3e-3
    sparsity_param = 0.1
    beta = 3
    max_iter = 500            # Maximum number of iterations of L-BFGS to run 

    # Get the data
    num_samples = data.shape[0]
    

    # set up L-BFGS args
    theta = sparse_autoencoder.initialize_params(hidden_size, visible_size)
    sae_cost = partial(sparse_autoencoder.cost,
                       visible_size=visible_size,
                       hidden_size=hidden_size,
                       weight_decay=weight_decay,
                       beta=beta,
                       sparsity_param=sparsity_param,
                       data=data.T)

    # Train!
    trained, cost, d = scipy.optimize.lbfgsb.fmin_l_bfgs_b(sae_cost,
                                                            theta,
                                                            maxfun=max_iter,
                                                            m=100,
                                                            factr=1.0,
                                                            pgtol=1e-100)
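fmin_l_bfgs_b returns the optimized parameter vector, the final cost, and an info dictionary d (with keys such as 'warnflag' and 'funcalls'). A short follow-up sketch for inspecting the result and unpacking the trained vector; the flat [W1, W2, b1, b2] packing order is an assumption borrowed from the usual UFLDL layout and must match whatever sparse_autoencoder.initialize_params actually produces:

    # warnflag: 0 = converged, 1 = hit maxfun/maxiter, 2 = stopped early (see d['task'])
    if d['warnflag'] != 0:
        print('L-BFGS-B stopped early:', d['warnflag'])
    print('final cost:', cost, '| function evaluations:', d['funcalls'])

    # Hypothetical unpacking, assuming a flat [W1, W2, b1, b2] layout
    hv = hidden_size * visible_size
    W1 = trained[:hv].reshape(hidden_size, visible_size)
    W2 = trained[hv:2 * hv].reshape(visible_size, hidden_size)
    b1 = trained[2 * hv:2 * hv + hidden_size]
    b2 = trained[2 * hv + hidden_size:]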
Example #3
    softmax_weight_decay = 1e-4
    l2_weight_decay = 3e-3
    l3_weight_decay = 3e-3
    sparsity_param = 0.1
    beta = 3
    max_iter = 400
    num_samples = 1000000

    get_data = sample_images.get_mnist_data
    train_patches, train_labels = get_data('../data/mnist.pkl.gz',
                                           train=True,
                                           num_samples=num_samples)

    print('will train layer 2 model')
    # set up L-BFGS args
    theta = sparse_autoencoder.initialize_params(hidden_size, visible_size)
    sae_cost = partial(sparse_autoencoder.cost,
                       visible_size=visible_size,
                       hidden_size=hidden_size,
                       weight_decay=l2_weight_decay,
                       beta=beta,
                       sparsity_param=sparsity_param,
                       data=train_patches)

    # Train!
    l2_model, cost, d = scipy.optimize.lbfgsb.fmin_l_bfgs_b(sae_cost,
                                                             theta,
                                                             maxfun=max_iter,
                                                             m=1,
                                                             factr=10.0,
                                                             pgtol=1e-8)
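The l2/l3 weight-decay settings and the 'will train layer 2 model' message indicate this autoencoder is one layer of a stacked model topped by a softmax classifier. A rough sketch of the likely next step, pushing the training patches through the trained encoder to obtain layer-2 features; the sigmoid activation, the flat [W1, W2, b1, b2] packing, and the one-example-per-column data layout are all assumptions about code not shown here:

    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    # Hypothetical feedforward through the trained layer-2 encoder
    hv = hidden_size * visible_size
    W1 = l2_model[:hv].reshape(hidden_size, visible_size)
    b1 = l2_model[2 * hv:2 * hv + hidden_size]
    l2_features = sigmoid(W1.dot(train_patches) + b1[:, np.newaxis])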
Example #4

# Sparsity parameter (desired average activation of each hidden layer neuron)
ρ = 0.1
# Weight decay parameter
λ = 3e-3
# Weight of sparsity penalty term
β = 3

############################################################################################
############################################################################################

############################################################################################
# =============================== Train ================================================== #
############################################################################################

# Initialize parameters of the model
θ = sparse_autoencoder.initialize_params(hidden_size, input_size)
# Declare cost function
J = lambda θ: sparse_autoencoder.calc_cost_n_gradient(
    θ, input_size, hidden_size, λ, ρ, β, images)
# Set training options
options_ = {'maxiter': 1000, 'disp': True}
# Minimize cost function J by modifying parameters θ using L-BFGS-B optimization algo
result = scipy.optimize.minimize(J,
                                 θ,
                                 method='L-BFGS-B',
                                 jac=True,
                                 options=options_)
# Get optimized parameter vector
opt_θ = result.x

print(result)
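Because the minimizer is called with jac=True, the objective J must return the cost and its gradient as a pair; sparse_autoencoder.calc_cost_n_gradient is assumed to follow that contract. A tiny self-contained illustration of the same calling convention, using a toy quadratic in place of the autoencoder cost:

import numpy as np
import scipy.optimize

def toy_cost_and_grad(x):
    # Returns (cost, gradient), the shape L-BFGS-B expects when jac=True
    cost = 0.5 * np.sum(x ** 2)
    grad = x
    return cost, grad

toy_result = scipy.optimize.minimize(toy_cost_and_grad,
                                     np.ones(5),
                                     method='L-BFGS-B',
                                     jac=True,
                                     options={'maxiter': 100, 'disp': False})
print(toy_result.x)  # close to the zero vector, the minimum of the quadratic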