def ex(inputs):
    inputs = utils.remove_dc(inputs)
    inputs, zca = utils.zca_white(inputs, 0.1)
    batches = data.BatchIterator(inputs, 100)

    num_vis = inputs.shape[1]
    num_hid = 400
    epochs = 100
    momentum = 0

    initial_params = grbm.initial_params(num_hid, num_vis, 0.001, 1.0)
    neg_free_energy_grad = functools.partial(grbm.neg_free_energy_grad,
                                             learn_sigma=False)

    def f(params, inputs):
        return rbm.cd(params, inputs,
                      grbm.sample_h_noisy_relu, grbm.sample_v,
                      neg_free_energy_grad)

    learning_rate = 0.005

    output_dir = utils.make_output_directory(OUTPUT_PATH)
    save_params = parameters.save_hook(output_dir)

    error_history = []
    sparsity_history = []

    def post_epoch(*args):
        W_norm = utils.rescale(args[0].W)
        utils.save_image(utils.tile(W_norm),
                         os.path.join(output_dir, 'w%i.png' % args[1]))
        # Estimate sparsity from a subset of the data.
        h_mean = grbm.sample_h_noisy_relu(args[0], inputs[0:5000], True)[1]
        mean_activation = np.mean(h_mean > 0)
        print 'approx mean activation: %f' % mean_activation
        # The callback from optimize.sgd needs modifying so that it passes
        # the reconstruction error as an argument to make this work. (This
        # was used when I did the original experiments.)
        # error_history.append(args[2])
        sparsity_history.append(mean_activation)
        save_params(args[0], args[1])

    params = optimize.sgd(f, initial_params, batches, epochs,
                          learning_rate, momentum, post_epoch=post_epoch)

    with open(os.path.join(output_dir, 'history.pickle'), 'wb') as f:
        pickle.dump(error_history, f, -1)
        pickle.dump(sparsity_history, f, -1)

    return params, error_history, sparsity_history
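# The post_epoch hook in ex (above) notes that optimize.sgd would need to
# pass the reconstruction error to the callback before error_history is
# populated again. A minimal sketch, assuming a hypothetical callback
# signature post_epoch(params, epoch, error):
#
# def post_epoch(params, epoch, error):
#     error_history.append(error)    # per-epoch reconstruction error
#     save_params(params, epoch)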
def ex1(inputs):
    """
    Gaussian/Bernoulli RBM.
    """
    # Can learn edge-detector-like filters, although learning is quite
    # slow and very sensitive to meta-parameter selection. Momentum
    # seems necessary; without it, it's difficult to learn anything.
    #
    # When learning on whitened data, setting the fudge factor to
    # something around 0.1 was important. Setting it much lower causes
    # point filters to be learned.
    #
    # Learning does happen without whitening, but the features tend to
    # be less localized than those learned with whitening.
    # Interestingly, the reconstruction error is lower without
    # whitening, but I suspect I'm comparing apples with oranges there.
    #
    # With only 25 hidden units I couldn't find a way to learn anything
    # much. Contrast this with an autoencoder, which does seem to learn
    # filters in a similar situation.
    #
    # error (100 epochs) = 25.492607
    # error (500 epochs) = 24.096789
    # See ex1.png.
    inputs = utils.remove_dc(inputs)
    inputs, zca = utils.zca_white(inputs, 0.1)
    batches = data.BatchIterator(inputs, 50)

    num_vis = 64
    num_hid = 100
    epochs = 500

    initial_params = grbm.initial_params(num_hid, num_vis, 0.05)
    sample_v = functools.partial(grbm.sample_v, add_noise=False)
    neg_free_energy_grad = functools.partial(grbm.neg_free_energy_grad,
                                             learn_sigma=False)

    def f(params, inputs):
        return rbm.cd(params, inputs,
                      grbm.sample_h, sample_v,
                      neg_free_energy_grad)

    learning_rate = 0.01
    momentum = meta.step(0.5, 0.9, 5)

    return optimize.sgd(f, initial_params, batches, epochs,
                        learning_rate, momentum)
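# meta.step(0.5, 0.9, 5) above is presumably a step schedule: momentum 0.5
# for the first 5 epochs and 0.9 afterwards. A sketch of such a schedule
# (an assumption about meta.step's behaviour, not its actual source):
#
# def step_schedule(initial, final, switch_epoch):
#     def momentum_at(epoch):
#         return initial if epoch < switch_epoch else final
#     return momentum_at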
def ex3(inputs):
    """
    Gaussian/NReLU RBM with learned visible variances.
    """
    # I found it essential to add noise/sample from the visible units
    # during reconstruction. If I don't do this, the variances increase
    # at each epoch (I'd expect them to decrease during learning from
    # their initial value of one), as does the error.
    #
    # This result was obtained by running SGD without momentum. The
    # default momentum schedule set up a big oscillation which caused
    # the error to increase over a few epochs, after which learning
    # appeared to be stuck on a plateau. More modest schedules (such as
    # 0.1 for the first 10 epochs, 0.2 thereafter) allow learning, but
    # they don't result in any improvement in error.
    #
    # The variances learned are all very similar. Their mean is 0.39,
    # their standard deviation is 0.04.
    #
    # A quick test suggests that smaller initial weights lead to a
    # slightly lower reconstruction error.
    #
    # error (100 epochs) = 7.401834
    # error (500 epochs) = 7.245722
    # See ex3.png.
    inputs = utils.remove_dc(inputs)
    inputs, zca = utils.zca_white(inputs, 0.1)
    batches = data.BatchIterator(inputs, 50)

    num_vis = 64
    num_hid = 100
    epochs = 500

    initial_params = grbm.initial_params(num_hid, num_vis, 0.05)

    def f(params, inputs):
        return rbm.cd(params, inputs,
                      grbm.sample_h_noisy_relu, grbm.sample_v,
                      grbm.neg_free_energy_grad)

    learning_rate = 0.01
    momentum = 0

    return optimize.sgd(f, initial_params, batches, epochs,
                        learning_rate, momentum)
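# The variance statistics quoted in ex3's comment can be checked from the
# returned parameters. A sketch, assuming the learned visible variances are
# exposed as params.sigma_sq (a hypothetical attribute name):
#
# params = ex3(inputs)
# print 'mean %.2f, std %.2f' % (np.mean(params.sigma_sq),
#                                np.std(params.sigma_sq))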
def ex(inputs):
    inputs = zero_mean(inputs)
    inputs, zca = utils.zca_white(inputs, 0.1)
    batches = data.BatchIterator(inputs, 100)

    num_vis = inputs.shape[1]
    num_hid = 400
    epochs = 100
    momentum = 0

    initial_params = grbm.initial_params(num_hid, num_vis, 0.001, 0.4)
    neg_free_energy_grad = functools.partial(grbm.neg_free_energy_grad,
                                             learn_sigma=False)

    def f(params, inputs):
        return rbm.cd(params, inputs,
                      grbm.sample_h_noisy_relu, grbm.sample_v,
                      neg_free_energy_grad)

    learning_rate = 0.005

    output_dir = utils.make_output_directory(OUTPUT_PATH)
    save_params = parameters.save_hook(output_dir)

    def post_epoch(*args):
        save_params(*args)
        # Save a visualization of the weights.
        W_norm = utils.rescale(args[0].W)
        img = Image.fromarray(
            np.uint8(utils.tile(W_norm, channel_count=3) * 255))
        img.save(os.path.join(output_dir, 'w%i.png' % args[1]))
        # Estimate sparsity from a subset of the data.
        h_mean = grbm.sample_h_noisy_relu(args[0], inputs[0:5000], True)[1]
        mean_activation = np.mean(h_mean > 0)
        print 'approx mean activation: %f' % mean_activation

    return optimize.sgd(f, initial_params, batches, epochs,
                        learning_rate, momentum, post_epoch=post_epoch)
def ex2(inputs):
    """
    Gaussian/NReLU RBM.
    """
    # Using noisy rectified linear units for the hiddens speeds up
    # learning dramatically. The reconstruction error after a single
    # epoch is lower (21.6986) than after 500 epochs in ex1.
    #
    # The filters learned have less noisy backgrounds than those
    # learned in ex1.
    #
    # error (100 epochs) = 15.941531
    # error (500 epochs) = 15.908922
    # See ex2.png.
    inputs = utils.remove_dc(inputs)
    inputs, zca = utils.zca_white(inputs, 0.1)
    batches = data.BatchIterator(inputs, 50)

    num_vis = 64
    num_hid = 100
    epochs = 500

    initial_params = grbm.initial_params(num_hid, num_vis, 0.05)
    sample_v = functools.partial(grbm.sample_v, add_noise=False)
    neg_free_energy_grad = functools.partial(grbm.neg_free_energy_grad,
                                             learn_sigma=False)

    def f(params, inputs):
        return rbm.cd(params, inputs,
                      grbm.sample_h_noisy_relu, sample_v,
                      neg_free_energy_grad)

    learning_rate = 0.01
    momentum = meta.step(0.5, 0.9, 5)

    return optimize.sgd(f, initial_params, batches, epochs,
                        learning_rate, momentum)
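# For reference, the standard noisy rectified linear unit (Nair & Hinton,
# 2010) samples max(0, x + eps) with eps drawn from N(0, sigmoid(x)), where
# x is the unit's total input. A sketch of that rule; the details of
# grbm.sample_h_noisy_relu may differ:
#
# def noisy_relu(x):
#     variance = 1.0 / (1.0 + np.exp(-x))            # sigmoid(x)
#     noise = np.random.normal(0.0, 1.0, x.shape) * np.sqrt(variance)
#     return np.maximum(0.0, x + noise)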
inputs, targets, labels = data.balance_classes(inputs, labels,
                                               mnist.NUM_CLASSES)
n = 54200
inputs = inputs[0:n]
targets = targets[0:n]
labels = labels[0:n]

# These layers differ slightly from those in the paper. My main motivation
# is to avoid having a square weight matrix between hidden layers, so that
# matrix transpose mistakes show up as shape errors.
num_vis = inputs.shape[1]
num_hid1 = 529   # 23^2
num_hid2 = 484   # 22^2
num_top = 1936   # 44^2

# Greedy layer-wise pre-training: train the first RBM, then propagate the
# data through it to train the next layer.
batches = data.BatchIterator(inputs)
initial_params = rbm.initial_params(num_hid1, num_vis)
params = sgd(rbm_obj, initial_params, batches, momentum)

inputs = logistic(inputs.dot(params.W.T) + params.h_bias)
batches = data.BatchIterator(inputs)
initial_params = rbm.initial_params(num_hid2, num_hid1)
params = sgd(rbm_obj, initial_params, batches, momentum)

# The top-level RBM is trained on the second hidden layer's activations
# concatenated with the target vectors.
inputs = logistic(inputs.dot(params.W.T) + params.h_bias)
batches = data.BatchIterator(np.hstack((targets, inputs)))
initial_params = rbm.initial_params(num_top, num_hid2 + mnist.NUM_CLASSES)

def post_epoch(*args):
    print 'Mean hidden activation prob. is %.2f' % pcd.q
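# The logistic helper used above to propagate data up through each trained
# layer isn't defined in this fragment; it is just the element-wise
# sigmoid. A sketch:
#
# def logistic(x):
#     return 1.0 / (1.0 + np.exp(-x))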
import os.path

import numpy as np

from ml import mnist, utils, optimize, rbm, data, parameters, meta
from ml import regularization as reg

print '***eval***'

DATA_PATH = os.path.expanduser('~/Development/ml/datasets')
OUTPUT_PATH = os.path.expanduser('~/Development/ml/output')

np.random.seed(32)

# data
inputs = mnist.load_inputs(DATA_PATH, mnist.TRAIN_INPUTS)
labels = mnist.load_labels(DATA_PATH, mnist.TRAIN_LABELS)
inputs = data.balance_classes(inputs, labels, mnist.NUM_CLASSES).inputs
batches = data.BatchIterator(inputs[0:54200])

# IDEA: Have a bit of code which loads the current file (as text) and
# dumps any code between two specific comments into a meta.txt file in
# the output directory.

# meta
num_vis = inputs.shape[1]
num_hid = 49
epochs = 10
k = 1  # applies to cd only
initial_params = rbm.initial_params(num_hid, num_vis)
weight_constraint = None
# weight_decay = reg.l2(0.0002)
# weight_decay = None
# momentum = meta.linear(0.5, 0.9, 10)
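# A sketch of the IDEA above: copy everything between two marker comments
# in this file into meta.txt in the output directory. The marker strings
# and the use of __file__ are assumptions, not existing conventions in
# this code base.
#
# def dump_meta(output_dir, start_marker='# meta', end_marker='# /meta'):
#     with open(__file__) as src:
#         lines = src.readlines()
#     start = next(i for i, l in enumerate(lines) if l.startswith(start_marker))
#     end = next(i for i, l in enumerate(lines) if l.startswith(end_marker))
#     with open(os.path.join(output_dir, 'meta.txt'), 'w') as out:
#         out.writelines(lines[start:end + 1])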
import os.path
import functools

import numpy as np

from ml import softmax, optimize, utils, mnist, data, meta
from ml import regularization as reg

DATA_PATH = os.path.expanduser('~/Development/ml/datasets')

train, valid = mnist.load_training_dataset(DATA_PATH)
batches = data.BatchIterator(train)

num_classes = mnist.NUM_CLASSES
num_dims = train.inputs.shape[1]
initial_params = softmax.initial_params(num_classes, num_dims)

weight_decay = None  # reg.l2(1e-4)
epochs = 50
learning_rate = 0.1
momentum = 0

def f(params, data):
    return softmax.cost(params, data.inputs, data.targets, weight_decay)

train_accuracy = functools.partial(softmax.accuracy,
                                   train.inputs, train.labels)
valid_accuracy = functools.partial(softmax.accuracy,
                                   valid.inputs, valid.labels)
train_error = utils.call_func_hook(train_accuracy)
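# The accuracy hooks above are presumably passed to the optimizer in the
# same way post_epoch is used in the RBM experiments. A sketch; the exact
# hook arguments accepted by optimize.sgd are an assumption:
#
# valid_error = utils.call_func_hook(valid_accuracy)
# params = optimize.sgd(f, initial_params, batches, epochs,
#                       learning_rate, momentum, post_epoch=train_error)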
    return error, grad

np.random.seed(1234)

# data
inputs = mnist.load_inputs(DATA_PATH, mnist.TRAIN_INPUTS)
labels = mnist.load_labels(DATA_PATH, mnist.TRAIN_LABELS)
inputs, targets, labels = data.balance_classes(inputs, labels,
                                               mnist.NUM_CLASSES)
n = 54200
inputs = inputs[0:n]
targets = targets[0:n]
labels = labels[0:n]
dataset = data.dataset(inputs, targets, labels)
batches = data.BatchIterator(dataset)

# meta
epochs = 50
learning_rate = 0.1
cd_k = 15
weight_decay = reg.l2(0.0002)
momentum = 0  # 0.1

# Load and stack params from initial pre-training.
# initial_params = dbn.params_from_rbms(
#     [parameters.load(OUTPUT_PATH, t)
#      for t in (1358445776, 1358451846, 1358456203)])

# Params after first 50 epochs of fine-tuning. (lr 0.1, p 0.0, cd_k=10)
initial_params = parameters.load(OUTPUT_PATH, timestamp=1358541318)