def sgd(f, initial_params, batches, momentum, hook=None):
    """Run ``optimize.sgd``, saving the parameters after every epoch.

    A new output directory is requested from
    ``utils.make_output_directory(OUTPUT_PATH)`` and a saving hook is built
    for it with ``parameters.save_hook``. After each epoch that saving hook
    is called first, then the optional ``hook``; both receive exactly the
    positional arguments ``optimize.sgd`` passes to ``post_epoch``.

    NOTE: ``epochs`` and ``learning_rate`` are not parameters of this
    function; they are resolved from the surrounding scope at call time.

    Returns whatever ``optimize.sgd`` returns.
    """
    checkpoint = parameters.save_hook(
        utils.make_output_directory(OUTPUT_PATH))

    def post_epoch(*args):
        # Always checkpoint; the caller's hook is optional.
        checkpoint(*args)
        if hook is None:
            return
        hook(*args)

    return optimize.sgd(f, initial_params, batches, epochs, learning_rate,
                        momentum, post_epoch=post_epoch)
def ex(inputs):
    """Train with ``rbm.cd`` on whitened inputs and track hidden sparsity.

    The inputs go through ``utils.remove_dc`` and ``utils.zca_white``
    before training. The gradient function is
    ``grbm.neg_free_energy_grad`` with ``learn_sigma=False``, and the
    samplers are ``grbm.sample_h_noisy_relu`` and ``grbm.sample_v``.

    After every epoch the rescaled, tiled weights are passed to
    ``utils.save_image``, the fraction of positive hidden values on the
    first 5000 inputs is printed and recorded, and the parameters are
    saved. Once training finishes both history lists are pickled to
    ``history.pickle`` in the output directory.

    Returns a ``(params, error_history, sparsity_history)`` tuple.
    ``error_history`` is currently always empty (see the note in
    ``post_epoch``).
    """
    inputs = utils.remove_dc(inputs)
    inputs, _ = utils.zca_white(inputs, 0.1)
    batches = data.BatchIterator(inputs, 100)

    # Meta-parameters for this run.
    num_vis = inputs.shape[1]
    num_hid = 400
    epochs = 100
    momentum = 0
    learning_rate = 0.005

    initial_params = grbm.initial_params(num_hid, num_vis, 0.001, 1.0)
    neg_free_energy_grad = functools.partial(grbm.neg_free_energy_grad,
                                             learn_sigma=False)

    def f(params, inputs):
        return rbm.cd(params, inputs,
                      grbm.sample_h_noisy_relu,
                      grbm.sample_v,
                      neg_free_energy_grad)

    output_dir = utils.make_output_directory(OUTPUT_PATH)
    save_params = parameters.save_hook(output_dir)

    error_history = []
    sparsity_history = []

    def post_epoch(*args):
        # args[0] holds the current parameters; args[1] is used to number
        # the image file (presumably the epoch index -- confirm against
        # optimize.sgd).
        current, epoch = args[0], args[1]

        tiled = utils.tile(utils.rescale(current.W))
        utils.save_image(tiled, os.path.join(output_dir, 'w%i.png' % epoch))

        # Sparsity is only estimated, using the first 5000 inputs.
        hidden = grbm.sample_h_noisy_relu(current, inputs[0:5000], True)
        mean_activation = np.mean(hidden[1] > 0)
        print('approx mean activation: %f' % mean_activation)

        # Recording the reconstruction error here needs the callback from
        # optimize.sgd to pass that error as an extra argument. (It did so
        # when the original experiments were run.)
        # error_history.append(args[2])
        sparsity_history.append(mean_activation)
        save_params(current, epoch)

    params = optimize.sgd(f, initial_params, batches, epochs, learning_rate,
                          momentum, post_epoch=post_epoch)

    # Two consecutive pickles in one file: errors first, then sparsity.
    history_path = os.path.join(output_dir, 'history.pickle')
    with open(history_path, 'wb') as history_file:
        pickle.dump(error_history, history_file, -1)
        pickle.dump(sparsity_history, history_file, -1)

    return params, error_history, sparsity_history
def ex1(inputs):
    """
    Gaussian/Bernoulli RBM.

    Trains with ``rbm.cd`` using ``grbm.sample_h`` for the hidden units
    and ``grbm.sample_v`` with ``add_noise=False`` for the visible units,
    on inputs passed through ``utils.remove_dc`` and ``utils.zca_white``.
    Returns the result of ``optimize.sgd``.
    """
    # Notes from the original experiments:
    #
    # - Edge-detector-like filters can be learned, but learning is quite
    #   slow and very sensitive to meta-parameter selection.
    # - Momentum seems necessary; without it, it is difficult to learn
    #   anything.
    # - When learning on whitened data, setting the fudge factor to
    #   something around 0.1 was important. Setting it much lower causes
    #   point filters to be learned.
    # - Learning does happen without whitening, but the features tend to
    #   be less localized than those learned with whitening.
    #   Interestingly the reconstruction error is lower without
    #   whitening, but that is probably comparing apples with oranges.
    # - With only 25 hidden units no way was found to learn anything
    #   much. Contrast this with an autoencoder, which does seem to learn
    #   filters in a similar situation.
    #
    # error (100 epochs) = 25.492607
    # error (500 epochs) = 24.096789
    #
    # See ex1.png.

    inputs = utils.remove_dc(inputs)
    inputs, _ = utils.zca_white(inputs, 0.1)
    batches = data.BatchIterator(inputs, 50)

    num_vis = 64
    num_hid = 100
    epochs = 500
    learning_rate = 0.01

    initial_params = grbm.initial_params(num_hid, num_vis, 0.05)

    sample_v = functools.partial(grbm.sample_v, add_noise=False)
    neg_free_energy_grad = functools.partial(grbm.neg_free_energy_grad,
                                             learn_sigma=False)

    def f(params, inputs):
        return rbm.cd(params, inputs,
                      grbm.sample_h,
                      sample_v,
                      neg_free_energy_grad)

    momentum = meta.step(0.5, 0.9, 5)

    return optimize.sgd(f, initial_params, batches, epochs, learning_rate,
                        momentum)
def ex3(inputs):
    """
    Gaussian/NReLU RBM with learned visible variances.

    Trains with ``rbm.cd`` using ``grbm.sample_h_noisy_relu``,
    ``grbm.sample_v`` and ``grbm.neg_free_energy_grad`` unmodified (no
    ``functools.partial`` overrides), on inputs passed through
    ``utils.remove_dc`` and ``utils.zca_white``. Returns the result of
    ``optimize.sgd``.
    """
    # Notes from the original experiments:
    #
    # - It was essential to add noise to / sample from the visible units
    #   during reconstruction. Without this the variances increase at
    #   each epoch (they would be expected to decrease during learning
    #   from their initial value of one), as does the error.
    # - This result was obtained by running SGD without momentum. The
    #   default momentum schedule set up a big oscillation which caused
    #   the error to increase over a few epochs, after which learning
    #   appeared to be stuck on a plateau. More modest schedules (such as
    #   0.1 for the first 10 epochs, 0.2 thereafter) allow learning but
    #   do not result in any improvement in error.
    # - The learned variances are all very similar: their mean is 0.39
    #   and their standard deviation is 0.04.
    # - A quick test suggests that smaller initial weights lead to a
    #   slightly lower reconstruction error.
    #
    # error (100 epochs) = 7.401834
    # error (500 epochs) = 7.245722
    #
    # See ex3.png.

    inputs = utils.remove_dc(inputs)
    inputs, _ = utils.zca_white(inputs, 0.1)
    batches = data.BatchIterator(inputs, 50)

    num_vis = 64
    num_hid = 100
    epochs = 500
    learning_rate = 0.01
    momentum = 0

    initial_params = grbm.initial_params(num_hid, num_vis, 0.05)

    def f(params, inputs):
        return rbm.cd(params, inputs,
                      grbm.sample_h_noisy_relu,
                      grbm.sample_v,
                      grbm.neg_free_energy_grad)

    return optimize.sgd(f, initial_params, batches, epochs, learning_rate,
                        momentum)
def ex(inputs):
    """Train with ``rbm.cd`` on whitened inputs, saving weights each epoch.

    The inputs go through ``zero_mean`` and ``utils.zca_white`` before
    training. The gradient function is ``grbm.neg_free_energy_grad`` with
    ``learn_sigma=False``, and the samplers are
    ``grbm.sample_h_noisy_relu`` and ``grbm.sample_v``.

    After every epoch the parameters are saved, the rescaled weights are
    tiled with ``channel_count=3`` and written out as ``w<n>.png`` in the
    output directory, and the fraction of positive hidden values on the
    first 5000 inputs is printed.

    Returns the result of ``optimize.sgd``.
    """
    inputs = zero_mean(inputs)
    inputs, _ = utils.zca_white(inputs, 0.1)
    batches = data.BatchIterator(inputs, 100)

    # Meta-parameters for this run.
    num_vis = inputs.shape[1]
    num_hid = 400
    epochs = 100
    momentum = 0
    learning_rate = 0.005

    initial_params = grbm.initial_params(num_hid, num_vis, 0.001, 0.4)
    neg_free_energy_grad = functools.partial(grbm.neg_free_energy_grad,
                                             learn_sigma=False)

    def f(params, inputs):
        return rbm.cd(params, inputs,
                      grbm.sample_h_noisy_relu,
                      grbm.sample_v,
                      neg_free_energy_grad)

    output_dir = utils.make_output_directory(OUTPUT_PATH)
    save_params = parameters.save_hook(output_dir)

    def post_epoch(*args):
        save_params(*args)

        # args[0] holds the current parameters; args[1] is used to number
        # the image file (presumably the epoch index -- confirm against
        # optimize.sgd).
        current, epoch = args[0], args[1]

        # Weight visualization: scale the tiled weights by 255 and store
        # them as 8-bit pixels.
        tiled = utils.tile(utils.rescale(current.W), channel_count=3)
        picture = Image.fromarray(np.uint8(tiled * 255))
        picture.save(os.path.join(output_dir, 'w%i.png' % epoch))

        # Sparsity is only estimated, using the first 5000 inputs.
        hidden = grbm.sample_h_noisy_relu(current, inputs[0:5000], True)
        mean_activation = np.mean(hidden[1] > 0)
        print('approx mean activation: %f' % mean_activation)

    return optimize.sgd(f, initial_params, batches, epochs, learning_rate,
                        momentum, post_epoch=post_epoch)
def ex(inputs):
    """Run ``rbm.cd`` training on zero-meaned, ZCA-whitened inputs.

    Hidden units are sampled with ``grbm.sample_h_noisy_relu`` and
    visible units with ``grbm.sample_v``; ``grbm.neg_free_energy_grad`` is
    used with ``learn_sigma=False``.

    The per-epoch callback saves the parameters, writes a tiled image of
    the rescaled weights (``w<n>.png``) to the output directory, and
    prints the fraction of positive hidden values computed on the first
    5000 inputs.

    Returns the result of ``optimize.sgd``.
    """
    whitened, _ = utils.zca_white(zero_mean(inputs), 0.1)
    batches = data.BatchIterator(whitened, 100)

    num_hid = 400
    num_vis = whitened.shape[1]
    epochs = 100
    learning_rate = 0.005
    momentum = 0

    initial_params = grbm.initial_params(num_hid, num_vis, 0.001, 0.4)
    neg_free_energy_grad = functools.partial(grbm.neg_free_energy_grad,
                                             learn_sigma=False)

    def f(params, inputs):
        return rbm.cd(params, inputs,
                      grbm.sample_h_noisy_relu,
                      grbm.sample_v,
                      neg_free_energy_grad)

    output_dir = utils.make_output_directory(OUTPUT_PATH)
    save_params = parameters.save_hook(output_dir)

    def post_epoch(*args):
        save_params(*args)

        # args[0]: current parameters. args[1]: number used in the image
        # file name (presumably the epoch index -- confirm against
        # optimize.sgd).
        model, index = args[0], args[1]

        # Save visualization weights as an 8-bit image.
        pixels = np.uint8(
            utils.tile(utils.rescale(model.W), channel_count=3) * 255)
        image_path = os.path.join(output_dir, 'w%i.png' % index)
        Image.fromarray(pixels).save(image_path)

        # Estimate sparsity from a subset of the (whitened) data.
        h_mean = grbm.sample_h_noisy_relu(model, whitened[0:5000], True)[1]
        mean_activation = np.mean(h_mean > 0)
        print('approx mean activation: %f' % mean_activation)

    return optimize.sgd(f, initial_params, batches, epochs, learning_rate,
                        momentum, post_epoch=post_epoch)
def ex2(inputs):
    """
    Gaussian/NReLU RBM.

    Trains with ``rbm.cd`` using ``grbm.sample_h_noisy_relu`` for the
    hidden units and ``grbm.sample_v`` with ``add_noise=False`` for the
    visible units, on inputs passed through ``utils.remove_dc`` and
    ``utils.zca_white``. Returns the result of ``optimize.sgd``.
    """
    # Notes from the original experiments:
    #
    # - Using noisy rectified linear units speeds up learning
    #   dramatically. The reconstruction error after a single epoch
    #   (21.6986) is lower than after 500 epochs in ex1.
    #   NOTE(review): the original note said "for the visibles", but the
    #   sampler swapped in relative to ex1 is grbm.sample_h_noisy_relu --
    #   confirm which units were meant.
    # - The filters learned have less noisy backgrounds than those
    #   learned in ex1.
    #
    # error (100 epochs) = 15.941531
    # error (500 epochs) = 15.908922
    #
    # See ex2.png.

    inputs = utils.remove_dc(inputs)
    inputs, _ = utils.zca_white(inputs, 0.1)
    batches = data.BatchIterator(inputs, 50)

    num_vis = 64
    num_hid = 100
    epochs = 500
    learning_rate = 0.01

    initial_params = grbm.initial_params(num_hid, num_vis, 0.05)

    sample_v = functools.partial(grbm.sample_v, add_noise=False)
    neg_free_energy_grad = functools.partial(grbm.neg_free_energy_grad,
                                             learn_sigma=False)

    def f(params, inputs):
        return rbm.cd(params, inputs,
                      grbm.sample_h_noisy_relu,
                      sample_v,
                      neg_free_energy_grad)

    momentum = meta.step(0.5, 0.9, 5)

    return optimize.sgd(f, initial_params, batches, epochs, learning_rate,
                        momentum)
# Take the first `count` items of g, pull element 1 out of each, tile it,
# and hand the lazy result to utils.save_images together with a fresh
# temporary directory created under OUTPUT_PATH.
g = itertools.imap(
    utils.tile,
    itertools.imap(operator.itemgetter(1), itertools.islice(g, count)))
utils.save_images(g, tempfile.mkdtemp(dir=OUTPUT_PATH))

# Alternative objective built on rbm.cd, kept for reference:
# def f(params, inputs):
#     return rbm.cd(params, inputs,
#                   rbm.sample_h,
#                   rbm.sample_v,
#                   rbm.neg_free_energy_grad,
#                   weight_decay=weight_decay,
#                   k=k)

output_dir = utils.make_output_directory(OUTPUT_PATH)
save_params = parameters.save_hook(output_dir)

# Objective actually used for training, built by rbm.pcd.
f = rbm.pcd(rbm.sample_h,
            rbm.sample_v,
            rbm.neg_free_energy_grad,
            weight_decay)


def post_epoch(*args):
    """Per-epoch callback; currently does nothing."""
    # Disabled: saving the parameters and reporting f.q.
    #save_params(*args)
    #print 'Mean hidden activation prob. is %f.' % f.q


params = optimize.sgd(f, initial_params, batches, epochs, learning_rate,
                      momentum,
                      weight_constraint=weight_constraint,
                      post_epoch=post_epoch)
# Alternative objective built on rbm.cd, kept for reference:
# def f(params, inputs):
#     return rbm.cd(params, inputs,
#                   rbm.sample_h,
#                   rbm.sample_v,
#                   rbm.neg_free_energy_grad,
#                   weight_decay=weight_decay,
#                   k=k)

# The output directory and saving hook are still created even though the
# callback below no longer uses them.
output_dir = utils.make_output_directory(OUTPUT_PATH)
save_params = parameters.save_hook(output_dir)

# Objective actually used for training, built by rbm.pcd.
f = rbm.pcd(rbm.sample_h,
            rbm.sample_v,
            rbm.neg_free_energy_grad,
            weight_decay)


def post_epoch(*args):
    """Per-epoch callback; currently does nothing."""
    # Disabled: saving the parameters and reporting f.q.
    #save_params(*args)
    #print 'Mean hidden activation prob. is %f.' % f.q


params = optimize.sgd(f, initial_params, batches, epochs, learning_rate,
                      momentum,
                      weight_constraint=weight_constraint,
                      post_epoch=post_epoch)
# Model dimensions come from the dataset: the number of classes from
# mnist, the input width from the training inputs.
num_classes = mnist.NUM_CLASSES
num_dims = train.inputs.shape[1]
initial_params = softmax.initial_params(num_classes, num_dims)

# Meta-parameters. Weight decay is switched off; the previous setting is
# kept alongside for reference.
weight_decay = None  # reg.l2(1e-4)
epochs = 50
learning_rate = 0.1
momentum = 0


def f(params, data):
    """Evaluate ``softmax.cost`` on one batch with the weight decay above."""
    return softmax.cost(params, data.inputs, data.targets, weight_decay)


# Accuracy functions with their dataset bound, each wrapped by
# utils.call_func_hook. NOTE: the hooks are named *_error although they
# wrap softmax.accuracy.
train_accuracy = functools.partial(softmax.accuracy,
                                   train.inputs, train.labels)
valid_accuracy = functools.partial(softmax.accuracy,
                                   valid.inputs, valid.labels)
train_error = utils.call_func_hook(train_accuracy)
valid_error = utils.call_func_hook(valid_accuracy)


def post_epoch(*args):
    """Call the training hook, then the validation hook, after each epoch."""
    for report in (train_error, valid_error):
        report(*args)


params = optimize.sgd(f, initial_params, batches, epochs, learning_rate,
                      momentum=momentum,
                      post_epoch=post_epoch)