Example 1
def ex(inputs):
    inputs = utils.remove_dc(inputs)
    inputs, zca = utils.zca_white(inputs, 0.1)
    batches = data.BatchIterator(inputs, 100)
    num_vis = inputs.shape[1]
    num_hid = 400
    epochs = 100
    momentum = 0

    initial_params = grbm.initial_params(num_hid, num_vis, 0.001, 1.0)

    neg_free_energy_grad = functools.partial(grbm.neg_free_energy_grad,
                                             learn_sigma=False)

    def f(params, inputs):
        return rbm.cd(params, inputs,
                      grbm.sample_h_noisy_relu, grbm.sample_v,
                      neg_free_energy_grad)
    
    learning_rate = 0.005

    output_dir = utils.make_output_directory(OUTPUT_PATH)
    save_params = parameters.save_hook(output_dir)
    error_history = []
    sparsity_history = []

    def post_epoch(*args):
        W_norm = utils.rescale(args[0].W)
        utils.save_image(utils.tile(W_norm),
                         os.path.join(output_dir, ('w%i.png' % args[1])))

        # Estimate sparsity from subset of data.
        h_mean = grbm.sample_h_noisy_relu(args[0], inputs[0:5000], True)[1]
        mean_activation = np.mean(h_mean > 0)
        print 'approx mean activation: %f' % mean_activation
        
        # The callback from optimize.sgd needs modifying so that it
        # passes the reconstruction error as an argument for this to
        # work. (This was used when I did the original experiments.)
        # error_history.append(args[2])
        sparsity_history.append(mean_activation)
        
        save_params(args[0], args[1])

    params = optimize.sgd(f, initial_params, batches,
                          epochs, learning_rate,
                          momentum,
                          post_epoch=post_epoch)

    with open(os.path.join(output_dir, 'history.pickle'), 'wb') as history_file:
        pickle.dump(error_history, history_file, -1)
        pickle.dump(sparsity_history, history_file, -1)

    return params, error_history, sparsity_history
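
Since the example above writes the two history lists to a single file with consecutive pickle.dump calls, they can be read back with consecutive pickle.load calls in the same order. A minimal sketch, reusing the same output_dir:

import os
import pickle

# Read the histories back in the order they were dumped.
with open(os.path.join(output_dir, 'history.pickle'), 'rb') as history_file:
    error_history = pickle.load(history_file)
    sparsity_history = pickle.load(history_file)
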
Example 2
def ex1(inputs):
    """
    Gaussian/Bernoulli RBM.
    """
    # Can learn edge-detector-like filters, although learning is quite
    # slow and very sensitive to meta-parameter selection.

    # Momentum seems necessary; without it it's difficult to learn
    # anything.

    # When learning on whitened data, setting the fudge factor to
    # something around 0.1 was important. Setting it much lower causes
    # point filters to be learned.

    # Learning does happen if you don't use whitening, but the
    # features tend to be less localized than those learned with
    # whitening. Interestingly, the reconstruction error is lower
    # without whitening, but I suspect I'm comparing apples with
    # oranges there.

    # With only 25 hidden units I couldn't find a way to learn
    # anything much. Contrast this to an autoencoder which does seem
    # to learn filters in a similar situation.

    # error (100 epochs) = 25.492607
    # error (500 epochs) = 24.096789

    # See ex1.png.

    inputs = utils.remove_dc(inputs)
    inputs, zca = utils.zca_white(inputs, 0.1)
    batches = data.BatchIterator(inputs, 50)
    num_vis = 64
    num_hid = 100
    epochs = 500
    initial_params = grbm.initial_params(num_hid, num_vis, 0.05)

    sample_v = functools.partial(grbm.sample_v, add_noise=False)
    neg_free_energy_grad = functools.partial(grbm.neg_free_energy_grad,
                                             learn_sigma=False)

    def f(params, inputs):
        return rbm.cd(params, inputs, grbm.sample_h, sample_v,
                      neg_free_energy_grad)

    learning_rate = 0.01
    momentum = meta.step(0.5, 0.9, 5)
    return optimize.sgd(f, initial_params, batches, epochs, learning_rate,
                        momentum)
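
The comments above stress the importance of the whitening fudge factor. utils.zca_white isn't shown in these snippets; a typical ZCA whitening implementation adds a small epsilon to the eigenvalues before taking the inverse square root, roughly as below. This is a sketch of the standard technique under that assumption, not the library's actual code.

import numpy as np

def zca_white(inputs, eps):
    # inputs: one example per row, assumed already zero-mean
    # (e.g. after remove_dc). eps is the fudge factor discussed above.
    cov = inputs.T.dot(inputs) / inputs.shape[0]
    eigvals, eigvecs = np.linalg.eigh(cov)
    # The fudge factor keeps small eigenvalues from blowing up the
    # rescaling; the comments suggest ~0.1 works well for these patches.
    zca = eigvecs.dot(np.diag(1.0 / np.sqrt(eigvals + eps))).dot(eigvecs.T)
    return inputs.dot(zca), zca
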
Example 3
def ex3(inputs):
    """
    Gaussian/NReLU RBM with learned visible variances.
    """
    # I found it essential to add noise/sample from the visible units
    # during reconstruction. If I don't do this, the variances increase
    # at each epoch (I'd expect them to decrease from their initial
    # value of one during learning), as does the error.

    # This result was obtained by running SGD without momentum. The
    # default momentum schedule set up a big oscillation which caused
    # the error to increase over a few epochs, after which learning
    # appeared to be stuck on a plateau. More modest schedules (such as
    # 0.1 for the first 10 epochs, 0.2 thereafter) allow learning but
    # don't result in any improvement in error.

    # The variances learned are all very similar. Their mean is 0.39,
    # their standard deviation is 0.04.

    # A quick test suggests that smaller initial weights lead to a
    # slightly lower reconstruction error.

    # error (100 epochs) = 7.401834
    # error (500 epochs) = 7.245722

    # See ex3.png.

    inputs = utils.remove_dc(inputs)
    inputs, zca = utils.zca_white(inputs, 0.1)
    batches = data.BatchIterator(inputs, 50)
    num_vis = 64
    num_hid = 100
    epochs = 500
    initial_params = grbm.initial_params(num_hid, num_vis, 0.05)

    def f(params, inputs):
        return rbm.cd(params, inputs, grbm.sample_h_noisy_relu, grbm.sample_v,
                      grbm.neg_free_energy_grad)

    learning_rate = 0.01
    momentum = 0
    return optimize.sgd(f, initial_params, batches, epochs, learning_rate,
                        momentum)
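
grbm.sample_h_noisy_relu is used throughout but its body isn't shown. Noisy rectified linear units (Nair & Hinton, 2010) are typically sampled as max(0, x + noise) with the noise variance given by the sigmoid of the pre-activation. The following is a rough sketch of just that sampling step, under a hypothetical name; the library's real function takes params and inputs and also returns the mean activation.

import numpy as np

def noisy_relu_sample(pre_activation):
    # NReLU sampling (Nair & Hinton, 2010): add Gaussian noise with
    # variance sigmoid(x) to the pre-activation, then rectify.
    variance = 1.0 / (1.0 + np.exp(-pre_activation))
    noisy = pre_activation + np.sqrt(variance) * np.random.randn(*pre_activation.shape)
    return np.maximum(0.0, noisy)
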
Example 4
def ex(inputs):
    inputs = zero_mean(inputs)
    inputs, zca = utils.zca_white(inputs, 0.1)
    batches = data.BatchIterator(inputs, 100)
    num_vis = inputs.shape[1]
    num_hid = 400
    epochs = 100
    momentum = 0

    initial_params = grbm.initial_params(num_hid, num_vis, 0.001, 0.4)

    neg_free_energy_grad = functools.partial(grbm.neg_free_energy_grad,
                                             learn_sigma=False)

    def f(params, inputs):
        return rbm.cd(params, inputs, grbm.sample_h_noisy_relu, grbm.sample_v,
                      neg_free_energy_grad)

    learning_rate = 0.005

    output_dir = utils.make_output_directory(OUTPUT_PATH)
    save_params = parameters.save_hook(output_dir)

    def post_epoch(*args):
        save_params(*args)
        # Save visualization weights.
        W_norm = utils.rescale(args[0].W)
        img = Image.fromarray(
            np.uint8(utils.tile(W_norm, channel_count=3) * 255))
        img.save(os.path.join(output_dir, ('w%i.png' % args[1])))
        # Estimate sparsity from subset of data.
        h_mean = grbm.sample_h_noisy_relu(args[0], inputs[0:5000], True)[1]
        mean_activation = np.mean(h_mean > 0)
        print 'approx mean activation: %f' % mean_activation

    return optimize.sgd(f,
                        initial_params,
                        batches,
                        epochs,
                        learning_rate,
                        momentum,
                        post_epoch=post_epoch)
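
zero_mean isn't defined in this snippet; given that the other examples call utils.remove_dc at the same point, it presumably just removes each patch's DC component, along these lines:

import numpy as np

def zero_mean(inputs):
    # Subtract each row's mean, i.e. remove the per-patch DC component
    # (assumed equivalent to utils.remove_dc in the other examples).
    return inputs - inputs.mean(axis=1, keepdims=True)
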
Example 5
def ex2(inputs):
    """
    Gaussian/NReLU RBM.
    """
    # Using noisy rectified linear units for the hidden units speeds up
    # learning dramatically. The reconstruction error after a single
    # epoch is lower (21.6986) than after 500 epochs in ex1.

    # The filters learned have less noisy backgrounds than those
    # learned in ex1.

    # error (100 epochs) = 15.941531
    # error (500 epochs) = 15.908922

    # See ex2.png.

    inputs = utils.remove_dc(inputs)
    inputs, zca = utils.zca_white(inputs, 0.1)
    batches = data.BatchIterator(inputs, 50)
    num_vis = 64
    num_hid = 100
    epochs = 500
    initial_params = grbm.initial_params(num_hid, num_vis, 0.05)

    sample_v = functools.partial(grbm.sample_v, add_noise=False)
    neg_free_energy_grad = functools.partial(grbm.neg_free_energy_grad,
                                             learn_sigma=False)

    def f(params, inputs):
        return rbm.cd(params, inputs, grbm.sample_h_noisy_relu, sample_v,
                      neg_free_energy_grad)

    learning_rate = 0.01
    momentum = meta.step(0.5, 0.9, 5)
    return optimize.sgd(f, initial_params, batches, epochs, learning_rate,
                        momentum)
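
meta.step(0.5, 0.9, 5) reads as a momentum schedule that starts at 0.5 and switches to 0.9 after 5 epochs. The real meta module isn't shown, so the following is only a hypothetical equivalent:

def step(initial, final, switch_epoch):
    # Hypothetical stand-in for meta.step: return `initial` for the
    # first `switch_epoch` epochs and `final` from then on.
    def schedule(epoch):
        return initial if epoch < switch_epoch else final
    return schedule
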
Example 6
inputs, targets, labels = data.balance_classes(inputs, labels,
                                               mnist.NUM_CLASSES)
n = 54200
inputs = inputs[0:n]
targets = targets[0:n]
labels = labels[0:n]

# These layers differ slightly from those in the paper. The main
# motivation is to avoid a square weight matrix between the hidden
# layers, which helps catch matrix transpose errors.
num_vis = inputs.shape[1]
num_hid1 = 529  # 23^2
num_hid2 = 484  # 22^2
num_top = 1936  # 44^2

batches = data.BatchIterator(inputs)
initial_params = rbm.initial_params(num_hid1, num_vis)
params = sgd(rbm_obj, initial_params, batches, momentum)

inputs = logistic(inputs.dot(params.W.T) + params.h_bias)
batches = data.BatchIterator(inputs)
initial_params = rbm.initial_params(num_hid2, num_hid1)
params = sgd(rbm_obj, initial_params, batches, momentum)

inputs = logistic(inputs.dot(params.W.T) + params.h_bias)
batches = data.BatchIterator(np.hstack((targets, inputs)))
initial_params = rbm.initial_params(num_top, num_hid2 + mnist.NUM_CLASSES)


def post_epoch(*args):
    print 'Mean hidden activation prob. is %.2f' % pcd.q
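
The logistic function used above to propagate the data up through each trained layer is the standard sigmoid:

import numpy as np

def logistic(x):
    # Standard logistic sigmoid; turns hidden pre-activations into
    # activation probabilities when stacking the layers greedily.
    return 1.0 / (1.0 + np.exp(-x))
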
Example 7
from ml import mnist, utils, optimize, rbm, data, parameters, meta
from ml import regularization as reg

print '***eval***'

DATA_PATH = os.path.expanduser('~/Development/ml/datasets')
OUTPUT_PATH = os.path.expanduser('~/Development/ml/output')

np.random.seed(32)

# data
inputs = mnist.load_inputs(DATA_PATH, mnist.TRAIN_INPUTS)
labels = mnist.load_labels(DATA_PATH, mnist.TRAIN_LABELS)
inputs = data.balance_classes(inputs, labels, mnist.NUM_CLASSES).inputs
batches = data.BatchIterator(inputs[0:54200])

# IDEA: Have a bit of code which loads the current file (as text) and
# dumps any code between two specific comments into a meta.txt file in
# the output directory.

# meta
num_vis = inputs.shape[1]
num_hid = 49
epochs = 10
k = 1  # applies to cd only
initial_params = rbm.initial_params(num_hid, num_vis)
weight_constraint = None
# weight_decay = reg.l2(0.0002)
# weight_decay = None
# momentum = meta.linear(0.5, 0.9, 10)
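
A minimal sketch of the IDEA comment above: read the current script as text and copy everything between two marker comments into meta.txt in the output directory. The marker strings and the helper name are made up for illustration.

import os

def dump_meta(source_path, output_dir,
              start_marker='# meta\n', end_marker='# end meta\n'):
    # Copy the block of code between the two marker comments into
    # meta.txt so the meta-parameters are recorded with the results.
    with open(source_path) as src:
        lines = src.readlines()
    start = lines.index(start_marker)
    end = lines.index(end_marker, start)
    with open(os.path.join(output_dir, 'meta.txt'), 'w') as out:
        out.writelines(lines[start:end + 1])
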
Example 8
import os.path
import functools
import numpy as np
from ml import softmax, optimize, utils, mnist, data, meta
from ml import regularization as reg

DATA_PATH = os.path.expanduser('~/Development/ml/datasets')

train, valid = mnist.load_training_dataset(DATA_PATH)
batches = data.BatchIterator(train)

num_classes = mnist.NUM_CLASSES
num_dims = train.inputs.shape[1]

initial_params = softmax.initial_params(num_classes, num_dims)

weight_decay = None # reg.l2(1e-4)
epochs = 50
learning_rate = 0.1
momentum = 0

def f(params, data):
    return softmax.cost(params, data.inputs, data.targets, weight_decay)

train_accuracy = functools.partial(softmax.accuracy,
                                   train.inputs, train.labels)

valid_accuracy = functools.partial(softmax.accuracy,
                                   valid.inputs, valid.labels)

train_error = utils.call_func_hook(train_accuracy)
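
The snippet stops before the optimization call. Based on the calling convention used in the RBM examples above, and assuming utils.call_func_hook wraps an accuracy function into a post-epoch callback, the remaining wiring might look roughly like this:

valid_error = utils.call_func_hook(valid_accuracy)

def post_epoch(*args):
    # Report accuracy on both splits after each epoch; assumes the hooks
    # accept the same (params, epoch) arguments as the RBM callbacks above.
    train_error(*args)
    valid_error(*args)

params = optimize.sgd(f, initial_params, batches,
                      epochs, learning_rate, momentum,
                      post_epoch=post_epoch)
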
Example 9
    return error, grad


np.random.seed(1234)

# data
inputs = mnist.load_inputs(DATA_PATH, mnist.TRAIN_INPUTS)
labels = mnist.load_labels(DATA_PATH, mnist.TRAIN_LABELS)
inputs, targets, labels = data.balance_classes(inputs, labels,
                                               mnist.NUM_CLASSES)
n = 54200
inputs = inputs[0:n]
targets = targets[0:n]
labels = labels[0:n]
dataset = data.dataset(inputs, targets, labels)
batches = data.BatchIterator(dataset)

# meta
epochs = 50
learning_rate = 0.1
cd_k = 15
weight_decay = reg.l2(0.0002)
momentum = 0  #0.1

# Load and stack params from initial pre-training.
# initial_params = dbn.params_from_rbms([parameters.load(OUTPUT_PATH, t)
#                                        for t in (1358445776, 1358451846, 1358456203)])

# Params after first 50 epochs of fine tuning. (lr 0.1, p 0.0, cd_k=10)
initial_params = parameters.load(OUTPUT_PATH, timestamp=1358541318)
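
reg.l2(0.0002) reads as standard L2 weight decay. The regularization module isn't shown; a hypothetical version that returns the penalty and its gradient for a weight matrix might be:

import numpy as np

def l2(weight_cost):
    # Hypothetical stand-in for reg.l2: returns a function that gives
    # the L2 penalty and its gradient for a weight matrix W.
    def penalty(W):
        return 0.5 * weight_cost * np.sum(W ** 2), weight_cost * W
    return penalty
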