Example #1
import os

from pylearn2.datasets import mnist
from pylearn2.utils import serial


def get_dataset_mnist():

    train_path = 'mnist_train.pkl'
    test_path = 'mnist_test.pkl'

    if os.path.exists(train_path) and \
            os.path.exists(test_path):
        print 'loading preprocessed data'
        trainset = serial.load(train_path)
        testset = serial.load(test_path)

    else:
        print 'loading raw data...'
        trainset = mnist.MNIST(which_set="train", one_hot=True)
        testset = mnist.MNIST(which_set="test", one_hot=True)

        serial.save(train_path, trainset)
        serial.save(test_path, testset)

        # this path will be used for visualizing weights after training is done
        trainset.yaml_src = '!pkl: "%s"' % train_path
        testset.yaml_src = '!pkl: "%s"' % test_path

    return trainset, testset
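
A minimal usage sketch (assuming pylearn2's data path is configured so
mnist.MNIST can find the raw files; the .pkl caches are written on the
first call):

trainset, testset = get_dataset_mnist()
print trainset.X.shape    # (60000, 784): one flattened 28x28 image per row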
Example #2
from pylearn2.utils import serial
from pylearn2.datasets import mnist
import numpy as N
from scipy import io

train = mnist.MNIST(which_set="train")

# load a design matrix of image patches (one flattened 20x20 patch per row)
D = io.loadmat('X.mat')['X']

# discard low-contrast patches: per-patch standard deviation must exceed .224
s = D.std(axis=1)
D = D[s > .224, :]

# keep at most the first 50000 patches
D = D[0:50000, :]

m = D.shape[0]
print D.shape
V = D.reshape(m, 20, 20)

# downsample each 20x20 patch to 10x10 by averaging non-overlapping
# 2x2 blocks
P = N.zeros((m, 10, 10, 1))

for i in xrange(m):

    cropped = V[i, :, :]

    # sum adjacent row pairs, then adjacent column pairs, and divide by 4
    shrunk1 = cropped[0:20:2, :] + cropped[1:20:2, :]
    shrunk2 = shrunk1[:, 0:20:2] + shrunk1[:, 1:20:2]

    P[i, :, :, 0] = shrunk2 / 4.
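
The loop above can be collapsed into a single vectorized reshape; a sketch
of the equivalent 2x2 block average (same result, reusing m and V from
above):

P_vec = V.reshape(m, 10, 2, 10, 2).mean(axis=(2, 4))[:, :, :, N.newaxis]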
Example #3
def gendata(enable, os, downsample, textid=None, seed=2313, verbose=False):
    """
    Generate the MNIST+ dataset.
    :param enable: dictionary of flags with keys ['texture', 'azimuth',
    'rotation', 'elevation'] to enable/disable a given factor of variation.
    :param textid: if enable['texture'], id number of the Brodatz texture to
    load. If textid is None, we load a random texture for each MNIST image.
    :param os: output size (width and height) of MNIST+ images.
    :param downsample: factor by which to downsample texture.
    :param seed: integer for seeding RNG.
    :param verbose: bool
    """
    rng = numpy.random.RandomState(seed)

    data = mnist.MNIST('train')
    test = mnist.MNIST('test')
    data.X = numpy.vstack((data.X, test.X))
    data.y = numpy.hstack((data.y, test.y))
    del test

    output = {}
    output['data'] = numpy.zeros((len(data.X), os * os))
    output['label'] = numpy.zeros(len(data.y))
    if enable['azimuth']:
        output['azimuth'] = numpy.zeros(len(data.y))
    if enable['elevation']:
        output['elevation'] = numpy.zeros(len(data.y))
    if enable['rotation']:
        output['rotation'] = numpy.zeros(len(data.y))
    if enable['texture']:
        output['texture_id'] = numpy.zeros(len(data.y))
        output['texture_pos'] = numpy.zeros((len(data.y), 2))

    for i in xrange(len(data.X)):

        # get MNIST image
        frgd_img = to_img(data.X[i], 28)
        frgd_img = frgd_img.convert('L')

        if enable['rotation']:
            rot = rng.randint(0, 360)
            output['rotation'][i] = rot
            frgd_img = frgd_img.rotate(rot, Image.BILINEAR)

        frgd_img = frgd_img.resize((os, os), Image.BILINEAR)

        if enable['texture']:

            if textid is None:
                # extract a patch from the texture database. Note that
                # texture #14 does not exist, so redraw until valid.
                # Use a local id so that a fresh random texture is drawn
                # for each image, as the docstring promises (reassigning
                # textid itself would freeze the choice after image 0).
                tid = 14
                while tid == 14:
                    tid = rng.randint(1, 113)
            else:
                tid = textid

            patch_img, (px, py) = extract_patch(tid, os, downsample)
            patch_arr = to_array(patch_img)

            # store output details
            output['texture_id'][i] = tid
            output['texture_pos'][i] = (px, py)

            # generate binary mask for digit outline
            frgd_arr = to_array(frgd_img)
            mask_arr = frgd_arr > 0.1

            # copy contents of masked-MNIST image into background texture
            blend_arr = copy(patch_arr)
            blend_arr[mask_arr] = frgd_arr[mask_arr]

            # this now becomes the image to emboss
            frgd_img = to_img(blend_arr, os)

        azi = 45
        if enable['azimuth']:
            azi = rng.randint(0, 360)
            output['azimuth'][i] = azi
        ele = 18.
        if enable['elevation']:
            ele = rng.randint(0, 60)
            output['elevation'][i] = ele

        mboss_img = emboss(frgd_img, azi=azi, ele=ele)
        mboss_arr = to_array(mboss_img)

        output['data'][i] = mboss_arr
        output['label'][i] = data.y[i]

        if verbose:
            pl.imshow(mboss_arr.reshape(os, os))
            pl.gray()
            pl.show()

    fname = 'mnistplus'
    if enable['azimuth']:
        fname += "_azi"
    if enable['rotation']:
        fname += "_rot"
    if enable['texture']:
        fname += "_tex"
    fp = open(fname + '.pkl', 'wb')
    pickle.dump(output, fp, protocol=pickle.HIGHEST_PROTOCOL)
    fp.close()
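
A hypothetical invocation (the output size and downsample factor are
illustrative, and the helpers to_img, to_array, extract_patch and emboss
must be in scope); this draws a random texture per image and writes
mnistplus_rot_tex.pkl:

enable = {'texture': True, 'azimuth': False,
          'rotation': True, 'elevation': False}
gendata(enable, os=48, downsample=2)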
Example #4
import numpy as np
import theano
import theano.tensor as T

# `v`, `y_true`, `y_s` (the network's output), `cost`, `mnist_net`, `dtype`
# and the `peano` optimizer module are defined earlier in the original
# script.
misclass_cost = T.neq(T.argmax(y_true, axis=1), T.argmax(y_s, axis=1)).mean()

params = mnist_net.params
gparams = T.grad(cost, wrt=params)
updates = peano.optimizer.adam_update(params, gparams)

learn_mlp_fn = theano.function(inputs=[v, y_true],
                               outputs=cost,
                               updates=updates)

misclass_mlp_fn = theano.function(inputs=[v, y_true], outputs=misclass_cost)

from pylearn2.datasets import mnist
from pylearn2.space import CompositeSpace, VectorSpace

ds = mnist.MNIST(which_set='train', start=0, stop=50000)
val = mnist.MNIST(which_set='train', start=50000, stop=60000)
val_X, val_y = val.get_data()
val_y = np.squeeze(np.eye(10)[val_y]).astype(dtype)

data_space = VectorSpace(dim=784)
label_space = VectorSpace(dim=10)

# simple training loop: 200 epochs of sequential 100-example batches
for i in range(200):
    cost = 0.       # running training cost for the epoch
    misclass = 0.   # validation misclassification rate
    ds_iter = ds.iterator(mode='sequential',
                          batch_size=100,
                          data_specs=(CompositeSpace(
                              (data_space, label_space)), ('features',
                                                           'targets')))
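    # The listing is truncated here; a plausible continuation of the epoch
    # loop (a sketch, assuming each (features, targets) batch is fed to the
    # compiled functions above -- not part of the original example):
    n_batches = 0
    for X_batch, y_batch in ds_iter:
        cost += learn_mlp_fn(X_batch, y_batch)
        n_batches += 1
    misclass = misclass_mlp_fn(val_X, val_y)
    print 'epoch %d: train cost %.4f, val misclass %.4f' % (
        i, cost / n_batches, misclass)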
Example #5
    # requires `import optparse`, pylearn2's `serial` and `mnist` modules,
    # and an estimate_likelihood() helper defined elsewhere in the script
    parser = optparse.OptionParser()
    parser.add_option('-m',
                      '--model',
                      action='store',
                      type='string',
                      dest='path')
    parser.add_option('--large',
                      action='store_true',
                      dest='large',
                      default=False)
    parser.add_option('--seed',
                      action='store',
                      type='int',
                      dest='seed',
                      default=980293841)
    (opts, args) = parser.parse_args()

    # Load model and retrieve parameters.
    model = serial.load(opts.path)
    model.do_theano()
    # Load dataset.
    trainset = mnist.MNIST('train', binarize=True)
    testset = mnist.MNIST('test', binarize=True)

    estimate_likelihood(model,
                        trainset,
                        testset,
                        large_ais=opts.large,
                        seed=opts.seed)
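
A hypothetical command line for this snippet, assuming it is the body of a
script named estimate_likelihood.py (the filename is illustrative):

python estimate_likelihood.py --model model.pkl --large --seed 1234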
Example #6
# default log-partition of the base-rate model (all weights and biases
# zero): each binary unit contributes a factor of 2
log_za = 0
for n_ui in model.n_u:
    log_za += n_ui * numpy.log(2)
log_z = log_za + dlogz
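
# A sketch of why the base measure is n*log(2), assuming all units are
# binary and the base-rate model has zero weights and biases: every unit
# is then independent and uniform over {0, 1}, so
#   log Z_a = log(prod_i 2^{n_i}) = sum_i n_i * log(2),
# and dlogz (presumably an AIS estimate) corrects toward the true log Z.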

print 'log_za = ', log_za
print 'log_z = ', log_z
print 'var_dlogz = ', var_dlogz

##############################
# COMPUTE TEST SET LIKELIHOOD
##############################
from pylearn2.datasets import mnist
assert opts.dataset in ['train', 'test']
data = mnist.MNIST(opts.dataset, binarize=True)

i = 0.
nll = 0

for i in xrange(0, len(data.X), model.batch_size):

    # recast data as floatX and apply preprocessing if required
    x = numpy.array(data.X[i:i + model.batch_size, :], dtype=floatX)

    # perform inference
    model.setup_pos_func(x)
    psamples = inference_fn()

    # entropy of h(q) adds contribution to variational lower-bound
    hq = 0
Example #7
# default log-partition of the base-rate model: layer 1 keeps its biases,
# all other parameters are zero
log_za = numpy.sum(numpy.log(1 + numpy.exp(model.bias[1].get_value())))
log_za += model.n_u[0] * numpy.log(2)
log_za += model.n_u[2] * numpy.log(2)
log_z = log_za + dlogz
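
# Closed form used above (a sketch, assuming binary units and that only
# layer 1 keeps its biases b_j in the base-rate model): each biased unit
# contributes a softplus term and each unbiased unit a factor of 2, so
#   log Z_a = sum_j log(1 + exp(b_j)) + (n_0 + n_2) * log(2)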

print 'log_za = ', log_za
print 'log_z = ', log_z
print 'var_dlogz = ', var_dlogz

##############################
# COMPUTE TEST SET LIKELIHOOD
##############################
from pylearn2.datasets import mnist
assert opts.dataset in ['train', 'test']
data = mnist.MNIST(opts.dataset)

i = 0.
nll = 0

for i in xrange(0, len(data.X), model.batch_size):

    # recast data as floatX and apply preprocessing if required
    x = numpy.array(data.X[i:i + model.batch_size, :], dtype=floatX)

    # perform inference
    psamples = inference_fn(x)

    # entropy of h(q) adds contribution to variational lower-bound
    hq = 0
    for psample in psamples[1:]: