def get_dataset_mnist():
    """
    Return the MNIST train and test sets, caching them as pickle files.

    When both ``mnist_train.pkl`` and ``mnist_test.pkl`` exist in the
    current working directory they are loaded with ``serial.load``.
    Otherwise the datasets are built with ``mnist.MNIST(..., one_hot=True)``
    and written to those same two paths with ``serial.save``.

    :returns: ``(trainset, testset)``; each object has its ``yaml_src``
        attribute set to a ``!pkl:`` tag naming its pickle file.
    """
    train_path = 'mnist_train.pkl'
    test_path = 'mnist_test.pkl'

    if os.path.exists(train_path) and os.path.exists(test_path):
        print('loading preprocessed data')
        trainset = serial.load(train_path)
        testset = serial.load(test_path)
    else:
        print('loading raw data...')
        trainset = mnist.MNIST(which_set="train", one_hot=True)
        testset = mnist.MNIST(which_set="test", one_hot=True)
        # Write to the very same paths that are tested and loaded above, so
        # the cache check and the cache write can never drift apart.
        serial.save(train_path, trainset)
        serial.save(test_path, testset)

    # this path will be used for visualizing weights after training is done
    # (set on both branches so the returned objects always carry it)
    trainset.yaml_src = '!pkl: "%s"' % train_path
    testset.yaml_src = '!pkl: "%s"' % test_path

    return trainset, testset
from pylearn2.utils import serial
from pylearn2.datasets import mnist
import numpy as N
from scipy import io

train = mnist.MNIST(which_set="train")

# Load the matrix stored under key 'X' of X.mat; each row is later viewed
# as one 20x20 image (see the reshape below).
D = io.loadmat('X.mat')['X']

# Keep only rows whose standard deviation exceeds .224, then at most the
# first 50000 of those.
s = D.std(axis=1)
D = D[s > .224, :]
D = D[0:50000, :]
m = D.shape[0]
print(D.shape)

V = D.reshape(m, 20, 20)

# Downsample every 20x20 image to 10x10 by averaging non-overlapping 2x2
# cells.  This is done for all images at once instead of looping in Python;
# the order of additions (rows first, then columns, then / 4.) is unchanged,
# so the values are identical to the per-image computation.
P = N.zeros((m, 10, 10, 1))
row_pairs = V[:, 0:20:2, :] + V[:, 1:20:2, :]
P[:, :, :, 0] = (row_pairs[:, :, 0:20:2] + row_pairs[:, :, 1:20:2]) / 4.
#
def gendata(enable, os, downsample, textid=None, seed=2313, verbose=False):
    """
    Generate the MNIST+ dataset.

    The MNIST train and test sets are stacked, every image is optionally
    rotated, resized to ``os`` x ``os``, optionally blended onto a texture
    patch, and finally embossed.  The result is pickled to
    ``mnistplus[_azi][_rot][_tex].pkl`` in the current directory (note that
    enabling elevation does not add a suffix to the file name).

    :param enable: dictionary of flags with keys ['texture', 'azimuth',
        'rotation', 'elevation'] to enable/disable a given factor of
        variation.
    :param textid: if enable['texture'], id number of the Brodatz texture
        to load. If textid is None, we load a random texture for each MNIST
        image.
    :param os: output size (width and height) of MNIST+ images. (The name
        shadows the standard ``os`` module inside this function; it is kept
        because it is part of the signature.)
    :param downsample: factor by which to downsample texture.
    :param seed: integer for seeding RNG.
    :param verbose: bool; when true each generated image is displayed.
    :returns: None. The pickled dictionary has keys 'data' and 'label',
        plus 'azimuth', 'elevation', 'rotation', 'texture_id' and
        'texture_pos' for whichever factors are enabled.
    """
    rng = numpy.random.RandomState(seed)

    # Merge the train and test splits into one dataset.
    data = mnist.MNIST('train')
    test = mnist.MNIST('test')
    data.X = numpy.vstack((data.X, test.X))
    data.y = numpy.hstack((data.y, test.y))
    del test

    n_images = len(data.X)
    n_labels = len(data.y)

    output = {}
    output['data'] = numpy.zeros((n_images, os * os))
    output['label'] = numpy.zeros(n_labels)
    if enable['azimuth']:
        output['azimuth'] = numpy.zeros(n_labels)
    if enable['elevation']:
        output['elevation'] = numpy.zeros(n_labels)
    if enable['rotation']:
        output['rotation'] = numpy.zeros(n_labels)
    if enable['texture']:
        output['texture_id'] = numpy.zeros(n_labels)
        output['texture_pos'] = numpy.zeros((n_labels, 2))

    for i in xrange(n_images):

        # get MNIST image
        frgd_img = to_img(data.X[i], 28)
        frgd_img = frgd_img.convert('L')

        if enable['rotation']:
            rot = rng.randint(0, 360)
            output['rotation'][i] = rot
            frgd_img = frgd_img.rotate(rot, Image.BILINEAR)

        frgd_img = frgd_img.resize((os, os), Image.BILINEAR)

        if enable['texture']:

            # Choose the texture for THIS image in a local variable.  The
            # `textid` argument must not be overwritten: doing so would
            # make every image after the first reuse the first random
            # texture instead of drawing a fresh one per image.
            tex_id = textid
            if tex_id is None:
                # extract patch from texture database. Note that texture #14
                # does not exist.
                tex_id = 14
                while tex_id == 14:
                    tex_id = rng.randint(1, 113)

            patch_img, (px, py) = extract_patch(tex_id, os, downsample)
            patch_arr = to_array(patch_img)

            # store output details
            output['texture_id'][i] = tex_id
            output['texture_pos'][i] = (px, py)

            # generate binary mask for digit outline
            frgd_arr = to_array(frgd_img)
            mask_arr = frgd_arr > 0.1

            # copy contents of masked-MNIST image into background texture
            blend_arr = copy(patch_arr)
            blend_arr[mask_arr] = frgd_arr[mask_arr]

            # this now becomes the image to emboss
            frgd_img = to_img(blend_arr, os)

        # Fixed defaults are used when a factor of variation is disabled.
        azi = 45
        if enable['azimuth']:
            azi = rng.randint(0, 360)
            output['azimuth'][i] = azi
        ele = 18.
        if enable['elevation']:
            ele = rng.randint(0, 60)
            output['elevation'][i] = ele

        mboss_img = emboss(frgd_img, azi=azi, ele=ele)
        mboss_arr = to_array(mboss_img)

        output['data'][i] = mboss_arr
        output['label'][i] = data.y[i]

        if verbose:
            pl.imshow(mboss_arr.reshape(os, os))
            pl.gray()
            pl.show()

    fname = 'mnistplus'
    if enable['azimuth']:
        fname += "_azi"
    if enable['rotation']:
        fname += "_rot"
    if enable['texture']:
        fname += "_tex"

    # HIGHEST_PROTOCOL is a binary pickle format, so the file has to be
    # opened in binary mode; `with` closes it even if dumping fails.
    with open(fname + '.pkl', 'wb') as fp:
        pickle.dump(output, fp, protocol=pickle.HIGHEST_PROTOCOL)
# Misclassification rate: mean over rows of "argmax of y_true differs from
# argmax of y_s".
misclass_cost = T.neq(T.argmax(y_true, axis=1), T.argmax(y_s, axis=1)).mean()

# Gradients of `cost` (defined earlier, outside this excerpt) with respect to
# the network parameters, turned into updates by peano's adam_update.
params = mnist_net.params
gparams = T.grad(cost, wrt=params)
updates = peano.optimizer.adam_update(params, gparams)

# Compiled functions: one returns the cost and applies the updates, the other
# only evaluates the misclassification rate.
learn_mlp_fn = theano.function(inputs=[v, y_true], outputs=cost, updates=updates)
misclass_mlp_fn = theano.function(inputs=[v, y_true], outputs=misclass_cost)

from pylearn2.datasets import mnist
from pylearn2.space import CompositeSpace, VectorSpace

# Examples [0, 50000) of the MNIST 'train' set are used for training and
# [50000, 60000) for validation.
ds = mnist.MNIST(which_set='train', start=0, stop=50000)
val = mnist.MNIST(which_set='train', start=50000, stop=60000)
val_X, val_y = val.get_data()
# Turn validation labels into rows of a 10x10 identity matrix (one-hot) and
# drop singleton dimensions; assumes val_y holds integer labels in 0..9 --
# TODO confirm.
val_y = np.squeeze(np.eye(10)[val_y]).astype(dtype)
data_space = VectorSpace(dim=784)
label_space = VectorSpace(dim=10)

for i in range(200):
    # NOTE(review): `cost` is rebound to a float here; the symbolic `cost`
    # used above is no longer reachable by that name from this point on.
    cost = 0.
    misclass = 0.
    # Sequential iterator yielding batches of 100 (features, targets) pairs.
    ds_iter = ds.iterator(mode='sequential', batch_size=100,
                          data_specs=(CompositeSpace(
                              (data_space, label_space)),
                              ('features', 'targets')))
# Command-line options: path of the pickled model, a flag passed on as
# `large_ais`, and the seed handed to the likelihood estimator.
parser = optparse.OptionParser()
parser.add_option('-m', '--model', dest='path', type='string',
                  action='store')
parser.add_option('--large', dest='large', default=False,
                  action='store_true')
parser.add_option('--seed', dest='seed', type='int', default=980293841,
                  action='store')
opts, args = parser.parse_args()

# Load model and retrieve parameters.
model = serial.load(opts.path)
model.do_theano()

# Load dataset.
trainset = mnist.MNIST('train', binarize=True)
testset = mnist.MNIST('test', binarize=True)

estimate_likelihood(
    model,
    trainset,
    testset,
    large_ais=opts.large,
    seed=opts.seed,
)
# default log-partition log_za = 0 for n_ui in model.n_u: log_za += n_ui * numpy.log(2) log_z = log_za + dlogz print 'log_za = ', log_za print 'log_z = ', log_z print 'var_dlogz = ', var_dlogz ############################## # COMPUTE TEST SET LIKELIHOOD ############################## from pylearn2.datasets import mnist assert opts.dataset in ['train','test'] data = mnist.MNIST(opts.dataset, binarize=True) i = 0. nll = 0 for i in xrange(0, len(data.X), model.batch_size): # recast data as floatX and apply preprocessing if required x = numpy.array(data.X[i:i + model.batch_size, :], dtype=floatX) # perform inference model.setup_pos_func(x) psamples = inference_fn() # entropy of h(q) adds contribution to variational lower-bound hq = 0
# default log-partition log_za = numpy.sum(numpy.log(1 + numpy.exp(model.bias[1].get_value()))) log_za += model.n_u[0] * numpy.log(2) log_za += model.n_u[2] * numpy.log(2) log_z = log_za + dlogz print 'log_za = ', log_za print 'log_z = ', log_z print 'var_dlogz = ', var_dlogz ############################## # COMPUTE TEST SET LIKELIHOOD ############################## from pylearn2.datasets import mnist assert opts.dataset in ['train', 'test'] data = mnist.MNIST(opts.dataset) i = 0. nll = 0 for i in xrange(0, len(data.X), model.batch_size): # recast data as floatX and apply preprocessing if required x = numpy.array(data.X[i:i + model.batch_size, :], dtype=floatX) # perform inference psamples = inference_fn(x) # entropy of h(q) adds contribution to variational lower-bound hq = 0 for psample in psamples[1:]: