"""
This script makes a dataset of two million approximately whitened patches,
extracted at random uniformly from the CIFAR-100 train dataset.

This script is intended to reproduce the preprocessing used by Adam Coates
et al. in their work from the first half of 2011 on the CIFAR-10 and
STL-10 datasets.
"""
from __future__ import print_function

from pylearn2.utils import serial
from pylearn2.datasets import preprocessing
from pylearn2.datasets.cifar100 import CIFAR100
from pylearn2.utils import string

# Root under which the output is written; presumably string.preprocess expands
# the ${PYLEARN2_DATA_PATH} placeholder -- TODO confirm against pylearn2.
data_dir = string.preprocess('${PYLEARN2_DATA_PATH}')

print('Loading CIFAR-100 train dataset...')
data = CIFAR100(which_set='train')

print("Preparing output directory...")
# Output directory for this script, located beneath data_dir.
patch_dir = ''.join((data_dir, '/cifar100/cifar100_patches_8x8'))
serial.mkdir(patch_dir)
# The handle is deliberately left open: the statements that follow write the
# README text into it.
README = open(''.join((patch_dir, '/README')), 'w')

README.write("""
The .pkl files in this directory may be opened in python using
cPickle, pickle, or pylearn2.serial.load.

data.pkl contains a pylearn2 Dataset object defining an unlabeled
dataset of 2 million 8x8 approximately whitened, contrast-normalized
Exemple #2
0
This script makes a dataset of two million approximately whitened patches, extracted at random uniformly
from a downsampled version of the STL-10 unlabeled and train dataset.

It assumes that you have already run make_downsampled_stl10.py, which downsamples the STL-10 images to
1/3 of their original resolution.

This script is intended to reproduce the preprocessing used by Adam Coates et al. in their work from
the first half of 2011. It does not contrast-normalize the patches prior to whitening them.
"""

from pylearn2.utils import serial
from pylearn2.datasets import preprocessing
from pylearn2.utils import string, serial
import numpy as np

# Directory holding the STL-10 data; presumably string.preprocess expands the
# ${PYLEARN2_DATA_PATH} placeholder -- TODO confirm against pylearn2.
data_dir = string.preprocess('${PYLEARN2_DATA_PATH}/stl10')

# Single-argument print(...) produces the same output as a Python 2 print
# statement and as the Python 3 print function, so this section parses and
# behaves identically under either interpreter.
print('Loading STL10-10 unlabeled and train datasets...')
downsampled_dir = data_dir + '/stl10_32x32'

data = serial.load(downsampled_dir + '/unlabeled.pkl')
supplement = serial.load(downsampled_dir + '/train.pkl')

print('Concatenating datasets...')
# Append the rows of the train design matrix beneath the unlabeled ones so
# that `data` covers both sets.
data.set_design_matrix(np.concatenate((data.X, supplement.X), axis=0))
# The train dataset object is no longer needed once its rows are merged.
del supplement

print("Preparing output directory...")
# NOTE: data_dir is rebound here; the output goes to the location named by
# ${GOODFELI_TMP}, not beneath the STL-10 data directory used above.
data_dir = string.preprocess('${GOODFELI_TMP}')
patch_dir = data_dir + '/stl10_patches_no_shelling'
serial.mkdir(patch_dir)
Exemple #3
0




"""
Makes a version of the STL-10 dataset that has been downsampled by a factor of
3 along both axes.
"""

from pylearn2.datasets.cifar10 import CIFAR10
from pylearn2.utils import string_utils as string

# Single-argument print(...) behaves the same as a Python 2 print statement
# and as the Python 3 print function.
print('Preparing output directory...')

# NOTE(review): this is a hard-coded absolute path into one user's checkout;
# it looks like it should be derived from an environment variable -- confirm.
data_dir = string.preprocess('/u/kruegerd/repo/current/pylearn2/pylearn2/datasets/cifar10')
downsampled_dir = data_dir
# NOTE(review): `serial` and `np` are used below but are not imported in the
# visible part of this script -- confirm they are imported elsewhere.
serial.mkdir(downsampled_dir)

# Unlabeled dataset is huge, so do it in chunks
# (After downsampling it should be small enough to work with)
# Preallocated buffer: 100,000 rows of 32*32*3 float32 values.
final_unlabeled = np.zeros((100 * 1000, 32 * 32 * 3), dtype='float32')

# Process the unlabeled set in 10 chunks of 10,000 examples each.
# range() and single-argument print(...) are used so the loop runs under both
# Python 2 and Python 3 with identical output.
# NOTE(review): `STL10` and `preprocessor` are not defined in the visible part
# of this script -- confirm they are provided elsewhere.
for i in range(10):
    print('Loading unlabeled chunk ' + str(i + 1) + '/10...')
    unlabeled = STL10(which_set='unlabeled', center=True,
                      example_range=(i * 10000, (i + 1) * 10000))

    print('Preprocessing unlabeled chunk...')
    # The two spaces after "before" reproduce what the former two-item print
    # statement emitted (the literal's trailing space plus the item separator).
    print('before  ' + str((unlabeled.X.min(), unlabeled.X.max())))
    unlabeled.apply_preprocessor(preprocessor)
Exemple #4
0
"""
This script makes a dataset of 32x32 approximately whitened CIFAR-100 images.

"""

from pylearn2.utils import serial
from pylearn2.datasets import preprocessing
from pylearn2.utils import string
from pylearn2.datasets.cifar100 import CIFAR100

# Directory holding the CIFAR-100 data; presumably string.preprocess expands
# the ${PYLEARN2_DATA_PATH} placeholder -- TODO confirm against pylearn2.
data_dir = string.preprocess('${PYLEARN2_DATA_PATH}/cifar100')

# Single-argument print(...) behaves the same as a Python 2 print statement
# and as the Python 3 print function, so this section runs under either.
print('Loading CIFAR-100 train dataset...')
train = CIFAR100(which_set='train')

print("Preparing output directory...")
output_dir = data_dir + '/whitened'
serial.mkdir(output_dir)
# The handle is deliberately left open: the statements that follow write the
# README text into it.
README = open(output_dir + '/README', 'w')

README.write("""
The .pkl files in this directory may be opened in python using
cPickle, pickle, or pylearn2.serial.load.

train.pkl, and test.pkl each contain
a pylearn2 Dataset object defining a labeled
dataset of an approximately whitened version of the CIFAR-100
dataset. train.pkl contains labeled train examples. test.pkl
contains labeled test examples.

preprocessor.pkl contains a pylearn2 ZCA object that was used