Example #1
# setUp method of a unittest.TestCase for the CIFAR-100 dataset; assumes
#     import numpy as np
#     from pylearn2.datasets.cifar100 import CIFAR100
#     from pylearn2.testing.skip import skip_if_no_data
def setUp(self):
    """Load the train and test sets; check for NaN and Inf values."""
    skip_if_no_data()
    self.train_set = CIFAR100(which_set='train')
    self.test_set = CIFAR100(which_set='test')
    assert not np.any(np.isnan(self.train_set.X))
    assert not np.any(np.isinf(self.train_set.X))
    assert not np.any(np.isnan(self.test_set.X))
    assert not np.any(np.isinf(self.test_set.X))
Example #2
import textwrap

from pylearn2.datasets import preprocessing
from pylearn2.datasets.cifar100 import CIFAR100
from pylearn2.utils import serial, string_utils


def main():
    data_dir = string_utils.preprocess('${PYLEARN2_DATA_PATH}/cifar100')

    print('Loading CIFAR-100 train dataset...')
    train = CIFAR100(which_set='train', gcn=55.)

    print("Preparing output directory...")
    output_dir = data_dir + '/pylearn2_gcn_whitened'
    serial.mkdir(output_dir)
    README = open(output_dir + '/README', 'w')

    README.write(
        textwrap.dedent("""
    The .pkl files in this directory may be opened in python using
    cPickle, pickle, or pylearn2.serial.load.

    train.pkl, and test.pkl each contain
    a pylearn2 Dataset object defining a labeled
    dataset of a 32x32 contrast normalized,
    approximately whitened version of the CIFAR-100 dataset.
    train.pkl contains labeled train examples.
    test.pkl contains labeled test examples.

    preprocessor.pkl contains a pylearn2 ZCA object that was used
    to approximately whiten the images. You may want to use this
    object later to preprocess other images.

    They were created with the pylearn2 script make_cifar100_gcn_whitened.py.

    All other files in this directory, including this README, were
    created by the same script and are necessary for the other files
    to function correctly.
    """))

    README.close()

    print("Learning the preprocessor \
           and preprocessing the unsupervised train data...")
    preprocessor = preprocessing.ZCA()
    train.apply_preprocessor(preprocessor=preprocessor, can_fit=True)

    print('Saving the training data')
    train.use_design_loc(output_dir + '/train.npy')
    serial.save(output_dir + '/train.pkl', train)

    print("Loading the test data")
    test = CIFAR100(which_set='test', gcn=55.)

    print("Preprocessing the test data")
    test.apply_preprocessor(preprocessor=preprocessor, can_fit=False)

    print("Saving the test data")
    test.use_design_loc(output_dir + '/test.npy')
    serial.save(output_dir + '/test.pkl', test)

    serial.save(output_dir + '/preprocessor.pkl', preprocessor)
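
A hedged usage sketch (not part of the original script): reloading the artifacts main() writes and reusing the saved ZCA preprocessor on another dataset; some_dataset is a hypothetical pylearn2 dataset, and output_dir is the directory created above.

from pylearn2.utils import serial

train = serial.load(output_dir + '/train.pkl')
preprocessor = serial.load(output_dir + '/preprocessor.pkl')
# Reuse the stored whitening statistics; can_fit=False prevents refitting.
# some_dataset.apply_preprocessor(preprocessor=preprocessor, can_fit=False)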
Example #3
# Assumes: import numpy as np
#          from pylearn2.datasets.cifar10 import CIFAR10
#          from pylearn2.datasets.cifar100 import CIFAR100
#          from pylearn2.utils import serial
def get_labels_and_fold_indices(cifar10, cifar100, stl10):
    assert stl10 or cifar10 or cifar100
    assert stl10 + cifar10 + cifar100 == 1

    if stl10:
        print('loading entire stl-10 train set just to get the labels and folds')
        stl10 = serial.load("${PYLEARN2_DATA_PATH}/stl10/stl10_32x32/train.pkl")
        train_y = stl10.y

        fold_indices = stl10.fold_indices
    elif cifar10 or cifar100:
        if cifar10:
            print('loading entire cifar10 train set just to get the labels')
            cifar = CIFAR10(which_set='train')
        else:
            assert cifar100
            print('loading entire cifar100 train set just to get the labels')
            cifar = CIFAR100(which_set='train')
            cifar.y = cifar.y_fine
        train_y = cifar.y
        assert train_y is not None

        # Build five folds of 40000 1-based indices each, leaving out a
        # different block of 10000 examples per fold (mimics the matlab
        # format of stl10).
        fold_indices = np.zeros((5, 40000), dtype='uint16')
        idx_list = np.cast['uint16'](np.arange(1, 50001))
        for i in range(5):
            mask = idx_list < i * 10000 + 1
            mask += idx_list >= (i + 1) * 10000 + 1
            fold_indices[i, :] = idx_list[mask]
        assert fold_indices.min() == 1
        assert fold_indices.max() == 50000

    return train_y, fold_indices
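
An equivalent vectorized construction of the five folds above, as a sketch (assumes numpy is imported as np):

idx = np.arange(1, 50001, dtype='uint16')
folds = np.stack([np.delete(idx, np.s_[i * 10000:(i + 1) * 10000])
                  for i in range(5)])
assert folds.shape == (5, 40000)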
Example #4
    # Method of a unittest.TestCase; assumes
    #     import numpy as np
    #     from pylearn2.datasets.cifar100 import CIFAR100
    #     from pylearn2.space import Conv2DSpace
    def test_iterator(self):
        """
        Tests that batches returned by an iterator with topological
        data_specs are the same as the ones returned by calling
        get_topological_view on the dataset with the corresponding order.
        """
        batch_size = 100
        b01c_X = self.test_set.X[0:batch_size, :]
        b01c_topo = self.test_set.get_topological_view(b01c_X)
        b01c_b01c_it = self.test_set.iterator(
            mode='sequential',
            batch_size=batch_size,
            data_specs=(Conv2DSpace(shape=(32, 32),
                                    num_channels=3,
                                    axes=('b', 0, 1, 'c')),
                        'features'))
        b01c_b01c = next(b01c_b01c_it)
        assert np.all(b01c_topo == b01c_b01c)

        c01b_test = CIFAR100(which_set='test', axes=('c', 0, 1, 'b'))
        c01b_X = c01b_test.X[0:batch_size, :]
        c01b_topo = c01b_test.get_topological_view(c01b_X)
        c01b_c01b_it = c01b_test.iterator(
            mode='sequential',
            batch_size=batch_size,
            data_specs=(Conv2DSpace(shape=(32, 32),
                                    num_channels=3,
                                    axes=('c', 0, 1, 'b')),
                        'features'))
        c01b_c01b = next(c01b_c01b_it)
        assert np.all(c01b_topo == c01b_c01b)

        # Also check that samples from iterators with the same data_specs
        # with Conv2DSpace do not depend on the axes of the dataset
        b01c_c01b_it = self.test_set.iterator(
            mode='sequential',
            batch_size=batch_size,
            data_specs=(Conv2DSpace(shape=(32, 32),
                                    num_channels=3,
                                    axes=('c', 0, 1, 'b')),
                        'features'))
        b01c_c01b = next(b01c_c01b_it)
        assert np.all(b01c_c01b == c01b_c01b)

        c01b_b01c_it = c01b_test.iterator(
            mode='sequential',
            batch_size=batch_size,
            data_specs=(Conv2DSpace(shape=(32, 32),
                                    num_channels=3,
                                    axes=('b', 0, 1, 'c')),
                        'features'))
        c01b_b01c = next(c01b_b01c_it)
        assert np.all(c01b_b01c == b01c_b01c)
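
For reference, a minimal sketch of the axis bookkeeping these tests rely on: a ('c', 0, 1, 'b') batch maps to ('b', 0, 1, 'c') by moving the batch axis to the front and the channel axis to the back.

import numpy as np
c01b = np.zeros((3, 32, 32, 100))    # channels, rows, cols, batch
b01c = c01b.transpose(3, 1, 2, 0)    # batch, rows, cols, channels
assert b01c.shape == (100, 32, 32, 3)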
Example #5
import textwrap

from pylearn2.datasets import preprocessing
from pylearn2.datasets.cifar100 import CIFAR100
from pylearn2.utils import serial, string_utils


def main():
    data_dir = string_utils.preprocess('${PYLEARN2_DATA_PATH}')

    print('Loading CIFAR-100 train dataset...')
    data = CIFAR100(which_set='train')

    print("Preparing output directory...")
    patch_dir = data_dir + '/cifar100/cifar100_patches'
    serial.mkdir(patch_dir)
    README = open(patch_dir + '/README', 'w')

    README.write(
        textwrap.dedent("""
    The .pkl files in this directory may be opened in python using
    cPickle, pickle, or pylearn2.serial.load.

    data.pkl contains a pylearn2 Dataset object defining an unlabeled
    dataset of 2 million 6x6 approximately whitened, contrast-normalized
    patches drawn uniformly at random from the CIFAR-100 train set.

    preprocessor.pkl contains a pylearn2 Pipeline object that was used
    to extract the patches and approximately whiten / contrast normalize
    them. This object is necessary when extracting features for
    supervised learning or test set classification, because the
    extracted features must be computed using inputs that have been
    whitened with the ZCA matrix learned and stored by this Pipeline.

    They were created with the pylearn2 script make_cifar100_patches.py.

    All other files in this directory, including this README, were
    created by the same script and are necessary for the other files
    to function correctly.
    """))

    README.close()

    print("Preprocessing the data...")
    pipeline = preprocessing.Pipeline()
    pipeline.items.append(
        preprocessing.ExtractPatches(patch_shape=(6, 6),
                                     num_patches=2 * 1000 * 1000))
    pipeline.items.append(
        preprocessing.GlobalContrastNormalization(sqrt_bias=10., use_std=True))
    pipeline.items.append(preprocessing.ZCA())
    data.apply_preprocessor(preprocessor=pipeline, can_fit=True)

    data.use_design_loc(patch_dir + '/data.npy')

    serial.save(patch_dir + '/data.pkl', data)

    serial.save(patch_dir + '/preprocessor.pkl', pipeline)
Example #6
# Assumes: from pylearn2.datasets.cifar100 import CIFAR100
#          from pylearn2.datasets.tl_challenge import TL_Challenge
def get_labels():

    cifar100 = CIFAR100(which_set="train")
    train_y = cifar100.y_coarse

    assert train_y.shape == (50000, )

    # Keep six selected coarse classes; map every other label to 0.
    for i in range(50000):
        if train_y[i] not in [3, 4, 6, 7, 11, 12]:
            train_y[i] = 0

    tlc = TL_Challenge(which_set='train')
    test_y = tlc.y_coarse

    return train_y, test_y
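
The relabeling loop above can be written as one vectorized statement, sketched here under the assumption that numpy is imported as np:

train_y[~np.isin(train_y, [3, 4, 6, 7, 11, 12])] = 0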
Example #7
# Assumes: import numpy as np
#          from pylearn2.datasets.cifar10 import CIFAR10
#          from pylearn2.datasets.cifar100 import CIFAR100
#          from pylearn2.utils import serial
def get_test_labels(cifar10, cifar100, stl10):
    assert cifar10 + cifar100 + stl10 == 1

    if stl10:
        print('loading entire stl-10 test set just to get the labels')
        stl10 = serial.load("${PYLEARN2_DATA_PATH}/stl10/stl10_32x32/test.pkl")
        return stl10.y
    if cifar10:
        print('loading entire cifar10 test set just to get the labels')
        cifar10 = CIFAR10(which_set='test')
        return np.asarray(cifar10.y)
    if cifar100:
        print('loading entire cifar100 test set just to get the fine labels')
        cifar100 = CIFAR100(which_set='test')
        return np.asarray(cifar100.y_fine)
    assert False
Example #8
# Method of the same unittest.TestCase as Example #1.
def test_topo_c01b(self):
    """
    Tests that a topological batch with axes ('c', 0, 1, 'b')
    can be dimshuffled back to match the standard ('b', 0, 1, 'c')
    format.
    """
    batch_size = 100
    c01b_test = CIFAR100(which_set='test', axes=('c', 0, 1, 'b'))
    c01b_X = c01b_test.X[0:batch_size, :]
    c01b = c01b_test.get_topological_view(c01b_X)
    assert c01b.shape == (3, 32, 32, batch_size)
    b01c = c01b.transpose(3, 1, 2, 0)
    b01c_X = self.test_set.X[0:batch_size, :]
    assert c01b_X.shape == b01c_X.shape
    assert np.all(c01b_X == b01c_X)
    b01c_direct = self.test_set.get_topological_view(b01c_X)
    assert b01c_direct.shape == b01c.shape
    assert np.all(b01c_direct == b01c)
Example #9
# Method of an HPS experiment class; self.db and HPSData come from the
# surrounding project.
def get_ddm_cifar100(self, ddm_id):
    row = self.db.executeSQL(
        """
        SELECT  which_set, center, gcn, toronto_prepro, axes,
                start, stop, one_hot
        FROM hps3.ddm_cifar100
        WHERE ddm_id = %s
        """, (ddm_id, ), self.db.FETCH_ONE)
    if not row:
        raise HPSData("No cifar100 ddm for ddm_id=" + str(ddm_id))
    (which_set, center, gcn, toronto_prepro, axes_char, start,
     stop, one_hot) = row
    axes = self.get_axes(axes_char)
    return CIFAR100(which_set=which_set,
                    center=center,
                    gcn=gcn,
                    toronto_prepro=toronto_prepro,
                    axes=axes,
                    start=start,
                    stop=stop,
                    one_hot=one_hot)
Example #10
import numpy as np
from pylearn2.datasets.cifar100 import CIFAR100

print('loading cifar100...')
y = CIFAR100(which_set='train').y_fine

print('loading full')
X = np.load('/data/lisatmp2/goodfeli/bet_the_farm.npy')

print('loading restricted')
Z = np.load('/data/lisatmp2/goodfeli/hack.npy')

# Z is assumed to hold only the examples with fine label < 2, in the same
# order they appear in X; compare each such example with its counterpart.
idx = 0
for i in range(50000):
    if y[i] < 2:
        cur_X = X[i, :, :, :]
        cur_Z = Z[idx, :, :, :]
        diffs = cur_X - cur_Z
        max_diff = np.abs(diffs).max()
        print(i, '\t', max_diff)
        idx += 1
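
The same comparison, vectorized, as a sketch under the row-correspondence assumption noted in the comment above:

mask = y < 2
per_example_max = np.abs(X[mask] - Z).max(axis=(1, 2, 3))
print(per_example_max)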
Example #11
import sys

import numpy as np

from pylearn2.datasets.cifar100 import CIFAR100

# Read the classifier's keyword arguments from a file of python-literal
# text (argv[2]) and evaluate it into a dict.
f = open(sys.argv[2])
s = f.read()
f.close()
args = eval(s)

# TheanoSGDClassifier is project-specific (not part of pylearn2) and is
# assumed to be importable from the surrounding codebase.
clf = TheanoSGDClassifier(100, **args)

X = np.load(sys.argv[1])
if len(X.shape) == 4:
    X = X.reshape(X.shape[0], X.shape[1] * X.shape[2] * X.shape[3])
y = CIFAR100(which_set="train").y_fine.astype(int)
print('fit')
clf.fit(X, y)

del X
del y

y = np.asarray(CIFAR100(which_set="test").y_fine).astype(int)
print('loading test data')
X = np.load(sys.argv[3])
X = X.reshape(X.shape[0], X.shape[1] * X.shape[2] * X.shape[3])

print('evaluating svm')
yhat = clf.predict(X)

print((yhat == y).mean())
Example #12
        # (Snippet begins mid-function: this is the tail of the cifar10
        # branch, which rescales pixels from [0, 255] to [-1, 1] and
        # reshapes to (batch, channels, rows, cols). Assumes numpy as np,
        # os, and subprocess are imported.)
        train_set.X = np.reshape(
            np.subtract(np.multiply(2. / 255., train_set.X), 1.),
            (-1, 3, 32, 32))
        valid_set.X = np.reshape(
            np.subtract(np.multiply(2. / 255., valid_set.X), 1.),
            (-1, 3, 32, 32))
        test_set.X = np.reshape(
            np.subtract(np.multiply(2. / 255., test_set.X), 1.),
            (-1, 3, 32, 32))

    elif args.dataset == 'cifar100':
        print('Loading CIFAR-100 dataset...')
        from pylearn2.datasets.cifar100 import CIFAR100
        pylearn_path = os.environ['PYLEARN2_DATA_PATH']
        path = os.path.join(pylearn_path, 'cifar100', 'cifar-100-python')
        if not os.path.exists(path):
            subprocess.call('scripts/download_cifar100.sh')
        train_set = CIFAR100(which_set="train", start=0, stop=45000)
        valid_set = CIFAR100(which_set="train", start=45000, stop=50000)
        test_set = CIFAR100(which_set="test")
        classes = 100
        save_path = "../weights/cifar100-w1a1.npz"
        print("save_path = " + str(save_path))
        train_set.X = np.reshape(
            np.subtract(np.multiply(2. / 255., train_set.X), 1.),
            (-1, 3, 32, 32))
        valid_set.X = np.reshape(
            np.subtract(np.multiply(2. / 255., valid_set.X), 1.),
            (-1, 3, 32, 32))
        test_set.X = np.reshape(
            np.subtract(np.multiply(2. / 255., test_set.X), 1.),
            (-1, 3, 32, 32))
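
A quick standalone check of the rescaling used above, as a sketch: 2/255 * x - 1 maps pixel value 0 to -1.0 and 255 to 1.0.

import numpy as np
x = np.array([0., 255.])
print(2. / 255. * x - 1.)   # [-1.  1.]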
Example #13
"""
This script makes a dataset of 32x32 approximately whitened CIFAR-100 images.

"""

from __future__ import print_function

from pylearn2.utils import serial
from pylearn2.datasets import preprocessing
from pylearn2.utils import string_utils as string
from pylearn2.datasets.cifar100 import CIFAR100

data_dir = string.preprocess('${PYLEARN2_DATA_PATH}/cifar100')

print('Loading CIFAR-100 train dataset...')
train = CIFAR100(which_set='train')

print("Preparing output directory...")
output_dir = data_dir + '/whitened'
serial.mkdir(output_dir)
README = open(output_dir + '/README', 'w')

README.write("""
The .pkl files in this directory may be opened in python using
cPickle, pickle, or pylearn2.serial.load.

train.pkl, and test.pkl each contain
a pylearn2 Dataset object defining a labeled
dataset of an approximately whitened version of the CIFAR-100
dataset. train.pkl contains labeled train examples. test.pkl
contains labeled test examples.
Example #14
    # Assumes: import argparse
    #          import numpy as np
    parser = argparse.ArgumentParser(description='Testing Script')
    parser.add_argument('--dataset', '-d', default="cifar10",
                        help='dataset to use: cifar10, cifar100, mnist')
    parser.add_argument('--model', '-m', default="cnv",
                        help='model to use: resnet, lenet, inception, cnv')
    args = parser.parse_args()
    batch_size = 10000
    if args.dataset == 'cifar10':
        print('Loading CIFAR-10 dataset...')
        from pylearn2.datasets.cifar10 import CIFAR10
        test_set = CIFAR10(which_set="test", start=0, stop=batch_size)
        classes = 10
        test_set.X = np.reshape(
            np.subtract(np.multiply(2. / 255., test_set.X), 1.),
            (-1, 3, 32, 32))

    elif args.dataset == 'cifar100':
        print('Loading CIFAR-100 dataset...')
        from pylearn2.datasets.cifar100 import CIFAR100
        test_set = CIFAR100(which_set="test", start=0, stop=batch_size)
        classes = 100
        test_set.X = np.reshape(
            np.subtract(np.multiply(2. / 255., test_set.X), 1.),
            (-1, 3, 32, 32))

    elif args.dataset == 'mnist':
        print('Loading MNIST dataset...')
        from pylearn2.datasets.mnist import MNIST
        test_set = MNIST(which_set="test", start=0, stop=batch_size)
        classes = 10
        test_set.X = 2. * test_set.X.reshape(-1, 1, 28, 28) - 1.

    # flatten targets
    test_set.y = np.hstack(test_set.y)

    # one hot
    test_set.y = np.float32(np.eye(classes)[test_set.y])
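
The one-hot step above indexes rows of an identity matrix; a minimal standalone sketch:

import numpy as np
y = np.array([2, 0, 1])
print(np.float32(np.eye(3)[y]))
# [[0. 0. 1.]
#  [1. 0. 0.]
#  [0. 1. 0.]]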
Example #15
"""
This script is intended to reproduce the preprocessing used by Adam Coates
et al. in their work from the first half of 2011 on the CIFAR-10 and
STL-10 datasets.
"""
from __future__ import print_function

from pylearn2.utils import serial
from pylearn2.datasets import preprocessing
from pylearn2.datasets.cifar100 import CIFAR100
from pylearn2.utils import string_utils as string

data_dir = string.preprocess('${PYLEARN2_DATA_PATH}')

print('Loading CIFAR-100 train dataset...')
data = CIFAR100(which_set='train')

print("Preparing output directory...")
patch_dir = data_dir + '/cifar100/cifar100_patches_8x8'
serial.mkdir(patch_dir)
README = open(patch_dir + '/README', 'w')

README.write("""
The .pkl files in this directory may be opened in python using
cPickle, pickle, or pylearn2.serial.load.

data.pkl contains a pylearn2 Dataset object defining an unlabeled
dataset of 2 million 8x8 approximately whitened, contrast-normalized
patches drawn uniformly at random from the CIFAR-100 train set.

preprocessor.pkl contains a pylearn2 Pipeline object that was used
Example #16
"""
This script makes a dataset of 32x32 contrast normalized, approximately
whitened CIFAR-100 images.

"""

from pylearn2.utils import serial
from pylearn2.datasets import preprocessing
from pylearn2.utils import string_utils
from pylearn2.datasets.cifar100 import CIFAR100

data_dir = string_utils.preprocess('${PYLEARN2_DATA_PATH}/cifar100')

print('Loading CIFAR-100 train dataset...')
train = CIFAR100(which_set='train', gcn=55.)

print("Preparing output directory...")
output_dir = data_dir + '/pylearn2_gcn_whitened'
serial.mkdir(output_dir)
README = open(output_dir + '/README', 'w')

README.write("""
The .pkl files in this directory may be opened in python using
cPickle, pickle, or pylearn2.serial.load.

train.pkl, and test.pkl each contain
a pylearn2 Dataset object defining a labeled
dataset of a 32x32 contrast normalized, approximately whitened version of the CIFAR-100
dataset. train.pkl contains labeled train examples. test.pkl
contains labeled test examples.
Example #17
from pylearn2.utils import serial
from pylearn2.datasets.cifar100 import CIFAR100
from pylearn2.datasets.tl_challenge import TL_Challenge
from pylearn2.datasets import preprocessing
import os
import numpy as np

goodfeli_tmp = os.environ['GOODFELI_TMP']

train = CIFAR100(which_set="train")

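# Augment the CIFAR-100 training matrix with the TL_Challenge unlabeled
# and train images before fitting the preprocessing pipeline.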
aug = TL_Challenge(which_set="unlabeled")
aug2 = TL_Challenge(which_set="train")

train.set_design_matrix(np.concatenate((train.X, aug.X, aug2.X), axis=0))

del aug
del aug2

pipeline = preprocessing.Pipeline()
pipeline.items.append(
    preprocessing.ExtractPatches(patch_shape=(6, 6), num_patches=2000000))
pipeline.items.append(preprocessing.GlobalContrastNormalization())
pipeline.items.append(preprocessing.ZCA())

train.apply_preprocessor(preprocessor=pipeline, can_fit=True)

train.use_design_loc(goodfeli_tmp + '/tl_challenge_patches_2M_6x6_design.npy')

serial.save(goodfeli_tmp + '/tl_challenge_patches_2M_6x6.pkl', train)