def __init__(self, which_set, label_type=None, azimuth=False,
             rotation=False, texture=False, center=False,
             contrast_normalize=False, seed=132987):
    """
    Load one split of the MNIST+ pickle into a dense design matrix.

    Parameters
    ----------
    which_set : str
        'train', 'valid' or 'test'; selects rows through
        `MNISTPlus.idx[which_set]`.
    label_type : str or None
        Key of the label array to read from the pickle ('label',
        'azimuth', 'rotation' or 'texture_id'). None loads no labels.
    azimuth, rotation, texture : bool
        Each enabled flag appends its suffix ('_azi', '_rot', '_tex')
        to the pickle file name.
    center : bool
        If True, subtract the per-feature mean of the selected split.
    contrast_normalize : bool
        If True, standardize every example by its own mean and standard
        deviation (applied before centering).
    seed : int
        Not used by this constructor; kept for interface compatibility.
    """
    assert which_set in ['train', 'valid', 'test']
    assert label_type in [None, 'label', 'azimuth', 'rotation',
                          'texture_id']

    # load data
    fname = '${PYLEARN2_DATA_PATH}/mnistplus/mnistplus'
    if azimuth:
        fname += '_azi'
    if rotation:
        fname += '_rot'
    if texture:
        fname += '_tex'

    data = load(fname + '.pkl')

    # get images and cast to floatX
    data_x = np.cast[config.floatX](data['data'])
    data_x = data_x[MNISTPlus.idx[which_set]]

    if contrast_normalize:
        meanx = np.mean(data_x, axis=1)[:, None]
        stdx = np.std(data_x, axis=1)[:, None]
        # NOTE(review): a constant image has stdx == 0 and would produce
        # NaN here; the final contains_nan assert would then fire.
        data_x = (data_x - meanx) / stdx

    if center:
        data_x -= np.mean(data_x, axis=0)

    # get labels
    data_y = None
    if label_type is not None:
        data_y = data[label_type]

        # convert to float for performing regression
        if label_type in ['azimuth', 'rotation']:
            data_y = np.cast[config.floatX](data_y / 360.)

        # retrieve only subset of data
        data_y = data_y[MNISTPlus.idx[which_set]]

    # Without labels there is nothing to count. The previous unconditional
    # np.max(data_y) + 1 failed for the default label_type=None.
    y_labels = None if data_y is None else np.max(data_y) + 1

    # create view converter for retrieving the topological view
    view_converter = dense_design_matrix.DefaultViewConverter((48, 48))

    # init the super class
    super(MNISTPlus, self).__init__(X=data_x, y=data_y,
                                    y_labels=y_labels,
                                    view_converter=view_converter)

    assert not contains_nan(self.X)
def __init__(self, which_set, center=False):
    """
    Load the CIFAR-100 python pickle for `which_set` ('train' or 'test').

    Pixels are cast to float32 and, when `center` is True, shifted by
    -127.5. The design-matrix targets are left unset (y=None); fine and
    coarse labels are exposed as `self.y_fine` and `self.y_coarse`.
    """
    assert which_set in ['train', 'test']

    obj = serial.load(
        "${PYLEARN2_DATA_PATH}/cifar100/cifar-100-python/" + which_set)

    raw = obj['data']
    # The raw data is expected to span the full 8-bit range.
    assert raw.max() == 255.
    assert raw.min() == 0.

    X = N.cast['float32'](raw)
    if center:
        X -= 127.5

    converter = dense_design_matrix.DefaultViewConverter((32, 32, 3))

    # Targets are not implemented yet for the design matrix itself.
    super(CIFAR100, self).__init__(X=X, y=None, view_converter=converter)

    assert not N.any(N.isnan(self.X))

    self.y_fine = N.asarray(obj['fine_labels'])
    self.y_coarse = N.asarray(obj['coarse_labels'])
def __init__(self, which_set, center=False, one_hot=False):
    """
    Load a split of the mnist_rotation_back_image pickle.

    Parameters
    ----------
    which_set : str
        File name appended to the dataset directory.
    center : bool
        If True, subtract the per-feature mean from X.
    one_hot : bool
        If True, labels are expanded to 10-column float32 indicator rows.
    """
    obj = serial.load(
        "${PYLEARN2_DATA_PATH}/mnist/mnist_rotation_back_image/"
        + which_set)

    X = N.cast['float32'](obj['data'])
    y = N.asarray(obj['labels'])

    self.one_hot = one_hot
    if one_hot:
        # One indicator row per example, with a 1 in the label's column.
        indicators = N.zeros((y.shape[0], 10), dtype='float32')
        for row, label in enumerate(y):
            indicators[row, label] = 1.
        y = indicators

    if center:
        X -= X.mean(axis=0)

    converter = dense_design_matrix.DefaultViewConverter((28, 28, 1))

    super(MNIST_rotated_background, self).__init__(
        X=X, y=y, view_converter=converter)

    assert not N.any(N.isnan(self.X))
def __init__(self, which_set, one_hot=False):
    """
    Load the icml07 'rectangles' data and keep one contiguous split.

    Rows [0, 1000) form 'train', [1000, 1200) form 'valid' and
    [1200, 51200) form 'test'. With `one_hot` the labels become
    two-column float32 indicator rows.
    """
    assert which_set in ['train', 'valid', 'test']

    loader = icml07.icml07_loaders()['rectangles']
    data_x, data_y = loader.load_from_numpy()

    # Row ranges per split; anything that is not train/valid is 'test'.
    bounds = {'train': (0, 1000),
              'valid': (1000, 1000 + 200)}
    first, last = bounds.get(which_set,
                             (1000 + 200, 1000 + 200 + 50000))
    data_x = data_x[first:last]
    data_y = data_y[first:last]

    assert data_x.shape[0] == data_y.shape[0]

    self.one_hot = one_hot
    if one_hot:
        indicators = numpy.zeros((data_y.shape[0], 2), dtype='float32')
        for row, label in enumerate(data_y):
            indicators[row, label] = 1.
        data_y = indicators

    converter = dense_design_matrix.DefaultViewConverter((28, 28, 1))
    super(Rectangles, self).__init__(X=data_x, y=data_y,
                                     view_converter=converter)
    assert not numpy.any(numpy.isnan(self.X))
def __init__(self, which_set, center=False, multi_target=False):
    """
    Build the small NORB design matrix for one split.

    :param which_set: one of ['train', 'test']
    :param center: if True, subtract 127.5 from every pixel value.
    :param multi_target: if True, the columns returned by
        NORBSmall.load(which_set, 'info') are appended to the category
        label of each example.
    """
    assert which_set in ['train', 'test']

    # Cast to float32 and flatten each example to a row of 2 * 96 * 96
    # values, as DenseDesignMatrix expects.
    images = numpy.cast['float32'](NORBSmall.load(which_set, 'dat'))
    X = images.reshape(-1, 2 * 96 * 96)

    # Category labels (stored as uint8 according to the original note).
    y = NORBSmall.load(which_set, 'cat')
    if multi_target:
        extra = NORBSmall.load(which_set, 'info')
        y = numpy.hstack((y[:, numpy.newaxis], extra))

    if center:
        X -= 127.5

    converter = dense_design_matrix.DefaultViewConverter((96, 96, 2))
    super(NORBSmall, self).__init__(X=X, y=y, view_converter=converter)
def __init__(self, which_set, which_experiment, start=None, stop=None,
             axes=('b', 0, 1, 'c'), preprocessor = None):
    """
    Load one split of a CIFAR-10 "experiment" subset.

    Parameters
    ----------
    which_set : str
        'train' or 'test'. There is no validation split; use
        `start`/`stop` to carve one out.
    which_experiment : str
        Experiment name; its lower-cased form names the folder
        `experiment_<name>` under `${PYLEARN2_DATA_PATH}/cifar10`.
    start, stop : int or None
        Optional row range to keep. Slicing happens only when `start`
        is given; a missing `stop` then means "to the end".
    axes : tuple
        Axis order handed to the view converter and the base class.
    preprocessor : object or None
        Not used by this constructor; kept for interface compatibility.
    """
    # note: there is no such thing as the cifar10 validation set;
    # pylearn1 defined one but really it should be user-configurable
    # (as it is here)
    assert which_set in ['train', 'test']
    assert which_experiment in ['S100', 'ADD3_10_S100', 'ADD3_10_S250',
                                'ADD3_ALL_S100', 'RM3_S100', 'RP3_S100']

    self.experiment = which_experiment

    data_dir = string_utils.preprocess('${PYLEARN2_DATA_PATH}')
    # str.lower() replaces string.lower(), which exists only on Python 2.
    experiment_folder_string = "experiment_" + which_experiment.lower()
    path = os.path.join(data_dir, "cifar10", experiment_folder_string,
                        which_set + ".pkl")
    meta_path = os.path.join(data_dir, "cifar10", experiment_folder_string,
                             "meta")

    self.axes = axes

    # we also expose the following details:
    self.img_shape = (3, 32, 32)
    self.img_size = numpy.prod(self.img_shape)

    meta = serial.load(meta_path)
    self.label_names = meta['label_names']
    self.n_classes = len(self.label_names)

    obj = serial.load(path)
    X = obj['data']

    assert X.max() == 255.
    assert X.min() == 0.

    X = numpy.cast['float32'](X)
    y = numpy.asarray(obj['labels']).astype('uint8')

    if which_set == 'test':
        y = y.reshape((y.shape[0], 1))

    if start is not None:
        if stop is None:
            stop = X.shape[0]
        assert start >= 0
        assert stop > start
        assert stop <= X.shape[0]
        X = X[start:stop, :]
        # Slice along the first axis only: train labels are 1-D here, so
        # the former y[start:stop, :] raised an IndexError for them.
        y = y[start:stop]
        assert X.shape[0] == y.shape[0]

    view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                              axes)

    super(Experiment, self).__init__(X=X, y=y, y_labels=self.n_classes,
                                     view_converter=view_converter,
                                     axes=self.axes)

    assert not contains_nan(self.X)
def __init__(self):
    """
    Build a tiny fixed dataset (a 2x2 identity design matrix) for
    debugging, paired with a (32, 32, 3) view converter.
    """
    identity_rows = N.asarray([[1.0, 0.0],
                               [0.0, 1.0]])
    converter = dense_design_matrix.DefaultViewConverter((32, 32, 3))

    super(DebugDataset, self).__init__(X=identity_rows,
                                       view_converter=converter)

    assert not N.any(N.isnan(self.X))
def __init__(self):
    """
    Load the Wiskott fish array from its fixed location and store its
    complement (1 - value) as a design matrix of 64x64x1 images.
    """
    stored = N.load("/data/lisa/data/wiskott/wiskott_fish_layer0_15_standard_64x64_shuffled.npy")
    # Invert the stored values.
    X = 1. - stored

    converter = dense_design_matrix.DefaultViewConverter((64, 64, 1))

    super(Wiskott, self).__init__(X=X, view_converter=converter)

    assert not N.any(N.isnan(self.X))
def __init__(self, which_set, one_hot=False, axes=['b', 0, 1, 'c']):
    """
    Load one split of the OCR letters data from `letter.data`.

    Parameters
    ----------
    which_set : str
        A key of `self.data_split` ('train', 'valid' or 'test'); splits
        are consecutive row ranges whose sizes come from `data_split`.
    one_hot : bool
        If True, labels become float32 indicator rows with one column
        per distinct letter.
    axes : list
        Axis order passed to the view converter. The default list is
        never mutated here.

    Notes
    -----
    Each tab-separated line supplies the letter in column 1, the fold in
    column 5 and the pixel values in columns 6 up to the last one.
    `self.fold` holds the fold id of every kept example.
    """
    # Must stay first: records exactly the constructor arguments.
    self.args = locals()

    assert which_set in self.data_split.keys()

    path = serial.preprocess(
        "${PYLEARN2_DATA_PATH}/ocr_letters/letter.data")
    with open(path, 'r') as data_f:
        data = data_f.readlines()
        data = [line.split("\t") for line in data]

    # Explicit lists (not map objects) so numpy builds a 2-D array on
    # Python 3 as well.
    data_x = [[int(value) for value in item[6:-1]] for item in data]
    data_letters = [item[1] for item in data]
    data_fold = [int(item[5]) for item in data]

    letters = list(numpy.unique(data_letters))
    data_y = [letters.index(item) for item in data_letters]

    if which_set == 'train':
        split = slice(0, self.data_split['train'])
    elif which_set == 'valid':
        split = slice(self.data_split['train'],
                      self.data_split['train'] +
                      self.data_split['valid'])
    elif which_set == 'test':
        split = slice(self.data_split['train'] +
                      self.data_split['valid'],
                      (self.data_split['train'] +
                       self.data_split['valid'] +
                       self.data_split['test']))

    data_x = numpy.asarray(data_x[split])
    data_y = numpy.asarray(data_y[split])
    # Take the folds from the fold column. This used to re-slice the
    # already-sliced labels, so self.fold never held fold ids.
    data_fold = numpy.asarray(data_fold[split])

    assert data_x.shape[0] == data_y.shape[0]
    assert data_x.shape[0] == self.data_split[which_set]

    self.one_hot = one_hot
    if one_hot:
        one_hot = numpy.zeros(
            (data_y.shape[0], len(letters)), dtype='float32')
        for i, label in enumerate(data_y):
            one_hot[i, label] = 1.
        data_y = one_hot

    view_converter = dense_design_matrix.DefaultViewConverter(
        (16, 8, 1), axes)
    super(OCR, self).__init__(
        X=data_x, y=data_y, view_converter=view_converter)

    assert not contains_nan(self.X)
    self.fold = data_fold
def __init__(self):
    """
    Load the Wiskott fish array from the pylearn2 data path and store
    its complement (1 - value) as a design matrix of 64x64x1 images.
    """
    location = ("${PYLEARN2_DATA_PATH}/wiskott/wiskott"
                + "_fish_layer0_15_standard_64x64_shuffled.npy")
    # Invert the stored values.
    X = 1. - load(location)

    converter = dense_design_matrix.DefaultViewConverter((64, 64, 1))
    super(Wiskott, self).__init__(X=X, view_converter=converter)

    assert not N.any(N.isnan(self.X))
def __init__(self, which_set):
    """
    Open the HDF5 file for `which_set` and expose its Data node.

    Parameters
    ----------
    which_set : str
        Key into the mapping returned by `utils.get_paths()`.

    Notes
    -----
    The file handle is kept on `self.h5file`; nothing here closes it.
    The channel count is derived from the row width of `Data.X` and the
    configured `region_size`.
    """
    conf = utils.get_config()
    paths = utils.get_paths()
    region_size = conf['region_size']

    self.h5file = tables.open_file(paths[which_set])

    node = self.h5file.root.Data

    axes = ('b', 0, 1, 'c')
    # Floor division keeps the count an integer under true division
    # (Python 3 or `from __future__ import division`); a float here
    # would make the image shape invalid.
    channels = node.X.shape[1] // (region_size * region_size)
    view_converter = dense_design_matrix.DefaultViewConverter(
        (region_size, region_size, channels), axes)
    super(BCDR, self).__init__(
        X=node.X, view_converter=view_converter, y=node.y)
def __init__(self, start=None, stop=None, shuffle=False, rng=None,
             seed=132987, center=False, scale=False,
             axes=('b', 0, 1, 'c'), preprocessor=None,
             which_ds='kaggle'):
    """
    Combine the `which_ds` data with the TFD train and valid splits.

    Parameters
    ----------
    start, stop : int or None
        Optional row range kept after concatenation (and shuffling). A
        missing `stop` means "to the end". An explicit `stop=-1` keeps
        its historical meaning of "all but the last row".
    shuffle : bool
        If True, permute the rows with `rng`.
    rng : numpy.random.RandomState or None
        Generator used for shuffling; built from `seed` when not given.
    seed : int
        Seed for the fallback generator.
    center : bool
        Passed to `load_data`; when True, 0.5 is also subtracted from
        the combined data.
    scale : bool
        Passed to `load_data` and to the TFD loaders.
    axes : tuple
        Axis order for the view converter.
    preprocessor : object or None
        Applied to the finished dataset when given.
    which_ds : str
        Passed to `load_data` as `which`.
    """
    data_x, data_y = self.load_data(which=which_ds, center=center,
                                    scale=scale)

    # Append the TFD train and valid splits, in that order.
    for tfd_split in ('train', 'valid'):
        tfd = TFD(tfd_split, one_hot=1, scale=scale)
        data_x = np.concatenate((data_x, tfd.X))
        data_y = np.concatenate((data_y, tfd.y))

    if shuffle:
        rng = rng if rng else np.random.RandomState(seed)
        rand_idx = rng.permutation(len(data_x))
        data_x = data_x[rand_idx]
        data_y = data_y[rand_idx]

    if start is not None or stop is not None:
        if start is None:
            start = 0
        else:
            assert start >= 0
        if stop is None:
            # Previously mapped to -1, which silently dropped the last
            # example from the slice.
            stop = len(data_x)
        elif stop != -1:
            assert stop > start
        data_x = data_x[start:stop]
        data_y = data_y[start:stop]

    if center:
        # NOTE(review): `center` was already handed to load_data above;
        # confirm this extra shift is intended.
        data_x -= 0.5

    self.axes = axes
    view_converter = dense_design_matrix.DefaultViewConverter((48, 48, 1),
                                                              axes)
    super(GoogleTFDDataset, self).__init__(X=data_x, y=data_y,
                                           view_converter=view_converter)
    assert not np.any(np.isnan(self.X))

    if preprocessor is not None:
        preprocessor.apply(self)
def __init__(self, which_set=None, file=None, center = False,
             rescale = False, gcn = None, one_hot = False, start = None,
             stop = None, axes=('b', 0, 1, 'c'), toronto_prepro = False,
             preprocessor = None):
    """
    Wrap an unlabeled array stored in a .npy file as 32x32x3 images.

    Parameters
    ----------
    file : str
        Path given to `np.load`; the content is cast to float32.
    center : bool
        If True, subtract 127.5 from every value.
    rescale : bool
        If True, divide every value by 127.5 (after centering).
    gcn : float or None
        If not None, apply `global_contrast_normalize` with this scale.
    axes : tuple
        Axis order for the view converter.
    toronto_prepro : bool
        Only recorded on the instance.
    preprocessor : object or None
        Applied to the finished dataset when truthy.
    which_set, one_hot, start, stop
        Accepted but not used by this constructor.
    """
    # note: there is no such thing as the cifar10 validation set;
    # pylearn1 defined one but really it should be user-configurable
    # (as it is here)
    self.axes = axes

    # we also expose the following details:
    self.img_shape = (3, 32, 32)
    self.img_size = N.prod(self.img_shape)

    # Record the requested options on the instance.
    self.center = center
    self.rescale = rescale
    self.toronto_prepro = toronto_prepro
    self.gcn = gcn

    X = np.load(file).astype(np.float32)

    if center:
        X -= 127.5
    if rescale:
        X /= 127.5
    if gcn is not None:
        X = global_contrast_normalize(X, scale=float(gcn))

    converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                         axes)
    super(CIFAR10_TEST, self).__init__(X=X, view_converter=converter)

    assert not np.any(np.isnan(self.X))

    if preprocessor:
        preprocessor.apply(self)
def __init__(self, which_set, center=False, scale=False,
             start=None, stop=None, axes=('b', 0, 1, 'c'),
             preprocessor=None):
    """
    A version of SVHN dataset that loads everything into the memory
    instead of using pytables.

    `which_set` must be a key of `self.mapper`. With `center` 127.5 is
    subtracted; with `center` and `scale` the result is also divided by
    127.5; with `scale` alone the data is divided by 255. A preprocessor,
    when given, may fit only on the training-type splits.
    """
    assert which_set in self.mapper.keys()

    # Mirror every constructor argument onto the instance, then drop the
    # self-reference that locals() brings along.
    self.__dict__.update(locals())
    del self.self

    # load data
    data_dir = preprocess('${PYLEARN2_DATA_PATH}/SVHN/format2/')
    data_x, data_y = self.make_data(which_set, data_dir)

    # rescale or center if permitted
    if center:
        data_x -= 127.5
        if scale:
            data_x /= 127.5
    elif scale:
        data_x /= 255.

    converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                         axes)
    super(SVHN_On_Memory, self).__init__(X=data_x, y=data_y,
                                         view_converter=converter)

    if preprocessor:
        can_fit = which_set in ['train', 'train_all', 'splitted_train']
        preprocessor.apply(self, can_fit)

    # Release the local references and collect right away.
    del data_x, data_y
    gc.collect()
def __init__(self, which_set, center = False):
    """
    Load a split of the mnist_rotation_back_image pickle as 28x28x1
    images with their labels.

    `which_set` is the file name inside the dataset directory. With
    `center` the per-feature mean is subtracted from X.
    """
    obj = serial.load(
        "${PYLEARN2_DATA_PATH}/mnist/mnist_rotation_back_image/"+which_set)

    X = N.cast['float32'](obj['data'])
    y = N.asarray(obj['labels'])

    if center:
        X -= X.mean(axis=0)

    converter = dense_design_matrix.DefaultViewConverter((28, 28, 1))

    super(MNIST, self).__init__(X=X, y=y, view_converter=converter)

    assert not N.any(N.isnan(self.X))
def __init__(self, start=None, stop=None, shuffle=False, rng=None,
             seed=132987, center=False, axes=('b', 0, 1, 'c'),
             preprocessor=None):
    """
    Load the cleaned Google face dataset pickle as 48x48x1 images.

    Parameters
    ----------
    start, stop : int or None
        Optional row range kept after shuffling. A missing `stop` means
        "to the end". An explicit `stop=-1` keeps its historical
        meaning of "all but the last row".
    shuffle : bool
        If True, permute the rows with `rng`.
    rng : numpy.random.RandomState or None
        Generator used for shuffling; built from `seed` when not given.
    seed : int
        Seed for the fallback generator.
    center : bool
        If True, subtract 0.5 from the data.
    axes : tuple
        Axis order for the view converter.
    preprocessor : object or None
        Applied to the finished dataset when given.
    """
    path = "/data/lisa/data/faces/GoogleDataset/Clean/latest.pkl"
    data = serial.load(path)

    # The pickle holds (features, targets).
    data_x = data[0]
    data_y = data[1]
    assert len(data_x) == len(data_y)

    if shuffle:
        rng = rng if rng else np.random.RandomState(seed)
        rand_idx = rng.permutation(len(data_x))
        data_x = data_x[rand_idx]
        data_y = data_y[rand_idx]

    if start is not None or stop is not None:
        if start is None:
            start = 0
        else:
            assert start >= 0
        if stop is None:
            # Previously mapped to -1, which silently dropped the last
            # example from the slice.
            stop = len(data_x)
        elif stop != -1:
            assert stop > start
        data_x = data_x[start:stop]
        data_y = data_y[start:stop]

    if center:
        data_x -= 0.5

    self.axes = axes
    view_converter = dense_design_matrix.DefaultViewConverter((48, 48, 1),
                                                              axes)
    super(GoogleDataset, self).__init__(X=data_x, y=data_y,
                                        view_converter=view_converter)
    assert not np.any(np.isnan(self.X))

    if preprocessor is not None:
        preprocessor.apply(self)
def __init__(self, path=None, start=None, stop=None, shuffle=True,
             rng=None, seed=132987, center=False, scale=False,
             axes=('b', 0, 1, 'c'), preprocessor=None, which_set='test'):
    """
    Open (building it first if missing) the HDF5 file for `which_set`.

    Parameters
    ----------
    path : str or None
        Directory prefix of the HDF5 file; preprocessed when given,
        otherwise a hard-coded scratch directory is used.
    start, stop : int or None
        Row range forwarded to `make_data` when the file is built. They
        cannot be honored for an existing file.
    shuffle, rng, seed
        Forwarded to `make_data` when the file has to be built.
    center, scale
        Accepted but not used by this constructor.
    axes : tuple
        Axis order for the view converter.
    preprocessor : object or None
        Applied to the finished dataset when given.
    which_set : str
        Base name of the HDF5 file.

    Raises
    ------
    ValueError
        If `start` or `stop` is given while the file already exists.
    """
    if path is None:
        # The original assigned '/data/lisa/data/faces/EmotiW/preproc/'
        # first and overwrote it at once; only this value was ever used.
        path = '/Tmp/zumerjer/'
    else:
        path = preprocess(path)
    mode = 'r'

    file_n = "{}{}.h5".format(path, which_set)
    make_new = not os.path.isfile(file_n)

    # Reject the unsupported combination before opening the file, so no
    # handle is left open when the error is raised.
    if not make_new and (start is not None or stop is not None):
        raise ValueError("start/stop cannot be applied to the existing "
                         "file %s; they are only used when the file is "
                         "built" % file_n)

    if make_new:
        self.make_data(path, shuffle, rng, seed, which_set, start, stop)

    self.h5file = tables.openFile(file_n, mode=mode)
    data = self.h5file.getNode('/', "Data")

    self.axes = axes
    view_converter = dense_design_matrix.DefaultViewConverter((96, 96, 3),
                                                              axes)
    super(ComboDatasetPyTable, self).__init__(
        X=data.X, y=data.y, view_converter=view_converter)
    assert not np.any(np.isnan(self.X))

    if preprocessor is not None:
        preprocessor.apply(self)
def test_zero_image(self):
    """
    LeCunLCN on all-zero images must stay finite (no division by zero).
    """
    axes = ['b', 0, 1, 'c']
    zeros = as_floatX(np.zeros((5, 32 * 32 * 3)))

    dataset = DenseDesignMatrix(
        X=zeros,
        view_converter=dense_design_matrix.DefaultViewConverter(
            (32, 32, 3), axes))
    dataset.axes = axes

    dataset.apply_preprocessor(LeCunLCN(img_shape=[32, 32]))

    assert isfinite(dataset.get_design_matrix())
def __init__(self, which_set, center=False):
    """
    Wrap the pylearn CIFAR-10 images for 'train' or 'test' as float32
    32x32x3 data; with `center`, 127.5 is subtracted from every value.
    """
    # pylearn's loader offers a validation split, but CIFAR-10 has
    # none, so it is requested with zero validation examples.
    orig = cifar10.cifar10(ntrain=50000, nvalid=0, ntest=10000)

    features = {'train': orig.train.x, 'test': orig.test.x}
    X = N.cast['float32'](features[which_set])

    if center:
        X -= 127.5

    converter = dense_design_matrix.DefaultViewConverter((32, 32, 3))
    super(CIFAR10, self).__init__(X=X, view_converter=converter)

    assert not N.any(N.isnan(self.X))
def __init__(self, which_set, center=False):
    """
    Wrap one split of icml07's rotated-background MNIST as 28x28x1
    images.

    Parameters
    ----------
    which_set : str
        'train', 'valid' or 'test'.
    center : bool
        If True, subtract the per-feature mean from X. This flag used
        to be accepted and silently ignored; it now centers the data
        the same way the other MNIST loaders in this code base do.
    """
    orig = icml07.MNIST_rotated_background(n_train=10000, n_valid=2000,
                                           n_test=10000)

    sets = {'train': orig.train,
            'valid': orig.valid,
            'test': orig.test}

    X = numpy.cast['float32'](sets[which_set].x)
    y = sets[which_set].y

    if center:
        X -= X.mean(axis=0)

    view_converter = dense_design_matrix.DefaultViewConverter((28, 28, 1))

    super(MNIST_rotated_background, self).__init__(
        X=X, y=y, view_converter=view_converter)

    assert not numpy.any(numpy.isnan(self.X))
def test_channel(self):
    """
    LeCunLCN restricted to a subset of channels must run and give
    finite output on random images.
    """
    axes = ['b', 0, 1, 'c']
    rng = np.random.RandomState([1, 2, 3])
    images = as_floatX(rng.randn(5, 32 * 32 * 3))

    dataset = DenseDesignMatrix(
        X=images,
        view_converter=dense_design_matrix.DefaultViewConverter(
            (32, 32, 3), axes))
    dataset.axes = axes

    dataset.apply_preprocessor(
        LeCunLCN(img_shape=[32, 32], channels=[1, 2]))

    assert isfinite(dataset.get_design_matrix())
def __init__(self, which_set, center = False):
    """
    Wrap the pylearn MNIST images for 'train' or 'test' as float32
    28x28x1 data.

    Parameters
    ----------
    which_set : str
        'train' or 'test'.
    center : bool
        Not supported by this loader.

    Raises
    ------
    NotImplementedError
        If `center` is true. This used to be `assert False`, which is
        stripped under `python -O` and then returned uncentered data.
    """
    # pylearn's loader offers a validation split, but MNIST has none,
    # so it is requested with zero validation examples.
    orig = i_hate_python.train_valid_test(ntrain=60000, nvalid=0,
                                          ntest=10000)

    Xs = {'train': orig.train.x,
          'test': orig.test.x}

    X = N.cast['float32'](Xs[which_set])

    if center:
        raise NotImplementedError(
            "centering is not implemented for this MNIST loader")

    view_converter = dense_design_matrix.DefaultViewConverter((28, 28, 1))

    super(MNIST, self).__init__(X=X, view_converter=view_converter)

    assert not N.any(N.isnan(self.X))
def test_rgb_yuv():
    """
    RGB_YUV must load, run on random images without error, and give
    output free of nan and inf values.
    """
    axes = ['b', 0, 1, 'c']
    rng = np.random.RandomState([1, 2, 3])
    images = as_floatX(rng.randn(5, 32 * 32 * 3))

    dataset = DenseDesignMatrix(
        X=images,
        view_converter=dense_design_matrix.DefaultViewConverter(
            (32, 32, 3), axes))
    dataset.axes = axes

    dataset.apply_preprocessor(RGB_YUV())

    assert isfinite(dataset.get_design_matrix())
def test_random_image(self):
    """
    LeCunLCN must load, run on random images without error, and give
    output free of nan and inf values.
    """
    axes = ['b', 0, 1, 'c']
    rng = np.random.RandomState([1, 2, 3])
    images = as_floatX(rng.randn(5, 32 * 32 * 3))

    dataset = DenseDesignMatrix(
        X=images,
        view_converter=dense_design_matrix.DefaultViewConverter(
            (32, 32, 3), axes))
    dataset.axes = axes

    dataset.apply_preprocessor(LeCunLCN(img_shape=[32, 32]))
    processed = dataset.get_design_matrix()

    # Check the two failure modes separately.
    assert not np.any(np.isnan(processed))
    assert not np.any(np.isinf(processed))
def __init__(self, which_set, multi_target=False):
    """
    Build the small NORB design matrix for one split.

    :param which_set: one of ['train', 'test']

    :param multi_target: If False, each label is the value returned by
    SmallNORB.load(which_set, 'cat') (the object category). If True,
    each label is a vector: the category followed by the columns of
    SmallNORB.load(which_set, 'info'). The earlier documentation listed
    the extra columns in two conflicting orders (instance, lighting,
    elevation, azimuth); confirm the order against the 'info' file
    before relying on it.
    """
    assert which_set in ['train', 'test']

    self.which_set = which_set

    # Cast to the GPU-supported float type with theano._asarray(), a
    # safer alternative to numpy.asarray(), then lay each example out as
    # one row of 2 * 96 * 96 values for DenseDesignMatrix.
    images = theano._asarray(SmallNORB.load(which_set, 'dat'),
                             theano.config.floatX)
    X = images.reshape(-1, 2*96*96)

    # Category labels (stored as uint8 according to the original note).
    y = SmallNORB.load(which_set, 'cat')
    if multi_target:
        extra = SmallNORB.load(which_set, 'info')
        y = numpy.hstack((y[:, numpy.newaxis], extra))

    converter = dense_design_matrix.DefaultViewConverter((2, 96, 96))

    # TODO: let labels be accessible by key, like y.category, y.elevation,
    # etc.
    super(SmallNORB, self).__init__(X=X, y=y, view_converter=converter)
def __init__(self, which_set, center = False):
    """
    Load the MNIST python pickle for `which_set` as 28x28x1 images.

    Parameters
    ----------
    which_set : str
        'train' (60000 examples expected) or 'test' (10000 expected).
    center : bool
        If True, subtract the per-feature mean from X.

    Raises
    ------
    ValueError
        If `which_set` is neither 'train' nor 'test'; asking for 'valid'
        gets a dedicated explanation.
    """
    if which_set not in ['train','test']:
        if which_set == 'valid':
            raise ValueError("There is no such thing as the MNIST "
"validation set. MNIST consists of 60,000 train examples and 10,000 test"
" examples. If you wish to use a validation set you should divide the train "
"set yourself. The pylearn2 dataset implements and will only ever implement "
"the standard train / test split used in the literature.")
        # The old concatenation left a stray '".' in the middle of this
        # message.
        raise ValueError('Unrecognized which_set value "%s". '
                         'Valid values are ["train","test"].'
                         % (which_set,))

    path = "${PYLEARN2_DATA_PATH}/mnist/mnist-python/%s.pkl" % which_set
    obj = serial.load(path)
    X = obj['data']
    X = N.cast['float32'](X)
    y = N.asarray(obj['labels'])

    # Sanity-check the layout: one flattened 28x28 image per row.
    assert len(X.shape) == 2
    assert X.shape[1] == 784

    if which_set == 'train':
        assert X.shape[0] == 60000
    elif which_set == 'test':
        assert X.shape[0] == 10000
    else:
        assert False

    if center:
        X -= X.mean(axis=0)

    view_converter = dense_design_matrix.DefaultViewConverter((28,28,1))

    super(MNIST,self).__init__(X = X, y = y, view_converter = view_converter)

    assert not N.any(N.isnan(self.X))
def __init__(self, which_set, center=False, multi_target=False):
    """
    Build the small NORB design matrix for one split.

    Parameters
    ----------
    which_set : str
        'train' or 'test'.
    center : bool
        If True, subtract 127.5 from every pixel value.
    multi_target : bool
        If True, the columns returned by NORBSmall.load(which_set,
        'info') are appended to the category label of each example.

    Notes
    -----
    `y_labels` is set to the largest label value plus one.
    """
    assert which_set in ['train', 'test']

    # Cast to float32 and flatten each example to a row of 2 * 96 * 96
    # values, as DenseDesignMatrix expects.
    images = np.cast['float32'](NORBSmall.load(which_set, 'dat'))
    X = images.reshape(-1, 2 * 96 * 96)

    # Category labels (stored as uint8 according to the original note).
    y = NORBSmall.load(which_set, 'cat')
    if multi_target:
        extra = NORBSmall.load(which_set, 'info')
        y = np.hstack((y[:, np.newaxis], extra))

    if center:
        X -= 127.5

    converter = dense_design_matrix.DefaultViewConverter((96, 96, 2))
    n_labels = np.max(y) + 1

    super(NORBSmall, self).__init__(X=X, y=y, y_labels=n_labels,
                                    view_converter=converter)
def __init__(self, which_set, center=False, gcn=False, one_hot=False,
             seed=132987):
    """
    Load all EmotiW face frames of a split as 48x48x1 images.

    Parameters
    ----------
    which_set : str
        'Train' or 'Val'; passed to `load_all_frames`.
    center : bool
        If True, subtract the per-feature mean from X.
    gcn : bool
        If True, standardize every example whose pixel sum is nonzero
        by its own mean and standard deviation; all-zero (pure-black)
        examples are left untouched.
    one_hot : bool
        If True, labels become 7-column float32 indicator rows.
    seed : int
        Seed of the `self.rng` generator stored on the instance.
    """
    assert which_set in ['Train', 'Val']

    self.rng = numpy.random.RandomState(seed)
    self.which_set = which_set
    self.center = center
    self.gcn = gcn
    self.one_hot = one_hot

    (X, y), self.meta = load_all_frames(which_set)

    # Divide by a float: with integer pixel data, `X / 255` truncates
    # under Python 2 integer division.
    X = (X / 255.).astype(config.floatX)
    y = y.astype(config.floatX)

    if gcn:
        # filter out pure-black images
        goodidx = numpy.where(numpy.sum(X, axis=1) != 0)
        meanx = numpy.mean(X, axis=1)[:, None]
        stdx = numpy.std(X, axis=1)[:, None]
        X[goodidx] = (X[goodidx] - meanx[goodidx]) / stdx[goodidx]

    if center:
        X -= numpy.mean(X, axis=0)

    if one_hot:
        # y is floating point at this point; numpy rejects float
        # indices, so convert the labels back to integers first.
        label_idx = y.astype('int64').ravel()
        indicators = numpy.zeros((y.shape[0], 7), dtype='float32')
        indicators[numpy.arange(y.shape[0]), label_idx] = 1.
        y = indicators

    view_converter = dense_design_matrix.DefaultViewConverter((48, 48, 1))

    super(EmotiwFaces, self).__init__(X=X, y=y,
                                      view_converter=view_converter)
def __init__(self, which_set, multi_target=False):
    """
    Build the small NORB design matrix for one split.

    :param which_set: one of ['train', 'test']

    :param multi_target: If False, each label is the value returned by
    SmallNORB.load(which_set, 'cat') (the object category). If True,
    each label is a vector: the category followed by the columns of
    SmallNORB.load(which_set, 'info'). The earlier documentation listed
    the extra columns in two conflicting orders (instance, lighting,
    elevation, azimuth); confirm the order against the 'info' file
    before relying on it.
    """
    assert which_set in ['train', 'test']

    self.which_set = which_set

    # put things in pylearn2's DenseDesignMatrix format: float32 rows of
    # 2 * 96 * 96 values
    images = numpy.cast['float32'](SmallNORB.load(which_set, 'dat'))
    X = images.reshape(-1, 2 * 96 * 96)

    # Category labels (stored as uint8 according to the original note).
    y = SmallNORB.load(which_set, 'cat')
    if multi_target:
        extra = SmallNORB.load(which_set, 'info')
        y = numpy.hstack((y[:, numpy.newaxis], extra))

    converter = dense_design_matrix.DefaultViewConverter((2, 96, 96))

    # TODO: let labels be accessible by key, like y.category, y.elevation,
    # etc.
    super(SmallNORB, self).__init__(X=X, y=y, view_converter=converter)
def __init__(self, which_set, center=False, gcn=None):
    """
    Load the CIFAR-100 python pickle for `which_set` ('train' or 'test').

    Parameters
    ----------
    which_set : str
        'train' or 'test'.
    center : bool
        If True, subtract 127.5 from every value.
    gcn : float or None
        If not None, each example has its mean removed, is scaled to
        unit Euclidean norm and is then multiplied by `gcn`.

    Notes
    -----
    Fine and coarse labels are exposed as `self.y_fine` and
    `self.y_coarse`; `self.y` is set to the fine labels after the base
    class has been initialised with y=None.
    """
    assert which_set in ['train', 'test']

    obj = serial.load(
        "${PYLEARN2_DATA_PATH}/cifar100/cifar-100-python/" + which_set)

    raw = obj['data']
    # The raw data is expected to span the full 8-bit range.
    assert raw.max() == 255.
    assert raw.min() == 0.

    X = N.cast['float32'](raw)

    self.center = center
    if center:
        X -= 127.5

    self.gcn = gcn
    if gcn is not None:
        assert isinstance(gcn, float)
        # Per-example: remove the mean, normalise to unit length, rescale.
        X = X - X.mean(axis=1)[:, None]
        X = X / np.sqrt(np.square(X).sum(axis=1))[:, None]
        X *= gcn

    converter = dense_design_matrix.DefaultViewConverter((32, 32, 3))

    # Targets are not implemented yet for the design matrix itself.
    super(CIFAR100, self).__init__(X=X, y=None, view_converter=converter)

    assert not N.any(N.isnan(self.X))

    self.y_fine = N.asarray(obj['fine_labels'])
    self.y_coarse = N.asarray(obj['coarse_labels'])

    self.y = self.y_fine