def get_training_subset(train_X, name):
    """
    Select one coarse class of the TL_Challenge training set and build
    leave-one-out fold indices for it.

    Parameters
    ----------
    train_X : 2-D array
        Indexed as ``train_X[mask, :]`` with a boolean mask computed from
        ``TL_Challenge(which_set='train').y_coarse``, so its rows must line
        up with the TL_Challenge training examples.
    name : str
        ``'omnivore'`` selects coarse class 11 (40 examples expected); any
        other value selects coarse class 4 (25 examples expected).

    Returns
    -------
    sub_X : array
        The rows of `train_X` belonging to the selected coarse class.
    sub_y : array
        The fine labels (``y_fine``) of those rows.
    fold_indices : ndarray, dtype uint16, shape (num_ex, num_ex - 1)
        Row ``i`` lists every 1-based example index except ``i + 1``.

    Raises
    ------
    AssertionError
        If the number of selected examples differs from the expected count.
    """
    if name == 'omnivore':
        coarse_class, num_ex = 11, 40
    else:
        coarse_class, num_ex = 4, 25

    tlc = TL_Challenge(which_set='train')
    class_mask = tlc.y_coarse == coarse_class
    sub_X = train_X[class_mask, :]
    sub_y = tlc.y_fine[class_mask]

    assert sub_X.shape[0] == num_ex
    assert sub_y.shape == (num_ex, )

    # Indices are 1-based to mimic the matlab format of stl10.
    # np.arange(..., dtype=...) replaces np.cast, which NumPy 2.0 removed.
    idx_list = np.arange(1, num_ex + 1, dtype='uint16')
    fold_indices = np.zeros((num_ex, num_ex - 1), dtype='uint16')

    # range (not xrange) keeps this runnable on both Python 2 and Python 3.
    for held_out in range(1, num_ex + 1):
        # Each fold keeps every index except the held-out one.
        fold_indices[held_out - 1, :] = idx_list[idx_list != held_out]

    assert fold_indices.min() == 1
    assert fold_indices.max() == num_ex

    return sub_X, sub_y, fold_indices
class TestTL_Challenge(unittest.TestCase):
    """Tests for the TL_Challenge dataset built for each of its splits."""

    def setUp(self):
        # Called first, before any dataset is constructed; presumably skips
        # the test when the dataset files are unavailable.
        skip_if_no_data()
        # Build the splits in the order train, unlabeled, test and store
        # each one on the instance under the attribute of the same name.
        for split in ('train', 'unlabeled', 'test'):
            setattr(self, split, TL_Challenge(which_set=split))

    def test_topo(self):
        """Tests that a topological batch has 4 dimensions"""
        batch = self.train.get_batch_topo(1)
        assert batch.ndim == 4
def get_labels():
    """
    Return the coarse labels of the CIFAR-100 and TL_Challenge training sets.

    CIFAR-100 coarse labels other than 3, 4, 6, 7, 11 and 12 are overwritten
    with 0. The overwrite is done in place on the array returned by
    ``CIFAR100(which_set="train").y_coarse``.

    Returns
    -------
    train_y : array, shape (50000,)
        CIFAR-100 training coarse labels after the overwrite described above.
    test_y : array
        ``y_coarse`` of ``TL_Challenge(which_set='train')``, unmodified.

    Raises
    ------
    AssertionError
        If the CIFAR-100 coarse label array does not have shape (50000,).
    """
    kept_classes = [3, 4, 6, 7, 11, 12]

    cifar100 = CIFAR100(which_set="train")
    train_y = cifar100.y_coarse
    assert train_y.shape == (50000, )

    # Compare every label against every kept class at once (broadcast to
    # shape (50000, 6)). This replaces the per-element xrange loop, which
    # does not run on Python 3.
    is_kept = (train_y[:, None] == kept_classes).any(axis=1)
    # In-place assignment, as in the original element-wise loop.
    train_y[~is_kept] = 0

    tlc = TL_Challenge(which_set='train')
    test_y = tlc.y_coarse

    return train_y, test_y
def get_labels_and_fold_indices():
    """
    Return the TL_Challenge training labels together with leave-one-out
    fold indices over 120 examples.

    Returns
    -------
    y_fine : array
        ``y_fine`` of ``TL_Challenge(which_set='train')``.
    y_coarse : array
        ``y_coarse`` of the same dataset.
    fold_indices : ndarray, dtype uint16, shape (120, 119)
        Row ``i`` lists every 1-based example index in 1..120 except
        ``i + 1``.

    Raises
    ------
    AssertionError
        If the dataset has no fine labels.
    """
    num_ex = 120

    tlc = TL_Challenge(which_set='train')
    assert tlc.y_fine is not None

    # Indices are 1-based to mimic the matlab format of stl10.
    # np.arange(..., dtype=...) replaces np.cast, which NumPy 2.0 removed.
    idx_list = np.arange(1, num_ex + 1, dtype='uint16')
    fold_indices = np.zeros((num_ex, num_ex - 1), dtype='uint16')

    # range (not xrange) keeps this runnable on both Python 2 and Python 3.
    for held_out in range(1, num_ex + 1):
        # Each fold keeps every index except the held-out one.
        fold_indices[held_out - 1, :] = idx_list[idx_list != held_out]

    assert fold_indices.min() == 1
    assert fold_indices.max() == num_ex

    return tlc.y_fine, tlc.y_coarse, fold_indices
def setUp(self):
    # Called first, before any dataset is constructed; presumably skips the
    # test when the dataset files are unavailable.
    skip_if_no_data()
    # Build the splits in the order train, unlabeled, test and store each
    # one on the instance under the attribute of the same name.
    for split in ('train', 'unlabeled', 'test'):
        setattr(self, split, TL_Challenge(which_set=split))
def test_topo(self):
    """Tests that a topological batch has 4 dimensions"""
    dataset = TL_Challenge(which_set='train')
    # Request a topological batch of size 1 and check its rank.
    batch = dataset.get_batch_topo(1)
    assert batch.ndim == 4
def test_load(self):
    # Construct the 'unlabeled' and 'test' splits, in that order; the test
    # passes as long as neither constructor raises.
    for split in ('unlabeled', 'test'):
        TL_Challenge(which_set=split)
# Builds a dataset of 2,000,000 6x6 patches from CIFAR-100 (train) stacked
# with the TL_Challenge unlabeled and train sets, applies global contrast
# normalization and ZCA, and saves the result under $GOODFELI_TMP.
import os

import numpy as np

from pylearn2.datasets import preprocessing
from pylearn2.datasets.cifar100 import CIFAR100
from pylearn2.datasets.tl_challenge import TL_Challenge
from pylearn2.utils import serial

# Raises KeyError if the GOODFELI_TMP environment variable is not set.
goodfeli_tmp = os.environ['GOODFELI_TMP']

# Stack the three design matrices row-wise into the CIFAR-100 dataset object.
train = CIFAR100(which_set="train")
aug = TL_Challenge(which_set="unlabeled")
aug2 = TL_Challenge(which_set="train")
train.set_design_matrix(
    np.concatenate((train.X, aug.X, aug2.X), axis=0))

# Drop the references to the auxiliary datasets before preprocessing.
del aug, aug2

# Preprocessing steps, appended to the pipeline in this order.
pipeline = preprocessing.Pipeline()
for step in (
        preprocessing.ExtractPatches(patch_shape=(6, 6),
                                     num_patches=2000000),
        preprocessing.GlobalContrastNormalization(),
        preprocessing.ZCA()):
    pipeline.items.append(step)

train.apply_preprocessor(preprocessor=pipeline, can_fit=True)

# Point the dataset at a .npy design file, then save the dataset object.
design_path = goodfeli_tmp + '/tl_challenge_patches_2M_6x6_design.npy'
pickle_path = goodfeli_tmp + '/tl_challenge_patches_2M_6x6.pkl'
train.use_design_loc(design_path)
serial.save(pickle_path, train)