Example #1
    def __init__(self,
                 which_set,
                 label_type=None,
                 azimuth=False,
                 rotation=False,
                 texture=False,
                 center=False,
                 contrast_normalize=False,
                 seed=132987):
        assert which_set in ['train', 'valid', 'test']
        assert label_type in [
            None, 'label', 'azimuth', 'rotation', 'texture_id'
        ]

        # load data
        fname = '${PYLEARN2_DATA_PATH}/mnistplus/mnistplus'
        if azimuth:
            fname += '_azi'
        if rotation:
            fname += '_rot'
        if texture:
            fname += '_tex'

        data = load(fname + '.pkl')

        # get images and cast to floatX
        data_x = np.cast[config.floatX](data['data'])
        data_x = data_x[MNISTPlus.idx[which_set]]

        if contrast_normalize:
            meanx = np.mean(data_x, axis=1)[:, None]
            stdx = np.std(data_x, axis=1)[:, None]
            data_x = (data_x - meanx) / stdx

        if center:
            data_x -= np.mean(data_x, axis=0)

        # get labels
        data_y = None
        if label_type is not None:

            data_y = data[label_type]

            # convert to float for performing regression
            if label_type in ['azimuth', 'rotation']:
                data_y = np.cast[config.floatX](data_y / 360.)

            # retrieve only subset of data
            data_y = data_y[MNISTPlus.idx[which_set]]

        # create view converter for retrieving the topological view
        view_converter = dense_design_matrix.DefaultViewConverter((48, 48))

        # init the super class; y_labels only makes sense for integer
        # class labels, not for float regression targets or a missing y
        y_labels = None
        if data_y is not None and label_type not in ['azimuth', 'rotation']:
            y_labels = np.max(data_y) + 1

        super(MNISTPlus, self).__init__(X=data_x,
                                        y=data_y,
                                        y_labels=y_labels,
                                        view_converter=view_converter)

        assert not contains_nan(self.X)
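All of the constructors in these examples follow the same pattern: load a flat design matrix, describe the image shape with a DefaultViewConverter, and hand both to DenseDesignMatrix. As a point of reference, here is a minimal sketch, not taken from any example above and assuming only that numpy and pylearn2 are importable, of how the converter maps between the flat matrix and the topological (image-shaped) view:

import numpy as np
from pylearn2.datasets import dense_design_matrix

# random stand-in for a real 48x48 grayscale dataset
n_examples, rows, cols, channels = 10, 48, 48, 1
X = np.random.uniform(size=(n_examples, rows * cols * channels))
X = X.astype('float32')

view_converter = dense_design_matrix.DefaultViewConverter((rows, cols, channels))
dataset = dense_design_matrix.DenseDesignMatrix(X=X, view_converter=view_converter)

topo = dataset.get_topological_view()   # shape (10, 48, 48, 1)
design = dataset.get_design_matrix()    # back to shape (10, 2304)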
Example #2
    def __init__(self, which_set, center=False):

        assert which_set in ['train', 'test']

        path = "${PYLEARN2_DATA_PATH}/cifar100/cifar-100-python/" + which_set

        obj = serial.load(path)
        X = obj['data']

        assert X.max() == 255.
        assert X.min() == 0.

        X = N.cast['float32'](X)
        y = None  #not implemented yet

        if center:
            X -= 127.5

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3))

        super(CIFAR100, self).__init__(X=X, y=y, view_converter=view_converter)

        assert not N.any(N.isnan(self.X))

        self.y_fine = N.asarray(obj['fine_labels'])
        self.y_coarse = N.asarray(obj['coarse_labels'])
Example #3
    def __init__(self, which_set, center=False, one_hot=False):
        path = "${PYLEARN2_DATA_PATH}/mnist/mnist_rotation_back_image/" \
            + which_set

        obj = serial.load(path)
        X = obj['data']
        X = N.cast['float32'](X)
        y = N.asarray(obj['labels'])

        self.one_hot = one_hot
        if one_hot:
            one_hot = N.zeros((y.shape[0], 10), dtype='float32')
            for i in xrange(y.shape[0]):
                one_hot[i, y[i]] = 1.
            y = one_hot

        if center:
            X -= X.mean(axis=0)

        view_converter = dense_design_matrix.DefaultViewConverter((28, 28, 1))

        super(MNIST_rotated_background,
              self).__init__(X=X, y=y, view_converter=view_converter)

        assert not N.any(N.isnan(self.X))
Example #4
    def __init__(self, which_set, one_hot=False):

        assert which_set in ['train', 'valid', 'test']

        data = icml07.icml07_loaders()
        data = data['rectangles']
        data_x, data_y = data.load_from_numpy()

        if which_set == 'train':
            data_x = data_x[:1000]
            data_y = data_y[:1000]
        elif which_set == 'valid':
            data_x = data_x[1000:1000 + 200]
            data_y = data_y[1000:1000 + 200]
        else:
            data_x = data_x[1000 + 200:1000 + 200 + 50000]
            data_y = data_y[1000 + 200:1000 + 200 + 50000]

        assert data_x.shape[0] == data_y.shape[0]

        self.one_hot = one_hot
        if one_hot:
            one_hot = numpy.zeros((data_y.shape[0], 2), dtype='float32')
            for i in xrange(data_y.shape[0]):
                one_hot[i, data_y[i]] = 1.
            data_y = one_hot

        view_converter = dense_design_matrix.DefaultViewConverter((28, 28, 1))
        super(Rectangles, self).__init__(X=data_x,
                                         y=data_y,
                                         view_converter=view_converter)

        assert not numpy.any(numpy.isnan(self.X))
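Several examples (3, 4, 9, and 28) build one-hot targets with an explicit Python loop. A vectorized equivalent, given here only as a sketch and assuming data_y is a 1-D array of integer class indices, is:

import numpy as np

data_y = np.array([0, 1, 1, 0])      # toy integer labels
n_classes = 2
one_hot = np.zeros((data_y.shape[0], n_classes), dtype='float32')
one_hot[np.arange(data_y.shape[0]), data_y] = 1.   # fancy indexing replaces the loop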
Example #5
    def __init__(self, which_set, center=False, multi_target=False):
        """
        :param which_set: one of ['train','test']
        :param center: data is in range [0,256], center=True subtracts 127.5.
        :param multi_target: load extra information as additional labels.
        """
        assert which_set in ['train', 'test']

        X = NORBSmall.load(which_set, 'dat')

        # put things in pylearn2's DenseDesignMatrix format
        X = numpy.cast['float32'](X)
        X = X.reshape(-1, 2 * 96 * 96)

        #this is uint8
        y = NORBSmall.load(which_set, 'cat')
        if multi_target:
            y_extra = NORBSmall.load(which_set, 'info')
            y = numpy.hstack((y[:, numpy.newaxis], y_extra))

        if center:
            X -= 127.5

        view_converter = dense_design_matrix.DefaultViewConverter((96, 96, 2))

        super(NORBSmall, self).__init__(X=X,
                                        y=y,
                                        view_converter=view_converter)
Example #6
    def __init__(self, which_set, which_experiment, start=None, stop=None,
                 axes=('b', 0, 1, 'c'), preprocessor=None):
        # note: there is no such thing as the cifar10 validation set;
        # pylearn1 defined one but really it should be user-configurable
        # (as it is here)
        assert which_set in ['train', 'test']
        assert which_experiment in ['S100', 'ADD3_10_S100', 'ADD3_10_S250',
                                    'ADD3_ALL_S100', 'RM3_S100', 'RP3_S100']
        self.experiment = which_experiment

        data_dir = string_utils.preprocess('${PYLEARN2_DATA_PATH}')
        experiment_folder_string = "experiment_" + which_experiment.lower()
        path = os.path.join(data_dir, "cifar10", experiment_folder_string,
                            which_set + ".pkl")
        meta_path = os.path.join(data_dir, "cifar10",
                                 experiment_folder_string, "meta")

        self.axes = axes

        # we also expose the following details:
        self.img_shape = (3, 32, 32)
        self.img_size = numpy.prod(self.img_shape)

        meta = serial.load(meta_path)
        self.label_names = meta['label_names']
        self.n_classes = len(self.label_names)

        obj = serial.load(path)
        X = obj['data']

        assert X.max() == 255.
        assert X.min() == 0.

        X = numpy.cast['float32'](X)
        y = numpy.asarray(obj['labels']).astype('uint8')

        # DenseDesignMatrix expects a 2-D label matrix
        y = y.reshape((y.shape[0], 1))

        if start is not None:
            assert start >= 0
            assert stop > start
            assert stop <= X.shape[0]
            X = X[start:stop, :]
            y = y[start:stop, :]
        assert X.shape[0] == y.shape[0]

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)

        super(Experiment, self).__init__(X=X, y=y, y_labels=self.n_classes,
                                         view_converter=view_converter,
                                         axes=self.axes)

        assert not contains_nan(self.X)
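Examples 1, 6, and 27 also pass y_labels to the superclass; it tells DenseDesignMatrix how many distinct integer values y can take. A toy sketch, assuming a pylearn2 version whose DenseDesignMatrix accepts the y_labels argument:

import numpy as np
from pylearn2.datasets import dense_design_matrix

X = np.random.uniform(size=(6, 4)).astype('float32')
y = np.array([[0], [1], [2], [0], [1], [2]], dtype='uint8')

# y_labels = number of classes = max label + 1
ds = dense_design_matrix.DenseDesignMatrix(X=X, y=y, y_labels=int(y.max()) + 1)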
Example #7
    def __init__(self):

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3))

        super(DebugDataset, self).__init__(X=N.asarray([[1.0, 0.0], [0.0,
                                                                     1.0]]),
                                           view_converter=view_converter)

        assert not N.any(N.isnan(self.X))
Example #8
    def __init__(self):

        X = 1. - N.load("/data/lisa/data/wiskott/"
                        "wiskott_fish_layer0_15_standard_64x64_shuffled.npy")

        view_converter = dense_design_matrix.DefaultViewConverter((64, 64, 1))

        super(Wiskott, self).__init__(X=X, view_converter=view_converter)

        assert not N.any(N.isnan(self.X))
Example #9
    def __init__(self, which_set, one_hot=False, axes=['b', 0, 1, 'c']):
        """
        .. todo::

            WRITEME
        """
        self.args = locals()

        assert which_set in self.data_split.keys()

        path = serial.preprocess(
            "${PYLEARN2_DATA_PATH}/ocr_letters/letter.data")
        with open(path, 'r') as data_f:
            data = data_f.readlines()
            data = [line.split("\t") for line in data]

        data_x = [map(int, item[6:-1]) for item in data]
        data_letters = [item[1] for item in data]
        data_fold = [int(item[5]) for item in data]

        letters = list(numpy.unique(data_letters))
        data_y = [letters.index(item) for item in data_letters]

        if which_set == 'train':
            split = slice(0, self.data_split['train'])
        elif which_set == 'valid':
            split = slice(self.data_split['train'], self.data_split['train'] +
                          self.data_split['valid'])
        elif which_set == 'test':
            split = slice(self.data_split['train'] + self.data_split['valid'],
                          (self.data_split['train'] +
                           self.data_split['valid'] +
                           self.data_split['test']))

        data_x = numpy.asarray(data_x[split])
        data_y = numpy.asarray(data_y[split])
        data_fold = numpy.asarray(data_fold[split])
        assert data_x.shape[0] == data_y.shape[0]
        assert data_x.shape[0] == self.data_split[which_set]

        self.one_hot = one_hot
        if one_hot:
            one_hot = numpy.zeros(
                (data_y.shape[0], len(letters)), dtype='float32')
            for i in xrange(data_y.shape[0]):
                one_hot[i, data_y[i]] = 1.
            data_y = one_hot

        view_converter = dense_design_matrix.DefaultViewConverter(
            (16, 8, 1), axes)
        super(OCR, self).__init__(
            X=data_x, y=data_y, view_converter=view_converter)

        assert not contains_nan(self.X)
        self.fold = data_fold
Example #10
    def __init__(self):
        path = "${PYLEARN2_DATA_PATH}/wiskott/wiskott"\
             + "_fish_layer0_15_standard_64x64_shuffled.npy"

        X = 1. - load(path)

        view_converter = dense_design_matrix.DefaultViewConverter((64, 64, 1))

        super(Wiskott, self).__init__(X=X, view_converter=view_converter)

        assert not N.any(N.isnan(self.X))
Example #11
    def __init__(self, which_set):
        conf = utils.get_config()
        paths = utils.get_paths()
        region_size = conf['region_size']
        self.h5file = tables.open_file(paths[which_set])
        node = self.h5file.root.Data
        axes = ('b', 0, 1, 'c')
        channels = node.X.shape[1] // (region_size * region_size)
        view_converter = dense_design_matrix.DefaultViewConverter(
            (region_size, region_size, channels), axes)
        super(BCDR, self).__init__(
            X=node.X, view_converter=view_converter, y=node.y)
Example #12
    def __init__(self,
                 start=None,
                 stop=None,
                 shuffle=False,
                 rng=None,
                 seed=132987,
                 center=False,
                 scale=False,
                 axes=('b', 0, 1, 'c'),
                 preprocessor=None,
                 which_ds='kaggle'):

        data_x, data_y = self.load_data(which=which_ds,
                                        center=center,
                                        scale=scale)
        tfd = TFD('train', one_hot=1, scale=scale)
        data_x = np.concatenate((data_x, tfd.X))
        data_y = np.concatenate((data_y, tfd.y))
        tfd = TFD('valid', one_hot=1, scale=scale)
        data_x = np.concatenate((data_x, tfd.X))
        data_y = np.concatenate((data_y, tfd.y))

        if shuffle:
            rng = rng if rng else np.random.RandomState(seed)
            rand_idx = rng.permutation(len(data_x))
            data_x = data_x[rand_idx]
            data_y = data_y[rand_idx]

        if start is not None or stop is not None:
            if start is None:
                start = 0
            else:
                assert start >= 0
            if stop is None:
                # slice to the end; a -1 sentinel would silently drop the
                # last example
                stop = len(data_x)
            assert stop > start
            data_x = data_x[start:stop]
            data_y = data_y[start:stop]

        if center:
            data_x -= 0.5

        self.axes = axes
        view_converter = dense_design_matrix.DefaultViewConverter((48, 48, 1),
                                                                  axes)
        super(GoogleTFDDataset, self).__init__(X=data_x,
                                               y=data_y,
                                               view_converter=view_converter)
        assert not np.any(np.isnan(self.X))

        if preprocessor is not None:
            preprocessor.apply(self)
Example #13
    def __init__(self, which_set=None, file=None, center=False, rescale=False,
                 gcn=None, one_hot=False, start=None, stop=None,
                 axes=('b', 0, 1, 'c'), toronto_prepro=False,
                 preprocessor=None):
        # note: there is no such thing as the cifar10 validation set;
        # pylearn1 defined one but really it should be user-configurable
        # (as it is here)

        self.axes = axes

        # we also expose the following details:
        self.img_shape = (3, 32, 32)
        self.img_size = N.prod(self.img_shape)

        # load the test data directly from the given file
        X = np.load(file).astype(np.float32)

        if center:
            X -= 127.5
        self.center = center

        if rescale:
            X /= 127.5
        self.rescale = rescale

        self.toronto_prepro = toronto_prepro

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        view_converter = dense_design_matrix.DefaultViewConverter((32,32,3), axes)

        super(CIFAR10_TEST, self).__init__(X=X, view_converter=view_converter)

        assert not np.any(np.isnan(self.X))

        if preprocessor:
            preprocessor.apply(self)
Example #14
    def __init__(self,
                 which_set,
                 center=False,
                 scale=False,
                 start=None,
                 stop=None,
                 axes=('b', 0, 1, 'c'),
                 preprocessor=None):
        """
        A version of SVHN dataset that loads everything into the memory
        instead of using pytables.
        """

        assert which_set in self.mapper.keys()

        self.__dict__.update(locals())
        del self.self

        path = '${PYLEARN2_DATA_PATH}/SVHN/format2/'

        # load data
        path = preprocess(path)
        data_x, data_y = self.make_data(which_set, path)

        # rescale or center if permitted
        if center and scale:
            data_x -= 127.5
            data_x /= 127.5
        elif center:
            data_x -= 127.5
        elif scale:
            data_x /= 255.

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)
        super(SVHN_On_Memory, self).__init__(X=data_x,
                                             y=data_y,
                                             view_converter=view_converter)

        if preprocessor:
            if which_set in ['train', 'train_all', 'splitted_train']:
                can_fit = True
            else:
                can_fit = False
            preprocessor.apply(self, can_fit)

        del data_x, data_y
        gc.collect()
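The center/scale branches in the SVHN example above map uint8 pixels into different ranges: both flags together give roughly [-1, 1], center alone [-127.5, 127.5], and scale alone [0, 1]. The helper below is hypothetical (not part of the dataset class) and only mirrors that branch logic:

import numpy as np

def rescale_pixels(x, center=False, scale=False):
    # mirrors the center/scale branches of the SVHN example
    x = x.astype('float32')
    if center and scale:
        return (x - 127.5) / 127.5   # about [-1, 1]
    elif center:
        return x - 127.5             # [-127.5, 127.5]
    elif scale:
        return x / 255.              # [0, 1]
    return x

print(rescale_pixels(np.array([0, 255], dtype='uint8'), center=True, scale=True))
# [-1.  1.]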
Example #15
    def __init__(self, which_set, center=False):
        path = "${PYLEARN2_DATA_PATH}/mnist/mnist_rotation_back_image/" \
            + which_set

        obj = serial.load(path)
        X = obj['data']
        X = N.cast['float32'](X)
        y = N.asarray(obj['labels'])

        if center:
            X -= X.mean(axis=0)

        view_converter = dense_design_matrix.DefaultViewConverter((28, 28, 1))

        super(MNIST, self).__init__(X=X, y=y, view_converter=view_converter)

        assert not N.any(N.isnan(self.X))
Example #16
    def __init__(self,
                 start=None,
                 stop=None,
                 shuffle=False,
                 rng=None,
                 seed=132987,
                 center=False,
                 axes=('b', 0, 1, 'c'),
                 preprocessor=None):

        path = "/data/lisa/data/faces/GoogleDataset/Clean/latest.pkl"
        data = serial.load(path)
        data_x = data[0]
        data_y = data[1]
        assert len(data_x) == len(data_y)

        if shuffle:
            rng = rng if rng else np.random.RandomState(seed)
            rand_idx = rng.permutation(len(data_x))
            data_x = data_x[rand_idx]
            data_y = data_y[rand_idx]

        if start is not None or stop is not None:
            if start is None:
                start = 0
            else:
                assert start >= 0
            if stop is None:
                # slice to the end; a -1 sentinel would silently drop the
                # last example
                stop = len(data_x)
            assert stop > start
            data_x = data_x[start:stop]
            data_y = data_y[start:stop]

        if center:
            data_x -= 0.5

        self.axes = axes
        view_converter = dense_design_matrix.DefaultViewConverter((48, 48, 1),
                                                                  axes)
        super(GoogleDataset, self).__init__(X=data_x,
                                            y=data_y,
                                            view_converter=view_converter)
        assert not np.any(np.isnan(self.X))

        if preprocessor is not None:
            preprocessor.apply(self)
Example #17
    def __init__(self,
                 path=None,
                 start=None,
                 stop=None,
                 shuffle=True,
                 rng=None,
                 seed=132987,
                 center=False,
                 scale=False,
                 axes=('b', 0, 1, 'c'),
                 preprocessor=None,
                 which_set='test'):

        if path is None:
            # the source hard-codes a local scratch copy; the earlier
            # default was '/data/lisa/data/faces/EmotiW/preproc/'
            path = '/Tmp/zumerjer/'
        mode = 'r'

        path = preprocess(path)
        #if which_set == 'valid':
        #    which_set = 'val'
        file_n = "{}{}.h5".format(path, which_set)
        make_new = not os.path.isfile(file_n)

        if make_new:
            self.make_data(path, shuffle, rng, seed, which_set, start, stop)

        self.h5file = tables.openFile(file_n, mode=mode)
        data = self.h5file.getNode('/', "Data")

        if not make_new and (start is not None or stop is not None):
            raise ValueError("start/stop are only supported when the h5 "
                             "file is being rebuilt")

        self.axes = axes
        view_converter = dense_design_matrix.DefaultViewConverter((96, 96, 3),
                                                                  axes)
        super(ComboDatasetPyTable,
              self).__init__(X=data.X, y=data.y, view_converter=view_converter)
        assert not np.any(np.isnan(self.X))

        if preprocessor is not None:
            preprocessor.apply(self)
Example #18
    def test_zero_image(self):
        """
        Test on zero-value image if cause any division by zero
        """

        X = as_floatX(np.zeros((5, 32 * 32 * 3)))

        axes = ['b', 0, 1, 'c']
        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)
        dataset = DenseDesignMatrix(X=X, view_converter=view_converter)
        dataset.axes = axes
        preprocessor = LeCunLCN(img_shape=[32, 32])
        dataset.apply_preprocessor(preprocessor)
        result = dataset.get_design_matrix()

        assert isfinite(result)
Example #19
    def __init__(self, which_set, center=False):

        # dear pylearn.datasets.cifar: there is no such thing as the cifar10
        # validation set. quit pretending that there is.
        orig = cifar10.cifar10(ntrain=50000, nvalid=0, ntest=10000)

        Xs = {'train': orig.train.x, 'test': orig.test.x}

        X = N.cast['float32'](Xs[which_set])

        if center:
            X -= 127.5

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3))

        super(CIFAR10, self).__init__(X=X, view_converter=view_converter)

        assert not N.any(N.isnan(self.X))
Example #20
    def __init__(self, which_set, center=False):

        orig = icml07.MNIST_rotated_background(n_train=10000,
                                               n_valid=2000,
                                               n_test=10000)

        sets = {'train': orig.train, 'valid': orig.valid, 'test': orig.test}

        X = numpy.cast['float32'](sets[which_set].x)
        y = sets[which_set].y

        view_converter = dense_design_matrix.DefaultViewConverter((28, 28, 1))

        super(MNIST_rotated_background,
              self).__init__(X=X, y=y, view_converter=view_converter)

        assert not numpy.any(numpy.isnan(self.X))
Example #21
    def test_channel(self):
        """
        Test if works fine withe different number of channel as argument
        """

        rng = np.random.RandomState([1, 2, 3])
        X = as_floatX(rng.randn(5, 32 * 32 * 3))

        axes = ['b', 0, 1, 'c']
        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)
        dataset = DenseDesignMatrix(X=X, view_converter=view_converter)
        dataset.axes = axes
        preprocessor = LeCunLCN(img_shape=[32, 32], channels=[1, 2])
        dataset.apply_preprocessor(preprocessor)
        result = dataset.get_design_matrix()

        assert isfinite(result)
Example #22
    def __init__(self, which_set, center=False):

        # dear pylearn.datasets.MNIST: there is no such thing as the MNIST
        # validation set. quit pretending that there is.
        orig = i_hate_python.train_valid_test(ntrain=60000, nvalid=0,
                                              ntest=10000)

        Xs = {
            'train': orig.train.x,
            'test': orig.test.x
        }

        X = N.cast['float32'](Xs[which_set])

        if center:
            # centering is not supported by this loader
            assert False

        view_converter = dense_design_matrix.DefaultViewConverter((28, 28, 1))

        super(MNIST, self).__init__(X=X, view_converter=view_converter)

        assert not N.any(N.isnan(self.X))
Example #23
def test_rgb_yuv():
    """
    Test on a random image if the per-processor loads and works without
    anyerror and doesn't result in any nan or inf values

    """

    rng = np.random.RandomState([1, 2, 3])
    X = as_floatX(rng.randn(5, 32 * 32 * 3))

    axes = ['b', 0, 1, 'c']
    view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                              axes)
    dataset = DenseDesignMatrix(X=X, view_converter=view_converter)
    dataset.axes = axes
    preprocessor = RGB_YUV()
    dataset.apply_preprocessor(preprocessor)
    result = dataset.get_design_matrix()

    assert isfinite(result)
Example #24
    def test_random_image(self):
        """
        Test on a random image if the per-processor loads and works without
        anyerror and doesn't result in any nan or inf values

        """

        rng = np.random.RandomState([1, 2, 3])
        X = as_floatX(rng.randn(5, 32 * 32 * 3))

        axes = ['b', 0, 1, 'c']
        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)
        dataset = DenseDesignMatrix(X=X, view_converter=view_converter)
        dataset.axes = axes
        preprocessor = LeCunLCN(img_shape=[32, 32])
        dataset.apply_preprocessor(preprocessor)
        result = dataset.get_design_matrix()

        assert not np.any(np.isnan(result))
        assert not np.any(np.isinf(result))
Example #25
    def __init__(self, which_set, multi_target=False):
        """
        :param which_set: one of ['train', 'test'] :param multi_target: If
        True, each label is an integer labeling the image catergory. If False,
        each label is a vector: [category, instance, lighting, elevation,
        azimuth]. All labels are given as integers. Use the categories,
        elevation_degrees, and azimuth_degrees arrays to map from these
        integers to actual values.

        :param multi_target: If False, labels will be integers indicating
        object category. If True, labels will be vectors of integers,
        indicating [ category, instance, elevation, azimuth, lighting ].
        """

        assert which_set in ['train', 'test']

        self.which_set = which_set

        X = SmallNORB.load(which_set, 'dat')

        # Casts to the GPU-supported float type, using theano._asarray(), a
        # safer alternative to numpy.asarray().
        X = theano._asarray(X, theano.config.floatX)

        # Formats data as rows in a matrix, for DenseDesignMatrix
        X = X.reshape(-1, 2*96*96)

        # This is uint8
        y = SmallNORB.load(which_set, 'cat')
        if multi_target:
            y_extra = SmallNORB.load(which_set, 'info')
            y = numpy.hstack((y[:, numpy.newaxis], y_extra))

        view_converter = dense_design_matrix.DefaultViewConverter((2, 96, 96))

        # TODO: let labels be accessible by key, like y.category, y.elevation,
        # etc.
        super(SmallNORB, self).__init__(X=X,
                                        y=y,
                                        view_converter=view_converter)
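Examples 5, 25, 27, and 29 flatten each SmallNORB stereo pair to a row of length 2*96*96 and then declare either (96, 96, 2) or (2, 96, 96) as the view shape; the converter just reshapes each row to that shape, so the two conventions put the stereo channel in different positions. A sketch with random data in place of the real NORB loader:

import numpy as np
from pylearn2.datasets import dense_design_matrix

X = np.random.uniform(size=(4, 2 * 96 * 96)).astype('float32')

ds = dense_design_matrix.DenseDesignMatrix(
    X=X,
    view_converter=dense_design_matrix.DefaultViewConverter((96, 96, 2)))
print(ds.get_topological_view().shape)   # (4, 96, 96, 2): stereo images as channels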
Example #26
    def __init__(self, which_set, center = False):

        if which_set not in ['train', 'test']:
            if which_set == 'valid':
                raise ValueError(
                    "There is no such thing as the MNIST validation set. "
                    "MNIST consists of 60,000 train examples and 10,000 test "
                    "examples. If you wish to use a validation set you should "
                    "divide the train set yourself. The pylearn2 dataset "
                    "implements and will only ever implement the standard "
                    "train / test split used in the literature.")
            raise ValueError('Unrecognized which_set value "%s". '
                             'Valid values are ["train", "test"].' %
                             (which_set,))


        path = "${PYLEARN2_DATA_PATH}/mnist/mnist-python/%s.pkl" % which_set

        obj = serial.load(path)
        X = obj['data']
        X = N.cast['float32'](X)
        y = N.asarray(obj['labels'])

        assert len(X.shape) == 2
        assert X.shape[1] == 784

        if which_set == 'train':
            assert X.shape[0] == 60000
        elif which_set == 'test':
            assert X.shape[0] == 10000
        else:
            assert False


        if center:
            X -= X.mean(axis=0)

        view_converter = dense_design_matrix.DefaultViewConverter((28, 28, 1))

        super(MNIST, self).__init__(X=X, y=y, view_converter=view_converter)

        assert not N.any(N.isnan(self.X))
Example #27
    def __init__(self, which_set, center=False, multi_target=False):
        assert which_set in ['train', 'test']

        X = NORBSmall.load(which_set, 'dat')

        # put things in pylearn2's DenseDesignMatrix format
        X = np.cast['float32'](X)
        X = X.reshape(-1, 2 * 96 * 96)

        # this is uint8
        y = NORBSmall.load(which_set, 'cat')
        if multi_target:
            y_extra = NORBSmall.load(which_set, 'info')
            y = np.hstack((y[:, np.newaxis], y_extra))

        if center:
            X -= 127.5

        view_converter = dense_design_matrix.DefaultViewConverter((96, 96, 2))

        super(NORBSmall, self).__init__(X=X, y=y, y_labels=np.max(y) + 1,
                                        view_converter=view_converter)
Example #28
    def __init__(self,
                 which_set,
                 center=False,
                 gcn=False,
                 one_hot=False,
                 seed=132987):

        assert which_set in ['Train', 'Val']
        self.rng = numpy.random.RandomState(seed)
        self.which_set = which_set
        self.center = center
        self.gcn = gcn
        self.one_hot = one_hot

        (X, y), self.meta = load_all_frames(which_set)
        X = (X / 255.).astype(config.floatX)
        y = y.astype(config.floatX)

        if gcn:
            # skip pure-black (all-zero) images so we never divide by a
            # zero standard deviation
            goodidx = numpy.where(numpy.sum(X, axis=1) != 0)
            meanx = numpy.mean(X, axis=1)[:, None]
            stdx = numpy.std(X, axis=1)[:, None]
            X[goodidx] = (X[goodidx] - meanx[goodidx]) / stdx[goodidx]

        if center:
            X -= numpy.mean(X, axis=0)

        if one_hot:
            one_hot = numpy.zeros((y.shape[0], 7), dtype='float32')
            for i in xrange(y.shape[0]):
                one_hot[i, y[i]] = 1.
            y = one_hot

        view_converter = dense_design_matrix.DefaultViewConverter((48, 48, 1))
        super(EmotiwFaces, self).__init__(X=X,
                                          y=y,
                                          view_converter=view_converter)
Example #29
    def __init__(self, which_set, multi_target=False):
        """
        :param which_set: one of ['train', 'test'] :param multi_target: If True,
        each label is an integer labeling the image catergory. If False, each
        label is a vector: [category, instance, lighting, elevation,
        azimuth]. All labels are given as integers. Use the categories,
        elevation_degrees, and azimuth_degrees arrays to map from these integers
        to actual values.

        :param multi_target: If False, labels will be integers indicating object
        category. If True, labels will be vectors of integers, indicating [
        category, instance, elevation, azimuth, lighting ].
        """

        assert which_set in ['train', 'test']

        self.which_set = which_set

        X = SmallNORB.load(which_set, 'dat')

        # put things in pylearn2's DenseDesignMatrix format
        X = numpy.cast['float32'](X)
        X = X.reshape(-1, 2 * 96 * 96)

        #this is uint8
        y = SmallNORB.load(which_set, 'cat')
        if multi_target:
            y_extra = SmallNORB.load(which_set, 'info')
            y = numpy.hstack((y[:, numpy.newaxis], y_extra))

        view_converter = dense_design_matrix.DefaultViewConverter((2, 96, 96))

        # TODO: let labels be accessible by key, like y.category, y.elevation,
        # etc.
        super(SmallNORB, self).__init__(X=X,
                                        y=y,
                                        view_converter=view_converter)
Example #30
    def __init__(self, which_set, center=False, gcn=None):

        assert which_set in ['train', 'test']

        path = "${PYLEARN2_DATA_PATH}/cifar100/cifar-100-python/" + which_set

        obj = serial.load(path)
        X = obj['data']

        assert X.max() == 255.
        assert X.min() == 0.

        X = N.cast['float32'](X)
        y = None  #not implemented yet

        self.center = center

        if center:
            X -= 127.5

        self.gcn = gcn
        if gcn is not None:
            assert isinstance(gcn, float)
            X = (X.T - X.mean(axis=1)).T
            X = (X.T / np.sqrt(np.square(X).sum(axis=1))).T
            X *= gcn

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3))

        super(CIFAR100, self).__init__(X=X, y=y, view_converter=view_converter)

        assert not N.any(N.isnan(self.X))

        self.y_fine = N.asarray(obj['fine_labels'])
        self.y_coarse = N.asarray(obj['coarse_labels'])

        self.y = self.y_fine