Exemple #1
0
def load(start, stop, datadir='data/CK'):
    """
    Load aligned face images and their emotion labels as a design matrix.

    Image files are globbed from ``<datadir>/faces_aligned/*.png`` and the
    first `start` entries of that list are skipped.  For each remaining
    image a label file is looked up in ``<datadir>/labels`` using the first
    nine characters of the image file name; images without a label file are
    skipped and do not count towards the number of examples read.

    Parameters
    ----------
    start : int
        Number of globbed image files to skip.
    stop : int
        At most ``stop - start`` labelled images are loaded.
    datadir : str
        Directory containing the ``faces_aligned`` and ``labels``
        sub-directories.

    Returns
    -------
    DenseDesignMatrix
        Rows are 32x32 images flattened to float32 and divided by 255;
        targets are a single column of labels.

    Raises
    ------
    RuntimeError
        If no image files remain after skipping the first `start` entries.
    """
    # NOTE(review): glob order is not sorted here, so `start`/`stop` select
    # whatever order the filesystem returns -- confirm this is acceptable.
    im_list = glob.glob(os.path.join(datadir, 'faces_aligned/*.png'))[start:]
    if not im_list:
        msg = ('No image files found in: %s' %
               os.path.realpath(os.path.join(datadir, 'faces_aligned')))
        log.error(msg)
        # RuntimeException is not a Python builtin; raising it produced a
        # NameError instead of the intended error.
        raise RuntimeError(msg)
    X = []
    y = []
    more_to_read = stop - start
    for im_file in im_list:
        if more_to_read <= 0:
            break
        label_base_pat = os.path.basename(im_file)[:9] + '*_emotion.txt'
        maybe_label_file = glob.glob(
            os.path.join(datadir, 'labels', label_base_pat))
        if maybe_label_file:
            y.append(read_label(maybe_label_file[0]))
            imdata = imread(im_file, False)
            imdata = cv2.resize(imdata, (32, 32))
            imdata = imdata.flatten().astype(np.float32) / 255
            X.append(imdata)
            more_to_read -= 1
    return DenseDesignMatrix(X=np.asarray(X),
                             y=np.asarray(y).reshape(-1, 1),
                             view_converter=DefaultViewConverter(
                                 (32, 32, 1), axes=('b', 0, 1, 'c')))
Exemple #2
0
def get_features(path, split, standardize):
    """
    Load features from one or more files and return them as a design matrix.

    Parameters
    ----------
    path : str
        File to load.  A comma-separated list of paths is loaded file by
        file and the results are concatenated along axis 1.  Paths ending
        in ``.npy`` are read with ``np.load``; anything else goes through
        ``serial.load``.
    split : bool
        If true, the returned matrix is ``abs(X)`` concatenated with
        ``abs(-X)`` along axis 1.
    standardize : bool
        Not supported; see Raises.

    Returns
    -------
    ndarray
        A 2-D array.  Inputs that are not already 2-D are converted with a
        DefaultViewConverter built from ``topo_view.shape[1:]``.

    Raises
    ------
    AssertionError
        If `standardize` is true.
    """
    if ',' in path:
        Xs = [get_features(subpath, split, standardize)
              for subpath in path.split(',')]
        return np.concatenate(Xs, axis=1)

    if path.endswith('.npy'):
        topo_view = np.load(path)
    else:
        topo_view = serial.load(path)

        # An h5py object is expected to hold exactly one entry, whose
        # transposed contents are used.
        if str(type(topo_view)).find('h5py') != -1:
            name, = topo_view.keys()
            topo_view = topo_view[name].value.T

    if len(topo_view.shape) == 2:
        X = topo_view
    else:
        view_converter = DefaultViewConverter(topo_view.shape[1:])

        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print('converting data')
        X = view_converter.topo_view_to_design_mat(topo_view)

    if split:
        # NOTE(review): abs(-X) equals abs(X), so both halves are identical;
        # confirm whether positive/negative parts were intended.
        X = np.concatenate((np.abs(X), np.abs(-X)), axis=1)

    if standardize:
        # Raised explicitly so the guard survives `python -O`.  The previous
        # code here subtracted X.mean(axis=0) and divided by
        # sqrt(.01 + var(X, axis=0)), which is wrong for a test set.
        raise AssertionError(
            'bug: if X is test set, we need to subtract train mean, '
            'divide by train std')

    return X
Exemple #3
0
    def _transform_multi_channel_data(self, X, y):
        """
        Window multi-channel data and convert it into a design matrix.

        `X` and `y` are split by `self._partition_data` using windows of
        `self.window_size`.  The last two axes of the windowed data are
        swapped and a singleton axis is inserted before the last one, after
        which the result is flattened with a DefaultViewConverter built from
        `self.sample_shape`.

        Returns
        -------
        tuple
            ``(design_matrix, one_hot_targets, view_converter)``.
        """
        windows_X, windows_y = self._partition_data(
            X=X, y=y, partition_size=self.window_size)

        # (window, a, b) -> (window, b, a) -> (window, b, 1, a)
        swapped = np.transpose(windows_X, [0, 2, 1])
        n_windows, n_rows, n_last = swapped.shape
        topo_X = np.reshape(swapped, (n_windows, n_rows, 1, n_last))

        converter = DefaultViewConverter(shape=self.sample_shape,
                                         axes=('b', 0, 1, 'c'))
        design_X = converter.topo_view_to_design_mat(topo_X)
        # Sanity check: flattening must be reversible.
        assert np.all(
            topo_X == converter.design_mat_to_topo_view(design_X))

        # A window is labelled 1 when the sum of its targets is positive.
        window_labels = np.sum(windows_y, axis=1)
        window_labels[window_labels > 0] = 1
        formatter = OneHotFormatter(max_labels=self.n_classes)
        one_hot_labels = formatter.format(window_labels)

        return design_X, one_hot_labels, converter
Exemple #4
0
    def __init__(self, config, which_set='train'):
        """
        Open the HDF5 file named in `config` and set up the chosen partition.

        Parameters
        ----------
        config : dict-like
            Read keys: ``'hdf5'`` (path of the HDF5 file), ``which_set`` and
            ``which_set + '_files'`` (partition information), ``'mean'``,
            ``'var'`` and ``'tframes'``.
        which_set : str
            One of ``'train'``, ``'test'`` or ``'valid'``.
        """
        keys = ['train', 'test', 'valid']
        assert which_set in keys

        # load hdf5 metadata
        self.hdf5       = tables.open_file( config['hdf5'], mode='r')
        data            = self.hdf5.get_node('/', 'Data')
        param           = self.hdf5.get_node('/', 'Param')
        self.file_index = param.file_index[0]
        self.file_dict  = param.file_dict[0]
        self.label_list = param.label_list[0]
        self.targets    = param.targets[0]
        self.nfft       = param.fft[0]['nfft']

        # load partition information
        self.support   = config[which_set]
        self.file_list = config[which_set+'_files']
        # mean and variance are flattened to 1-D vectors
        self.mean      = config['mean']
        self.mean      = self.mean.reshape((np.prod(self.mean.shape),))
        self.var       = config['var']
        self.var       = self.var.reshape((np.prod(self.var.shape),))
        self.istd      = np.reciprocal(np.sqrt(self.var))
        # True where the inverse standard deviation is below 20
        # NOTE(review): presumably masks out near-constant dimensions -- confirm
        self.mask      = (self.istd < 20)
        self.tframes   = config['tframes']

        if self.tframes > 1:
            # Floor division keeps the shape entry an integer under true
            # division as well (identical result for Python 2 ints).
            view_converter = DefaultViewConverter(
                (self.tframes, len(self.mean) // self.tframes, 1))
            super(AudioDataset, self).__init__(X=data.X, y=data.y,
                view_converter=view_converter)
        else:
            super(AudioDataset, self).__init__(X=data.X, y=data.y)
Exemple #5
0
    def set_topological_view(self, V, axes=('b', 0, 1, 'c'), start=0):
        """
        Write a batch of topological views into the HDF5-backed dataset.

        Parameters
        ----------
        V : ndarray
            Batch of examples in topological form, laid out according to
            `axes`.  Must not contain NaN.
        axes : tuple
            Axis labels of `V`; ``0``, ``1`` and ``'c'`` locate the row,
            column and channel axes.
        start : int
            Forwarded to ``FaceBBoxDDMPytables.fill_hdf5``.
        """
        assert not numpy.any(numpy.isnan(V))

        # Per-example shape, in (rows, cols, channels) order.
        example_shape = [V.shape[axes.index(label)] for label in (0, 1, 'c')]
        self.view_converter = DefaultViewConverter(example_shape, axes=axes)

        design_mat = self.view_converter.topo_view_to_design_mat(V)
        assert not numpy.any(numpy.isnan(design_mat))

        FaceBBoxDDMPytables.fill_hdf5(h5file=self.h5file,
                                      data_x=design_mat,
                                      start=start)
 def __init__(self, axes=('c', 0, 1, 'b')):
     """
     Build a small fixed-seed dataset of 4 examples with 50 features each.

     `axes` must be ('c', 0, 1, 'b') or ('b', 0, 1, 'c'); it is converted to
     a list before being handed to the view converter and the superclass.
     """
     supported_axes = [('c', 0, 1, 'b'), ('b', 0, 1, 'c')]
     assert_contains(supported_axes, axes)
     axes = list(axes)
     converter = DefaultViewConverter((5, 5, 2), axes=axes)
     # Fixed seed so the generated data is reproducible.
     seeded = numpy.random.RandomState([2013, 3, 12])
     design = seeded.normal(size=(4, 50)).astype('float32')
     super(DummyDataset, self).__init__(X=design,
                                        view_converter=converter,
                                        axes=axes)
    def __init__(self, which_set,
            base_path = '${PYLEARN2_DATA_PATH}/icml_2013_emotions',
            start = None,
            stop = None,
            preprocessor = None,
            fit_preprocessor = False,
            axes = ('b', 0, 1, 'c'),
            fit_test_preprocessor = False):
        """
        Load one portion of the dataset from its csv file.

        which_set: Name of the portion to load, either 'train' or
            'public_test'.
        base_path: Directory holding the .csv files from kaggle.com.
                It should be writable: when the .csv files have not yet
                been converted to npy, this class converts them so that
                later loads use less memory.
        start, stop: Optional integer row range; when `start` is given,
                only rows start (inclusive) to stop (exclusive) are kept.
        preprocessor: Optional preprocessor applied after construction.
        fit_preprocessor: Whether the preprocessor may fit this data.
        axes: Axis order handed to the view converter.
        fit_test_preprocessor: Whether a test set built from this
                    dataset's arguments may fit its preprocessor.
        """

        # Must stay the first statement: it snapshots the constructor
        # arguments before any other local name exists.
        self.test_args = locals()
        self.test_args.update(which_set='public_test',
                              fit_preprocessor=fit_test_preprocessor)
        for dropped in ('start', 'stop', 'self'):
            del self.test_args[dropped]

        files = {'train': 'train.csv', 'public_test' : 'test.csv'}
        if which_set not in files:
            raise ValueError("Unrecognized dataset name: " + which_set)

        path = preprocess(base_path + '/' + files[which_set])

        X, y = self._load_data(path, which_set == 'train')

        if start is not None:
            # NOTE(review): 'test' is not an accepted which_set value, so
            # this check never fires -- confirm what was intended.
            assert which_set != 'test'
            assert isinstance(start, int)
            assert isinstance(stop, int)
            assert 0 <= start
            assert start < stop
            assert stop <= X.shape[0]
            X = X[start:stop, :]
            if y is not None:
                y = y[start:stop, :]

        converter = DefaultViewConverter(shape=[48,48,1], axes=axes)

        super(EmotionsDataset, self).__init__(X=X, y=y,
                                              view_converter=converter)

        if preprocessor:
            preprocessor.apply(self, can_fit=fit_preprocessor)
Exemple #8
0
    def set_topological_view(self, V, axes=('b', 0, 1, 'c')):
        """
        Make the dataset represent `V`, a batch of topological views.

        The view converter, design matrix ``self.X`` and the data specs are
        all rebuilt from `V`.

        Parameters
        ----------
        V : ndarray
            Batch of examples in topological form, laid out according to
            `axes`.  Must not contain NaN.
        axes : tuple
            Axis labels of `V`; ``0``, ``1`` and ``'c'`` locate the row,
            column and channel axes.
        """
        assert not contains_nan(V)

        # Per-example shape, in (rows, cols, channels) order.
        example_shape = [V.shape[axes.index(label)] for label in (0, 1, 'c')]
        self.view_converter = DefaultViewConverter(example_shape, axes=axes)
        self.X = self.view_converter.topo_view_to_design_mat(V)
        # "Default" topological space, used only when self.iterator is
        # called without data_specs and with the deprecated "topo=True".
        self.X_topo_space = self.view_converter.topo_space
        assert not contains_nan(self.X)

        # Rebuild the data specs.
        features_space = VectorSpace(dim=self.X.shape[1])
        features_source = 'features'

        if self.y is None:
            specs = (features_space, features_source)
        else:
            target_dim = 1 if self.y.ndim == 1 else self.y.shape[-1]

            # Old pickled models may carry `max_labels` instead of
            # `y_labels`; `y_labels` wins when both are set.
            label_count = getattr(self, 'y_labels', None)
            if label_count is None:
                label_count = getattr(self, 'max_labels', None)

            if label_count is not None:
                targets_space = IndexSpace(dim=target_dim,
                                           max_labels=label_count)
            else:
                targets_space = VectorSpace(dim=target_dim)

            latents_space = VectorSpace(dim=self.latent.shape[-1])

            specs = (
                CompositeSpace(
                    (features_space, targets_space, latents_space)),
                (features_source, 'targets', 'latents'),
            )

        self.data_specs = specs
        self.X_space = features_space
        self._iter_data_specs = (features_space, features_source)
Exemple #9
0
    def __init__(self,
                 which_set='full',
                 path='train.mat',
                 one_hot=False,
                 colorspace='none',
                 step=1,
                 start=None,
                 stop=None,
                 center=False,
                 rescale=False,
                 gcn=None,
                 toronto_prepro=False,
                 axes=('b', 0, 1, 'c')):
        """
        Load the data via `self._load_data` and apply optional preprocessing.

        Every constructor argument is stored as an attribute of the same
        name.  Preprocessing is applied in this order: `center` (subtract
        127.5), `rescale` (divide by 127.5), `toronto_prepro` (divide by
        255 and subtract a per-feature mean, taken from the training set
        when `which_set` is 'test'), then `gcn` (global contrast
        normalization with that scale).  `toronto_prepro` cannot be combined
        with `center` or `gcn`.
        """
        # Must stay the first statement: copies the constructor arguments
        # onto the instance before any other local name exists.
        self.__dict__.update(locals())
        del self.self

        self.view_converter = None

        self.path = preprocess(self.path)
        X, y = self._load_data()

        if center:
            X -= 127.5

        if rescale:
            X /= 127.5

        if toronto_prepro:
            assert not center
            assert not gcn
            X = X / 255.
            if which_set == 'test':
                # Test data is centred with the training-set mean.
                train_X = MATDATA(which_set='train').X
                train_X /= 255.
                mean_source = train_X
            else:
                mean_source = X
            X = X - mean_source.mean(axis=0)

        if gcn is not None:
            X = global_contrast_normalize(X, scale=float(gcn),
                                          min_divisor=1e-8)

        converter = DefaultViewConverter(
            (self.windowSize, self.windowSize, self.channels), axes)

        super(MATDATA, self).__init__(X=X, y=y, view_converter=converter)
Exemple #10
0
    def __init__(self,
                 start=None,
                 stop=None,
                 axes=('b', 0, 1, 'c'),
                 stdev=0.8,
                 hack=None,
                 preproc='GCN'):
        """
        Memory-map the face images and keypoint targets and slice them.

        Parameters
        ----------
        start : int, optional
            First example to keep; defaults to 0.
        stop : int, optional
            One past the last example to keep; defaults to the number of
            examples in the image file.
        axes : tuple
            Axis order handed to the view converter.
        stdev : float
            Stored as ``self.stdev``.
        hack : str, optional
            When given, images are read from a preprocessed file whose name
            is built from `preproc` and `hack`, and `hack` is passed to
            ``self.make_targets``; otherwise the default image file and
            ``'all'`` are used.
        preproc : str
            Part of the preprocessed file name used when `hack` is given.
        """
        values_per_image = 96 * 96 * 3
        values_per_target = len(keypoints_names) * 2

        self.name = hack
        self.stdev = stdev
        self.axes = axes
        # One row of pixel indices 0..95 per target coordinate (at least
        # one row); built in a single call rather than by repeated vstack.
        self.pixels = numpy.tile(
            numpy.arange(0, 96).reshape((1, 96)),
            (max(values_per_target, 1), 1))

        if hack is not None:
            X = LazyMemmap(preprocess('/Tmp/aggarwal/EmotiW_' + preproc + '_' +
                                      hack + '.npy'),
                           dtype='float32',
                           mode='c')
        else:
            X = LazyMemmap(preprocess(
                '${PYLEARN2_DATA_PATH}/faces/hdf5/complete_train_x.npy'),
                           dtype='uint8',
                           mode='c')

        Y = LazyMemmap(preprocess(
            '${PYLEARN2_DATA_PATH}/faces/hdf5/complete_train_y.npy'),
                       dtype=numpy.float32,
                       mode='c')

        # Integer count: it is used below as a slice bound and as a shape
        # entry, neither of which accepts a float.
        num_examples = len(X) // values_per_image

        if stop is None:
            stop = num_examples
        if start is None:
            start = 0

        X = X.view()[start * values_per_image:stop * values_per_image]
        Y = Y.view()[start * values_per_target:stop * values_per_target]
        X.shape = (stop - start, values_per_image)
        Y.shape = (stop - start, values_per_target)

        Y = self.make_targets(Y, hack if hack is not None else 'all')

        super(EmotiwKeypoints, self).__init__(
            X=X,
            y=Y,
            view_converter=DefaultViewConverter(shape=[96, 96, 3], axes=axes))
Exemple #11
0
    def __init__(self,
                 which_set,
                 which_data,
                 start=None,
                 stop=None,
                 preprocessor=None):
        """
        Load one feature type for one portion of the dataset.

        Parameters
        ----------
        which_set : str
            ``'train'`` or ``'test'``.  The test set gets all-zero dummy
            targets with two columns.
        which_data : str
            ``'melspectrum'`` or ``'specfeat'``; selects both the file to
            load and the shape stored in the view converter.
        start, stop : int, optional
            When `start` is given, only rows start (inclusive) to stop
            (exclusive) are kept.
        preprocessor : optional
            Applied to the dataset after construction.
        """
        assert which_set in ['train', 'test']
        assert which_data in ['melspectrum', 'specfeat']

        X = np.load(os.path.join(DATA_DIR, which_set + which_data + '.npy'))
        # np.cast was removed in NumPy 2.0; astype is the equivalent
        # conversion on every NumPy version.
        X = np.asarray(X).astype('float32')
        # Each example is flattened to one row; the original shape is kept
        # in the view converter.
        X = np.reshape(X, (X.shape[0], np.prod(X.shape[1:])))

        if which_set == 'test':
            # dummy targets
            y = np.zeros((X.shape[0], 2))
        else:
            y = np.load(os.path.join(DATA_DIR, 'targets.npy'))

        if start is not None:
            assert start >= 0
            assert stop > start
            assert stop <= X.shape[0]
            X = X[start:stop, :]
            y = y[start:stop]
            assert X.shape[0] == y.shape[0]

        if which_data == 'melspectrum':
            # 2D data with 1 channel
            view_converter = DefaultViewConverter((67, 40, 1))
        elif which_data == 'specfeat':
            # 24 channels with 1D data
            view_converter = DefaultViewConverter((67, 1, 24))

        super(Whales, self).__init__(X=X, y=y, view_converter=view_converter)

        assert not np.any(np.isnan(self.X))

        if preprocessor:
            preprocessor.apply(self)
Exemple #12
0
def make_viewer(mat,
                grid_shape=None,
                patch_shape=None,
                activation=None,
                pad=None,
                is_color=False,
                rescale=True):
    """
    Build a PatchViewer showing one patch per leading index of `mat`.

    Parameters
    ----------
    mat : ndarray
        Either a 2-D matrix with one flattened patch per row, or an array
        with more than two dimensions that is used directly as the
        topological view.  In the latter case the patch shape is taken from
        axes 1 and 2 and the channel count from axis 3, overriding
        `patch_shape` and `is_color`.
    grid_shape : optional
        Grid layout; chosen by ``PatchViewer.pick_shape`` when None.
    patch_shape : optional
        (rows, cols) of each patch for 2-D `mat`; when None it is guessed
        by ``PatchViewer.pick_shape(..., exact=True)``.
    activation : optional
        Indexed per patch.  If its first element has ``__iter__`` it is
        treated as a collection of sequences and the i-th element of each
        is passed along.
    pad : optional
        Forwarded to PatchViewer.
    is_color : bool
        For 2-D `mat`, treat each row as having 3 channels instead of 1.
    rescale : bool
        Forwarded to ``PatchViewer.add_patch``.

    Returns
    -------
    PatchViewer
    """

    num_channels = 1
    if is_color:
        num_channels = 3

    if grid_shape is None:
        grid_shape = PatchViewer.pick_shape(mat.shape[0])
    if mat.ndim > 2:
        patch_shape = mat.shape[1:3]
        topo_view = mat
        num_channels = mat.shape[3]
        is_color = num_channels > 1
    else:
        if patch_shape is None:
            assert mat.shape[1] % num_channels == 0
            # Floor division keeps the pixel count an integer under true
            # division; the assert above guarantees it is exact.
            patch_shape = PatchViewer.pick_shape(mat.shape[1] // num_channels,
                                                 exact=True)
            assert mat.shape[1] == (patch_shape[0] * patch_shape[1] *
                                    num_channels)
        topo_shape = (patch_shape[0], patch_shape[1], num_channels)
        view_converter = DefaultViewConverter(topo_shape)
        topo_view = view_converter.design_mat_to_topo_view(mat)
    rval = PatchViewer(grid_shape, patch_shape, pad=pad, is_color=is_color)
    for i in range(mat.shape[0]):
        if activation is not None:
            if hasattr(activation[0], '__iter__'):
                act = [a[i] for a in activation]
            else:
                act = activation[i]
        else:
            act = None

        patch = topo_view[i, :]

        rval.add_patch(patch, rescale=rescale, activation=act)
    return rval
Exemple #13
0
 def __init__(self,ds,ishape,numclass=-1,axes = ('b', 0, 1, 'c'),fit_preprocessor=True):
     """
     Wrap a (features, labels) pair as a dataset.

     ds[0] is used as the design matrix and ds[1] as the labels.  When
     `numclass` is positive, each label is expanded into a float32 row of
     length `numclass` with a 1 at the label's index; otherwise the labels
     are passed through unchanged.  `ishape` and `axes` configure the view
     converter.
     """
     X, y = ds[0], ds[1]

     def encode(label):
         # One row of zeros with a 1 at the label position.
         row = np.zeros(numclass)
         row[label] = 1
         return row

     if numclass > 0:
         targets = np.asarray([encode(label) for label in y]).astype('float32')
     else:
         targets = y

     converter = DefaultViewConverter(shape=ishape, axes=axes)
     super(DataPylearn2, self).__init__(X=X, y=targets, view_converter=converter)
Exemple #14
0
    def __init__(self, iterator, num_examples, image_shape):
        """
        Load `num_examples` images as letterboxed 3-channel thumbnails.

        Parameters
        ----------
        iterator : iterator
            Yields image paths; advanced once per example.
        num_examples : int
            Number of images to load.
        image_shape : sequence of length 2
            Thumbnail size, used for axes 1 and 2 of the topological view.
        """
        assert len(image_shape) == 2

        thumb_rows, thumb_cols = image_shape[0], image_shape[1]
        thumbnails = np.zeros((num_examples, thumb_rows, thumb_cols, 3),
                              dtype='float32')

        for index in xrange(num_examples):
            loaded = image.load(iterator.next())
            thumbnails[index, :] = make_letterboxed_thumbnail(loaded,
                                                              image_shape)

        converter = DefaultViewConverter(thumbnails.shape[1:])
        super(DARPA_ImageNet, self).__init__(topo_view=thumbnails,
                                             view_converter=converter)
Exemple #15
0
    def __init__(self,
                 X=None,
                 topo_view=None,
                 y=None,
                 view_converter=None,
                 axes=('b', 0, 1, 'c'),
                 rng=_default_seed,
                 preprocessor=None,
                 fit_preprocessor=False,
                 X_labels=None,
                 y_labels=None,
                 block_length=1):
        """
        Build a time-series dataset, optionally grouping rows into blocks.

        With ``block_length == 1`` the rows of `X` are used as they are and
        a view converter of shape ``(1, X.shape[1], 1)`` replaces the one
        passed in.  Otherwise trailing rows that do not fill a whole block
        are dropped and every `block_length` consecutive rows are flattened
        into one example; the `view_converter` argument is passed through
        unchanged.

        When blocks are used and `y_labels` is given, the last column of
        `X` is treated as the label column: it is removed from the
        features, and the label of the first row of each block becomes that
        block's integer target, replacing `y`.

        All other arguments are forwarded to the superclass.  The shape of
        the resulting design matrix is stored as ``self.shape``.
        """

        assert block_length >= 1

        if block_length != 1:
            # Rows that fit into complete blocks.
            usable = X[0:(X.shape[0] - X.shape[0] % block_length)]
            # Floor division keeps the block count an integer under true
            # division as well.
            n_blocks = usable.shape[0] // block_length

            if y_labels is None:
                timeseries = np.reshape(usable, (n_blocks, -1))
            else:
                timeseries = np.reshape(usable[:, :-1], (n_blocks, -1))
                y = np.reshape(usable[:, -1], (n_blocks, -1))
                y = y[:, 0].astype(int)

            super(Timeseries,
                  self).__init__(timeseries, topo_view, y, view_converter,
                                 axes, rng, preprocessor, fit_preprocessor,
                                 X_labels, y_labels)
            self.shape = timeseries.shape
        else:
            view_converter = DefaultViewConverter((1, X.shape[1], 1))

            super(Timeseries,
                  self).__init__(X, topo_view, y, view_converter, axes, rng,
                                 preprocessor, fit_preprocessor, X_labels,
                                 y_labels)
            self.shape = X.shape
Exemple #16
0
    def __init__(self,
                 which_set,
                 base_path,
                 start=None,
                 stop=None,
                 preprocessor=None,
                 fit_preprocessor=False,
                 axes=('b', 0, 1, 'c'),
                 fit_test_preprocessor=False):

        print base_path, "?"

        self.test_args = locals()
        self.test_args['which_set'] = 'public_test'
        self.test_args['fit_preprocessor'] = fit_test_preprocessor
        del self.test_args['start']
        del self.test_args['stop']
        del self.test_args['self']

        if which_set == "train":
            X = np.load(base_path + "/Train_X.npy")
            y = np.load(base_path + "/Train_y.npy")
        elif which_set == "valid":
            X = np.load(base_path + "/Val_X.npy")
            y = np.load(base_path + "/Val_y.npy")
        else:
            raise ValueError("Unrecognized dataset name: " + which_set)

        if start is not None:
            assert isinstance(start, int)
            assert isinstance(stop, int)
            assert start >= 0
            assert start < stop
            assert stop <= X.shape[0]
            X = X[start:stop]
            y = y[start:stop]

        X = X.reshape((X.shape[0], 96 * 96 * 3))

        view_converter = DefaultViewConverter(shape=[96, 96, 3], axes=axes)

        super(AFEWDataset, self).__init__(X=X,
                                          y=y,
                                          view_converter=view_converter)

        if preprocessor:
            preprocessor.apply(self, can_fit=fit_preprocessor)
Exemple #17
0
    def __init__(self, npy_filename, which_set, one_hot, split):
        """
        Load one split of a dataset stored as a pair of .npy files.

        Parameters
        ----------
        npy_filename : str
            File name stem; ``_inputs.npy`` and ``_labels.npy`` are appended
            to it under ``${PYLEARN2_DATA_PATH}/icml07data/npy``.
        which_set : str
            ``'train'``, ``'valid'`` or ``'test'``.
        one_hot : bool
            If true, labels are expanded to float32 one-hot rows.
        split : sequence of three ints
            Sizes of the train, valid and test portions, which are taken
            consecutively from the start of the file.
        """
        assert which_set in ['train', 'valid', 'test']

        self.one_hot = one_hot
        self.split = split

        # Locate the two files and go through the dataset cache
        stem = os.path.join(preprocess('${PYLEARN2_DATA_PATH}'),
                            'icml07data', 'npy', npy_filename)
        inputs_path = datasetCache.cache_file(stem + '_inputs.npy')
        labels_path = datasetCache.cache_file(stem + '_labels.npy')
        data_x = np.load(inputs_path, mmap_mode='r')
        data_y = np.load(labels_path, mmap_mode='r')

        # sanity checks
        assert np.isfinite(data_x).all()
        assert np.isfinite(data_y).all()
        assert data_x.shape[0] == data_y.shape[0]

        # Row range of the requested portion
        n_train, n_valid, n_test = split
        valid_end = n_train + n_valid
        bounds = {
            'train': (0, n_train),
            'valid': (n_train, valid_end),
            'test': (valid_end, valid_end + n_test),
        }
        first, last = bounds[which_set]
        data_x = data_x[first:last]
        data_y = data_y[first:last]

        if one_hot:
            # Class count is inferred from the largest label in this split.
            n_classes = data_y.max() + 1
            encoded = np.zeros((data_y.shape[0], n_classes), dtype='float32')
            for row, label in enumerate(data_y):
                encoded[row, label] = 1.
            data_y = encoded

        converter = DefaultViewConverter((28, 28, 1))
        super(ICML07DataSet, self).__init__(X=data_x,
                                            y=data_y,
                                            view_converter=converter)
Exemple #18
0
def test_make_local_rfs():
    """
    Check the weights produced by make_local_rfs on an all-ones dataset.

    With patch drawing enabled, the weights must have shape (300, 4), every
    column must sum to 75 and every row to 1.  Without it only the shape is
    checked.  Asking for 2 receptive fields must raise ValueError.
    """
    dataset = DenseDesignMatrix(
        np.ones((10, 300)),
        view_converter=DefaultViewConverter((10, 10, 3)))

    # Drawn patches
    layer = make_local_rfs(dataset, 4, (5, 5), (5, 5), draw_patches=True)
    weights = layer.get_params()[0].get_value()
    assert weights.shape == (300, 4)
    np.testing.assert_allclose(weights.sum(axis=0), 75 * np.ones(4))
    np.testing.assert_allclose(weights.sum(axis=1), np.ones(300))

    # Default patch placement
    layer = make_local_rfs(dataset, 4, (5, 5), (5, 5))
    weights = layer.get_params()[0].get_value()
    assert weights.shape == (300, 4)

    np.testing.assert_raises(ValueError, make_local_rfs, dataset, 2,
                             (5, 5), (5, 5))
def load_xy_data(npy_fn_x,
                 npy_fn_y,
                 start=0,
                 stop=None,
                 strip_dims=None,
                 reverse=False):
    """
    Load the data from `npy_fn_x` and `npy_fn_y`, pair them, and keep
    the rows from `start` (inclusive) to `stop` (exclusive).

    Parameters
    ----------
    npy_fn_x : str
        Path of the .npy file holding the inputs.
    npy_fn_y : str
        Path of the .npy file holding the outputs.
    start : int
        First row to keep.
    stop : int
        One past the last row to keep; None keeps everything up to the
        end. Useful for only using a part of the dataset. For data with a
        frame every 10 ms, 360000 frames would give 1 hour of data.
    strip_dims : int
        Only keep this many dimensions of each row (useful for stripping off
        deltas).
    reverse : bool
        If set, the inputs are the rows of `npy_fn_x` followed by the rows
        of `npy_fn_y`, and the targets are the same rows in the opposite
        order, so every pair appears in both directions. No view converter
        is attached in this case.

    Returns
    -------
    ddm : DenseDesignMatrix
    """

    X = np.load(npy_fn_x)
    X = X[start:stop, :strip_dims]

    Y = np.load(npy_fn_y)
    Y = Y[start:stop, :strip_dims]

    d_frame = X.shape[1]  # single frame dimension

    # Floor division keeps the shape entry an integer under true division;
    # the value is always 1 here because d_frame is X.shape[1].
    view_converter = DefaultViewConverter((d_frame, X.shape[1] // d_frame, 1))

    if not reverse:
        return DenseDesignMatrix(X=X, y=Y, view_converter=view_converter)
    else:
        return DenseDesignMatrix(X=np.vstack([X, Y]), y=np.vstack([Y, X]))
Exemple #20
0
    def set_topological_view(self, topo_view, axes=('b', 0, 1, 'c')):
        '''
        Make the dataset represent `topo_view`, a batch of topological
        views whose batch axis holds frames.

        Parameters
        ----------
        topo_view : ndarray
            Batch of examples in topological form, laid out according to
            `axes`.  Must not contain NaN.
        axes : tuple
            Axis labels of `topo_view`.

        Only unlabelled data is supported: `self.y` must be None.
        '''

        assert not np.any(np.isnan(topo_view))

        def axis_length(label):
            # Size of the axis carrying this label.
            return topo_view.shape[axes.index(label)]

        n_frames = axis_length('b')  # frames arrive on the batch axis
        n_rows = axis_length(0)
        n_cols = axis_length(1)
        n_channels = axis_length('c')

        # The converter describes a single frame; frames are left out.
        self.view_converter = DefaultViewConverter(
            [n_rows, n_cols, n_channels], axes=axes)

        self.X = self.view_converter.topo_view_to_design_mat(topo_view)
        # "Default" topological space, used only when self.iterator is
        # called without data_specs and with the deprecated "topo=True".
        self.X_topo_space = self.view_converter.topo_space
        assert not np.any(np.isnan(self.X))

        # Rebuild the data specs; the feature space spans all frames.
        features_space = VectorSpace(
            dim=n_frames * n_rows * n_cols * n_channels)
        features_source = 'features'

        assert self.y is None, 'y not supported now'

        self.data_specs = (features_space, features_source)
        self.X_space = features_space
        self._iter_data_specs = (features_space, features_source)
def test_split_nfold_datasets():
    """
    Split the CIFAR-100 training set into 10 folds and check the fold size.

    The data is read from a fixed filesystem path, so the test only runs
    where that path exists.
    """
    # Load and create ddm from cifar100
    path = "/data/lisa/data/cifar100/cifar-100-python/train"
    obj = serial.load(path)
    X = obj['data']

    assert X.max() == 255.
    assert X.min() == 0.

    # np.cast was removed in NumPy 2.0; astype is the equivalent conversion
    # on every NumPy version.
    X = np.asarray(X).astype('float32')
    y = None  # not implemented yet

    view_converter = DefaultViewConverter((32, 32, 3))

    ddm = DenseDesignMatrix(X=X, y=y, view_converter=view_converter)

    assert not np.any(np.isnan(ddm.X))
    ddm.y_fine = np.asarray(obj['fine_labels'])
    ddm.y_coarse = np.asarray(obj['coarse_labels'])
    folds = ddm.split_dataset_nfolds(10)
    assert folds[0].shape[0] == np.ceil(ddm.num_examples / 10)
Exemple #22
0
    def __init__(
            self,
            which_set,
            numclass,
            base_path='/data/vision/billf/manifold-learning/DL/Data/icml_2013_emotions',
            start=0,
            stop=-1,
            preprocessor=None,
            trainindex=0,
            ishape=None,
            fit_preprocessor=False,
            axes=('b', 0, 1, 'c'),
            fit_test_preprocessor=False,
            flip=0):
        """
        Load one portion of the dataset from its csv file.

        `which_set` is 'train' or 'public_test'.  The file is read with
        `self.loadFile`, which also receives `start`, `stop` and
        `trainindex`.  When `flip` is true, the left-right flipped data
        returned by `self.flipData` is combined with the data using ``+``,
        and the labels are combined with themselves the same way.  Labels
        are converted with `self.label_id2arr` using `numclass`.  `ishape`
        must provide `shape` and `num_channels`, which define the view
        converter.  `preprocessor`, if given, is applied after construction.

        Raises
        ------
        ValueError
            If `which_set` is not a recognised name.
        """
        files = {'train': 'occ_train.csv', 'public_test': 'test.csv'}
        if which_set not in files:
            raise ValueError("Unrecognized dataset name: " + which_set)

        csv_path = base_path + '/' + files[which_set]
        X, y = self.loadFile(csv_path, start, stop, trainindex)

        if flip:
            # Only the left-right flips are used.
            flipped_lr, flipped_ud = self.flipData(X)
            X = X + flipped_lr
            y = y + y

        topo_shape = np.append(ishape.shape, ishape.num_channels)
        converter = DefaultViewConverter(shape=topo_shape, axes=axes)
        super(Occ, self).__init__(X=X,
                                  y=self.label_id2arr(y, numclass),
                                  view_converter=converter)

        if preprocessor:
            preprocessor.apply(self, can_fit=fit_preprocessor)
Exemple #23
0
    def __init__(self,
                 path='train.mat',
                 start=None,
                 stop=None,
                 center=False,
                 rescale=False,
                 axes=('b', 0, 1, 'c'),
                 channels=4):
        """
        Load the data via `self._load_data` and scale it in place.

        Every constructor argument is stored as an attribute of the same
        name.  Scaling: `center` subtracts 127.5; `rescale` divides by 127.5
        when combined with `center` and by 255 on its own.

        NOTE(review): the window size and the view converter shape use a
        fixed 4 channels and 61x61 windows rather than `channels` -- confirm
        this is intended.
        """

        # Must stay the first statement: copies the constructor arguments
        # onto the instance before any other local name exists.
        self.__dict__.update(locals())
        del self.self

        self.filters = tables.Filters(complib='blosc', complevel=5)
        self.view_converter = None
        self.path = preprocess(self.path)

        X, y = self._load_data()

        self.windowSize = np.uint8(np.sqrt(X.shape[1] / 4))

        if center:
            X[:] -= 127.5
            if rescale:
                X[:] /= 127.5
        elif rescale:
            X[:] /= 255.

        converter = DefaultViewConverter((61, 61, 4), axes)

        super(MATDATAPyTables, self).__init__(X=X,
                                              y=y,
                                              view_converter=converter)

        self.h5file.flush()
def load_data(npy_fn, start=0, stop=None, strip_dims=None, stack_n_frames=1):
    """
    Load the data from `npy_fn` and keep the rows from `start` (inclusive) to
    `stop` (exclusive).

    Parameters
    ----------
    npy_fn : str
        Path of the file read with `np.load`; it must hold a 2-D array
        (rows are frames).
    start : int
        Index of the first row to keep.
    stop : int or None
        Index one past the last row to keep; None keeps everything up to
        the end. Useful for only using a part of the dataset. For data with
        a frame every 10 ms, 360000 frames would give 1 hour of data.
    strip_dims : int or None
        Only keep this many dimensions of each row (useful for stripping off
        deltas). None keeps every dimension.
    stack_n_frames : int
        If different from 1, treat this many frames as a window and sweep
        the window across the data (1-frame shift).

    Returns
    -------
    ddm : DenseDesignMatrix
        Dataset whose view converter has shape
        (frame dimension, number of stacked frames, 1).
    """

    X = np.load(npy_fn)
    X = X[start:stop, :strip_dims]

    d_frame = X.shape[1]  # single frame dimension

    # Stack frames
    if stack_n_frames != 1:
        X = stack_overlapping_vectors(X, stack_n_frames, n_rate=1)

    # Floor division keeps this shape entry an integer even when `/` means
    # true division (Python 3 or `from __future__ import division`).
    n_stacked = X.shape[1] // d_frame
    view_converter = DefaultViewConverter((d_frame, n_stacked, 1))

    return DenseDesignMatrix(X=X, view_converter=view_converter)
Exemple #25
0
    def __init__(self,
                 which_set,
                 center=False,
                 gcn=None,
                 toronto_prepro=False,
                 axes=('b', 0, 1, 'c'),
                 start=None,
                 stop=None,
                 one_hot=False):
        """
        Load one split of CIFAR-100 (fine labels) and apply the requested
        preprocessing before handing the data to the parent constructor.

        Parameters
        ----------
        which_set : str
            Either 'train' or 'test'.
        center : bool
            If true, subtract 127.5 from every pixel value.
        gcn : float or None
            If not None, each row has its mean removed, is scaled to unit
            L2 norm and is then multiplied by this value.
        toronto_prepro : bool
            If true, divide by 255 and subtract the per-pixel mean of the
            loaded set. Incompatible with `center` and `gcn`, and not
            implemented for the test set.
        axes : tuple
            Axis order handed to `DefaultViewConverter`.
        start : int or None
            If not None, only rows `start` (inclusive) to `stop`
            (exclusive) are kept; `stop` must then be given as well.
        stop : int or None
            See `start`.
        one_hot : bool
            If true, labels become rows of a (num_examples, 100) float32
            indicator matrix.

        Raises
        ------
        NotImplementedError
            If `toronto_prepro` is requested for the test set.
        """
        assert which_set in ['train', 'test']

        path = "${PYLEARN2_DATA_PATH}/cifar100/cifar-100-python/" + which_set

        obj = serial.load(path)
        X = obj['data']

        # Sanity check that the raw data spans the full 8-bit range.
        assert X.max() == 255.
        assert X.min() == 0.

        # `np.cast` no longer exists in NumPy 2.0; this is its documented
        # replacement.
        X = np.asarray(X, dtype='float32')
        y = np.asarray(obj['fine_labels'])

        self.center = center

        self.one_hot = one_hot
        if one_hot:
            # Set one entry per row in a single vectorized assignment.
            indicators = np.zeros((y.shape[0], 100), dtype='float32')
            indicators[np.arange(y.shape[0]), y] = 1.
            y = indicators

        if center:
            X -= 127.5

        if toronto_prepro:
            assert not center
            assert not gcn
            if which_set == 'test':
                raise NotImplementedError("Need to subtract the mean of the "
                                          "*training* set.")
            X = X / 255.
            X = X - X.mean(axis=0)
        self.toronto_prepro = toronto_prepro

        self.gcn = gcn
        if gcn is not None:
            assert isinstance(gcn, float)
            X = (X.T - X.mean(axis=1)).T
            X = (X.T / np.sqrt(np.square(X).sum(axis=1))).T
            X *= gcn

        if start is not None:
            # This needs to come after the prepro so that it doesn't change
            # the pixel means computed above
            assert start >= 0
            assert stop > start
            assert stop <= X.shape[0]
            X = X[start:stop, :]
            y = y[start:stop]
            assert X.shape[0] == y.shape[0]

        self.axes = axes
        view_converter = DefaultViewConverter((32, 32, 3), axes)

        super(CIFAR100, self).__init__(X=X, y=y, view_converter=view_converter)

        # Uses the same `np` alias as the rest of this method.
        assert not np.any(np.isnan(self.X))
Exemple #26
0
        feat = H * Mu1
    elif feature_type == 'exp_h':
        feat = H
    elif feature_type == 'map_hs':
        feat = (H > 0.5) * Mu1
    else:
        assert False

    print 'compiling theano function'
    f = function([V], feat)

    print 'running theano function'
    feat = f(X2)

    feat_dataset = DenseDesignMatrix(X=feat,
                                     view_converter=DefaultViewConverter(
                                         [1, 1, feat.shape[1]]))

    print 'reassembling features'
    ns = 32 - size + 1
    depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                         patch_shape=(1, 1))
    feat_dataset.apply_preprocessor(depatchifier)

    print 'making topological view'
    topo_feat = feat_dataset.get_topological_view()
    assert topo_feat.shape[0] == X.shape[0]

    print 'assembling visualizer'

    n = np.ceil(np.sqrt(model.nhid))
Exemple #27
0
def test_init_with_vc():
    """Smoke test: build a DenseDesignMatrix from X plus a view converter."""
    random_state = np.random.RandomState([4, 5, 6])
    design = random_state.randn(12, 5)
    converter = DefaultViewConverter([1, 2, 3])
    # Construction alone is the check; the instance is not inspected.
    DenseDesignMatrix(X=design, view_converter=converter)
Exemple #28
0
import sys
from pylearn2.utils import serial
from pylearn2.datasets.preprocessing import ZCA
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix, DefaultViewConverter
from pylearn2.gui.patch_viewer import PatchViewer
import numpy as np

# Display the rows of a saved ZCA matrix as colour image patches.
# Usage: pass the path of the serialized preprocessor as the first argument.
pipeline_path = sys.argv[1]
pipeline = serial.load(pipeline_path)

# The ZCA step must be the last item of the loaded preprocessor.
whitener = pipeline.items[-1]
assert isinstance(whitener, ZCA)

weights = whitener.P_

# Each row is interpreted as a square image with 3 channels.
num_inputs = weights.shape[1]
assert num_inputs % 3 == 0
side = int(np.sqrt(num_inputs / 3))

dataset = DenseDesignMatrix(
    X=weights,
    view_converter=DefaultViewConverter((side, side, 3)))
patches = dataset.get_weights_view(weights)

viewer = PatchViewer(grid_shape=(side * 3, side),
                     patch_shape=(side, side),
                     is_color=True)

for idx in xrange(side * side * 3):
    viewer.add_patch(patches[idx, ...], rescale=True)

viewer.show()
Exemple #29
0
    def __call__(self, full_X):
        """
        Extract average-pooled features for a single image.

        The image is turned into a batch of one, run through
        `self.preprocessor` and `self.f`, reassembled into a feature map
        and average-pooled once per entry of `self.pooling_region_counts`.

        Parameters
        ----------
        full_X : ndarray
            One image with at least three axes. A last axis of size 1 is
            replicated to three channels. The view converter below assumes
            a 32x32x3 image after that step.

        Returns
        -------
        ndarray
            The pooled features for the FIRST pooling region count only
            (`outputs[0]`); the other entries are computed but not
            returned.
        """

        # NOTE(review): `feature_type` is read but never used below.
        feature_type = self.feature_type
        pooling_region_counts = self.pooling_region_counts
        model = self.model
        size = self.size

        # Running count of NaN feature values. They are zeroed below, but
        # the count is never reported within this method.
        nan = 0

        # Add a leading batch axis of length 1.
        full_X = full_X.reshape(1, full_X.shape[0], full_X.shape[1],
                                full_X.shape[2])

        # Single-channel input: repeat it to get three channels.
        if full_X.shape[3] == 1:
            full_X = np.concatenate((full_X, full_X, full_X), axis=3)

        print 'full_X.shape: ' + str(full_X.shape)

        num_examples = full_X.shape[0]
        assert num_examples == 1

        pipeline = self.preprocessor

        def average_pool(stride):
            # Pools `topo_feat` into a stride x stride grid of regions.
            # `topo_feat` and `ns` come from the enclosing scope and are
            # only assigned further down, so this must not be called
            # before the main loop has set them.
            def point(p):
                # Boundary of region `p` along one spatial axis.
                # NOTE(review): the result is used as a slice index, so
                # this relies on `/` being integer division.
                return p * ns / stride

            rval = np.zeros(
                (topo_feat.shape[0], stride, stride, topo_feat.shape[3]),
                dtype='float32')

            for i in xrange(stride):
                for j in xrange(stride):
                    rval[:, i, j, :] = self.region_features(
                        topo_feat[:,
                                  point(i):point(i + 1),
                                  point(j):point(j + 1), :])

            return rval

        # One zero-initialised result buffer per pooling region count.
        outputs = [
            np.zeros((num_examples, count, count, model.nhid), dtype='float32')
            for count in pooling_region_counts
        ]

        assert len(outputs) > 0

        # Template dataset; a shallow copy is filled with the features of
        # each batch.
        fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                               view_converter=DefaultViewConverter(
                                   [1, 1, model.nhid]))

        # Patch positions per axis for a 32-pixel side and stride 1.
        ns = 32 - size + 1
        depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                             patch_shape=(1, 1))

        batch_size = 1

        for i in xrange(0, num_examples - batch_size + 1, batch_size):
            print i
            # t1..t6 are timing checkpoints, printed at the end of the
            # iteration.
            t1 = time.time()

            d = DenseDesignMatrix(
                topo_view=np.cast['float32'](full_X[i:i + batch_size, :]),
                view_converter=DefaultViewConverter((32, 32, 3)))

            t2 = time.time()

            #print '\tapplying preprocessor'
            d.apply_preprocessor(pipeline, can_fit=False)
            X2 = d.get_design_matrix()

            t3 = time.time()

            #print '\trunning theano function'
            feat = self.f(X2)

            t4 = time.time()

            assert feat.dtype == 'float32'

            feat_dataset = copy.copy(fd)

            # Count NaN features, then replace them with 0.
            if np.any(np.isnan(feat)):
                nan += np.isnan(feat).sum()
                feat[np.isnan(feat)] = 0

            feat_dataset.set_design_matrix(feat)

            #print '\treassembling features'
            feat_dataset.apply_preprocessor(depatchifier)

            #print '\tmaking topological view'
            topo_feat = feat_dataset.get_topological_view()
            assert topo_feat.shape[0] == batch_size

            t5 = time.time()

            #average pooling
            for output, count in zip(outputs, pooling_region_counts):
                output[i:i + batch_size, ...] = average_pool(count)

            t6 = time.time()

            # Total time, followed by the time spent in each stage.
            print(t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5)

        return outputs[0]
Exemple #30
0
    def _execute(self):
        """
        Extract pooled features for a whole dataset and save them with
        `np.save`.

        The dataset is built from `self.dataset_family[self.which_set]
        [self.size]`, optionally restricted to `self.restrict`, and
        processed in batches of `self.batch_size`: preprocessing pipeline,
        compiled feature function, reassembly into a feature map, average
        pooling into a 7x7 grid and projection with the module-level
        `pooling_matrix`. If `self.chunk_size` is set, a letter derived
        from `self.chunk_id` is inserted before the `.npy` extension of
        `self.save_path`.

        Raises
        ------
        NotImplementedError
            If `self.feature_type` is not one of the handled names.
        """

        # Module-level matrix applied to the pooled superpixels below.
        global pooling_matrix
        save_path = self.save_path
        batch_size = self.batch_size
        feature_type = self.feature_type
        dataset_family = self.dataset_family
        which_set = self.which_set
        model = self.model
        size = self.size

        # Running count of NaN feature values (reported at the very end).
        nan = 0

        dataset_descriptor = dataset_family[which_set][size]

        dataset = dataset_descriptor.dataset_maker()
        expected_num_examples = dataset_descriptor.num_examples

        full_X = dataset.get_design_matrix()
        num_examples = full_X.shape[0]
        assert num_examples == expected_num_examples

        # Optionally keep only rows restrict[0] (inclusive) to restrict[1]
        # (exclusive).
        if self.restrict is not None:
            assert self.restrict[1] <= full_X.shape[0]

            print 'restricting to examples ', self.restrict[
                0], ' through ', self.restrict[1], ' exclusive'
            full_X = full_X[self.restrict[0]:self.restrict[1], :]

            assert self.restrict[1] > self.restrict[0]

        #update for after restriction
        num_examples = full_X.shape[0]

        assert num_examples > 0

        # Clear the dataset's own data references; `full_X` holds the
        # design matrix from here on, and shallow copies of `dataset` are
        # refilled per batch.
        dataset.X = None
        dataset.design_loc = None
        dataset.compress = False

        patchifier = ExtractGridPatches(patch_shape=(size, size),
                                        patch_stride=(1, 1))

        pipeline = serial.load(dataset_descriptor.pipeline_path)

        # Replace the saved pipeline's patch extraction step with the
        # stride-1 grid extraction defined above.
        assert isinstance(pipeline.items[0], ExtractPatches)
        pipeline.items[0] = patchifier

        print 'defining features'
        V = T.matrix('V')
        model.make_pseudoparams()
        d = model.e_step.variational_inference(V=V)

        H = d['H_hat']
        Mu1 = d['S_hat']

        assert H.dtype == 'float32'
        assert Mu1.dtype == 'float32'

        # Symbolic feature expression selected by `feature_type`.
        if self.feature_type == 'map_hs':
            feat = (H > 0.5) * Mu1
        elif self.feature_type == 'map_h':
            feat = T.cast(H > 0.5, dtype='float32')
        elif self.feature_type == 'exp_hs':
            feat = H * Mu1
        elif self.feature_type == 'exp_h':
            feat = H
        elif self.feature_type == 'exp_h_thresh':
            feat = H * (H > .01)
        else:
            raise NotImplementedError()

        assert feat.dtype == 'float32'
        print 'compiling theano function'
        f = function([V], feat)

        # On a GPU with at least 4000 hidden units, wrap `f` with `halver`.
        # NOTE(review): `halver` is defined elsewhere; presumably it splits
        # the computation to reduce memory use -- confirm.
        if config.device.startswith('gpu') and model.nhid >= 4000:
            f = halver(f, model.nhid)

        # Compiled function returning the mean over axes 1 and 2 of a
        # 4-D float32 tensor.
        topo_feat_var = T.TensorType(broadcastable=(False, False, False,
                                                    False),
                                     dtype='float32')()
        region_features = function([topo_feat_var],
                                   topo_feat_var.mean(axis=(1, 2)))

        def average_pool(stride):
            # Pools `topo_feat` into a stride x stride grid of regions.
            # `topo_feat` and `ns` come from the enclosing scope and are
            # only assigned further down, so this must not be called
            # before the main loop has set them.
            def point(p):
                # Boundary of region `p` along one spatial axis.
                # NOTE(review): the result is used as a slice index, so
                # this relies on `/` being integer division.
                return p * ns / stride

            rval = np.zeros(
                (topo_feat.shape[0], stride, stride, topo_feat.shape[3]),
                dtype='float32')

            for i in xrange(stride):
                for j in xrange(stride):
                    rval[:, i, j, :] = region_features(
                        topo_feat[:,
                                  point(i):point(i + 1),
                                  point(j):point(j + 1), :])

            return rval

        num_superpixels = 7
        output = np.zeros((num_examples, pooling_matrix.shape[0]),
                          dtype='float32')

        # Template dataset; a shallow copy is filled with the features of
        # each batch.
        fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                               view_converter=DefaultViewConverter(
                                   [1, 1, model.nhid]))

        # Patch positions per axis for a 32-pixel side and stride 1.
        ns = 32 - size + 1
        depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                             patch_shape=(1, 1))

        # Diagnostic output when the loop below would not run at all.
        if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0:
            print num_examples
            print batch_size

        # Only full batches are processed: if `num_examples` is not a
        # multiple of `batch_size`, the trailing rows of `output` stay 0.
        for i in xrange(0, num_examples - batch_size + 1, batch_size):
            print i
            # t1..t6 are timing checkpoints, printed at the end of the
            # iteration.
            t1 = time.time()

            d = copy.copy(dataset)
            d.set_design_matrix(full_X[i:i + batch_size, :])

            t2 = time.time()

            #print '\tapplying preprocessor'
            d.apply_preprocessor(pipeline, can_fit=False)
            X2 = d.get_design_matrix()

            t3 = time.time()

            #print '\trunning theano function'
            feat = f(X2)

            t4 = time.time()

            assert feat.dtype == 'float32'

            feat_dataset = copy.copy(fd)

            # Count NaN features, then replace them with 0.
            if np.any(np.isnan(feat)):
                nan += np.isnan(feat).sum()
                feat[np.isnan(feat)] = 0

            feat_dataset.set_design_matrix(feat)

            #print '\treassembling features'
            feat_dataset.apply_preprocessor(depatchifier)

            #print '\tmaking topological view'
            topo_feat = feat_dataset.get_topological_view()
            assert topo_feat.shape[0] == batch_size

            t5 = time.time()

            #average pooling
            superpixels = average_pool(num_superpixels)

            # Project the pooled superpixels with the global pooling matrix.
            pooled = pooling_matrix.dot(superpixels.T).T

            output[i:i + batch_size, :] = pooled

            t6 = time.time()

            # Total time, followed by the time spent in each stage.
            print(t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5)

        # For chunked runs, insert '_A', '_B', ... (chosen by `chunk_id`)
        # before the '.npy' extension.
        if self.chunk_size is not None:
            assert save_path.endswith('.npy')
            save_path_pieces = save_path.split('.npy')
            assert len(save_path_pieces) == 2
            assert save_path_pieces[1] == ''
            save_path = save_path_pieces[0] + '_' + chr(
                ord('A') + self.chunk_id) + '.npy'
        np.save(save_path, output)

        if nan > 0:
            warnings.warn(str(nan) + ' features were nan')