def load(start, stop, datadir='data/CK'):
    """Load labelled face images from `datadir` into a DenseDesignMatrix.

    Image paths are globbed from ``<datadir>/faces_aligned/*.png`` and the
    first `start` matches are skipped.  An image is kept only when a file
    matching ``<first 9 characters of the image name>*_emotion.txt`` exists
    in ``<datadir>/labels``.  Kept images are resized to 32x32, flattened,
    converted to float32 and divided by 255.  Reading stops once
    ``stop - start`` images have been kept.

    Parameters
    ----------
    start : int
        Number of globbed image paths to skip.
    stop : int
        ``stop - start`` is the maximum number of examples returned.
    datadir : str
        Directory holding the ``faces_aligned`` and ``labels`` folders.

    Returns
    -------
    DenseDesignMatrix
        One row per kept image; `y` is a column of labels.

    Raises
    ------
    RuntimeError
        If no image path remains after skipping `start` entries.
    """
    faces_dir = os.path.join(datadir, 'faces_aligned')
    # NOTE(review): glob.glob does not promise any ordering, so which files
    # fall inside [start:] may vary between file systems -- confirm callers
    # do not rely on a stable order.
    im_list = glob.glob(os.path.join(faces_dir, '*.png'))[start:]
    if not im_list:
        msg = ('No image files found in: %s' % os.path.realpath(faces_dir))
        log.error(msg)
        # `RuntimeException` is not a Python name; raising it produced a
        # NameError instead of the intended error.
        raise RuntimeError(msg)

    X = []
    y = []
    more_to_read = stop - start
    for im_file in im_list:
        if more_to_read <= 0:
            break
        label_base_pat = os.path.basename(im_file)[:9] + '*_emotion.txt'
        maybe_label_file = glob.glob(
            os.path.join(datadir, 'labels', label_base_pat))
        if not maybe_label_file:
            # Unlabelled images are skipped so X and y stay aligned.
            continue
        y.append(read_label(maybe_label_file[0]))
        imdata = imread(im_file, False)
        imdata = cv2.resize(imdata, (32, 32))
        X.append(imdata.flatten().astype(np.float32) / 255)
        more_to_read -= 1

    return DenseDesignMatrix(
        X=np.asarray(X),
        y=np.asarray(y).reshape(-1, 1),
        view_converter=DefaultViewConverter((32, 32, 1),
                                            axes=('b', 0, 1, 'c')))
def get_features(path, split, standardize):
    """Load a feature matrix from `path`.

    Parameters
    ----------
    path : str
        Either one file path or several paths separated by commas.  With
        several paths, each one is loaded with the same `split` and
        `standardize` settings and the results are concatenated along
        axis 1.  Paths ending in ``.npy`` go through ``np.load``; anything
        else goes through ``serial.load``.
    split : bool
        If true, the result is ``[abs(X), abs(-X)]`` concatenated along
        axis 1.
    standardize : bool
        Currently unsupported: a true value trips ``assert False``.

    Returns
    -------
    X : ndarray
        A 2-D matrix.  Inputs that are not 2-D are converted with a
        ``DefaultViewConverter`` built from ``shape[1:]``.
    """
    if ',' in path:
        pieces = [get_features(single_path, split, standardize)
                  for single_path in path.split(',')]
        return np.concatenate(pieces, axis=1)

    topo_view = np.load(path) if path.endswith('.npy') else serial.load(path)

    # Objects whose type name mentions h5py are expected to hold exactly one
    # entry; its transposed value is the actual data.
    if 'h5py' in str(type(topo_view)):
        (name,) = topo_view.keys()
        topo_view = topo_view[name].value.T

    if len(topo_view.shape) == 2:
        X = topo_view
    else:
        view_converter = DefaultViewConverter(topo_view.shape[1:])
        print('converting data')
        X = view_converter.topo_view_to_design_mat(topo_view)

    if split:
        X = np.concatenate((np.abs(X), np.abs(-X)), axis=1)

    if standardize:
        # bug: if X is test set, we need to subtract train mean, divide by
        # train std
        assert False
        X -= X.mean(axis=0)
        X /= np.sqrt(.01 + np.var(X, axis=0))

    return X
def _transform_multi_channel_data(self, X, y):
    """Partition multi-channel data and convert it to a design matrix.

    Parameters
    ----------
    X, y
        Passed to ``self._partition_data`` with
        ``partition_size=self.window_size``.

    Returns
    -------
    design_X : ndarray
        Design-matrix form of the partitioned data.
    hot_y : ndarray
        One-hot targets produced by ``OneHotFormatter`` with
        ``max_labels=self.n_classes``.
    view_converter : DefaultViewConverter
        Converter built from ``self.sample_shape``.
    """
    # Data partitioning
    windows_X, windows_y = self._partition_data(
        X=X, y=y, partition_size=self.window_size)

    # Swap the last two axes, then insert a singleton axis before the last.
    swapped = np.transpose(windows_X, [0, 2, 1])
    n_windows, dim_1, dim_2 = swapped.shape
    topo_X = np.reshape(swapped, (n_windows, dim_1, 1, dim_2))

    # Create the view converter and convert to a design matrix
    view_converter = DefaultViewConverter(shape=self.sample_shape,
                                          axes=('b', 0, 1, 'c'))
    design_X = view_converter.topo_view_to_design_mat(topo_X)
    # Sanity check: the conversion must round-trip exactly.
    round_trip = view_converter.design_mat_to_topo_view(design_X)
    assert np.all(topo_X == round_trip)

    # A partition is labelled 1 as soon as any of its entries is positive.
    sum_y = np.sum(windows_y, axis=1)
    sum_y[sum_y > 0] = 1
    formatter = OneHotFormatter(max_labels=self.n_classes)
    hot_y = formatter.format(sum_y)

    return design_X, hot_y, view_converter
def __init__(self, config, which_set='train'):
    """Open the HDF5 store named in `config` and set up one partition.

    Parameters
    ----------
    config : mapping
        Keys read here: ``'hdf5'`` (path of the HDF5 file), `which_set`
        (stored as ``self.support``), ``which_set + '_files'``,
        ``'mean'``, ``'var'`` and ``'tframes'``.
    which_set : str
        One of ``'train'``, ``'test'`` or ``'valid'``.

    Notes
    -----
    The HDF5 file is opened read-only and kept open on ``self.hdf5``.
    """
    keys = ['train', 'test', 'valid']
    assert which_set in keys

    # load hdf5 metadata
    self.hdf5 = tables.open_file(config['hdf5'], mode='r')
    data = self.hdf5.get_node('/', 'Data')
    param = self.hdf5.get_node('/', 'Param')
    self.file_index = param.file_index[0]
    self.file_dict = param.file_dict[0]
    self.label_list = param.label_list[0]
    self.targets = param.targets[0]
    self.nfft = param.fft[0]['nfft']

    # load partition information
    self.support = config[which_set]
    self.file_list = config[which_set + '_files']

    # mean and variance are flattened to 1-D
    self.mean = config['mean']
    self.mean = self.mean.reshape((np.prod(self.mean.shape),))
    self.var = config['var']
    self.var = self.var.reshape((np.prod(self.var.shape),))
    self.istd = np.reciprocal(np.sqrt(self.var))
    self.mask = (self.istd < 20)

    self.tframes = config['tframes']
    if self.tframes > 1:
        # Floor division: a topological shape entry must be an integer,
        # and plain `/` yields a float under true division.
        view_converter = DefaultViewConverter(
            (self.tframes, len(self.mean) // self.tframes, 1))
        super(AudioDataset, self).__init__(X=data.X, y=data.y,
                                           view_converter=view_converter)
    else:
        super(AudioDataset, self).__init__(X=data.X, y=data.y)
def set_topological_view(self, V, axes=('b', 0, 1, 'c'), start=0):
    """
    Store the batch of topological views `V` in the HDF5 file.

    Parameters
    ----------
    V : ndarray
        A batch of topological views; its axis order is given by `axes`.
        Must not contain NaN.
    axes : tuple
        Axis labels of `V`; must contain 0, 1 and 'c'.
    start : int
        Forwarded to ``FaceBBoxDDMPytables.fill_hdf5``.
    """
    assert not numpy.any(numpy.isnan(V))

    # [rows, cols, channels], read from V according to `axes`
    topo_shape = [V.shape[axes.index(label)] for label in (0, 1, 'c')]
    self.view_converter = DefaultViewConverter(topo_shape, axes=axes)

    design = self.view_converter.topo_view_to_design_mat(V)
    assert not numpy.any(numpy.isnan(design))

    FaceBBoxDDMPytables.fill_hdf5(h5file=self.h5file,
                                  data_x=design,
                                  start=start)
def __init__(self, axes=('c', 0, 1, 'b')):
    """Build a small random dataset: 4 examples of 50 float32 features.

    Parameters
    ----------
    axes : tuple
        Either ``('c', 0, 1, 'b')`` or ``('b', 0, 1, 'c')``.
    """
    allowed_axes = [('c', 0, 1, 'b'), ('b', 0, 1, 'c')]
    assert_contains(allowed_axes, axes)
    axes = list(axes)

    converter = DefaultViewConverter((5, 5, 2), axes=axes)
    # Fixed seed, so the data is identical on every construction.
    rng = numpy.random.RandomState([2013, 3, 12])
    design = rng.normal(size=(4, 50)).astype('float32')

    super(DummyDataset, self).__init__(X=design,
                                       view_converter=converter,
                                       axes=axes)
def __init__(self, which_set,
             base_path='${PYLEARN2_DATA_PATH}/icml_2013_emotions',
             start=None,
             stop=None,
             preprocessor=None,
             fit_preprocessor=False,
             axes=('b', 0, 1, 'c'),
             fit_test_preprocessor=False):
    """
    Parameters
    ----------
    which_set : str
        Which portion of the dataset to load: 'train' or 'public_test'.
    base_path : str
        Directory containing the .csv files from kaggle.com.  This
        directory should be writable; if the .csv files haven't already
        been converted to npy, this class will convert them to save
        memory the next time they are loaded.
    start, stop : int, optional
        When `start` is given, only rows ``start:stop`` are kept.
    preprocessor : object, optional
        Applied to the dataset after construction.
    fit_preprocessor : bool
        True if the preprocessor is allowed to fit the data.
    axes : tuple
        Axis order handed to the view converter.
    fit_test_preprocessor : bool
        If we construct a test set based on this dataset, should it be
        allowed to fit the test set?
    """
    # Snapshot the constructor arguments first, before any other local
    # exists, then turn the snapshot into arguments for the test set.
    self.test_args = locals()
    self.test_args['which_set'] = 'public_test'
    self.test_args['fit_preprocessor'] = fit_test_preprocessor
    del self.test_args['start']
    del self.test_args['stop']
    del self.test_args['self']

    files = {'train': 'train.csv', 'public_test': 'test.csv'}
    if which_set not in files:
        raise ValueError("Unrecognized dataset name: " + which_set)
    filename = files[which_set]

    path = preprocess(base_path + '/' + filename)
    X, y = self._load_data(path, which_set == 'train')

    if start is not None:
        assert which_set != 'test'
        assert isinstance(start, int)
        assert isinstance(stop, int)
        assert start >= 0
        assert start < stop
        assert stop <= X.shape[0]
        X = X[start:stop, :]
        if y is not None:
            y = y[start:stop, :]

    view_converter = DefaultViewConverter(shape=[48, 48, 1], axes=axes)
    super(EmotionsDataset, self).__init__(X=X, y=y,
                                          view_converter=view_converter)

    if preprocessor:
        preprocessor.apply(self, can_fit=fit_preprocessor)
def set_topological_view(self, V, axes=('b', 0, 1, 'c')):
    """
    Make the dataset represent `V`, a batch of topological views.

    Rebuilds ``self.view_converter``, ``self.X``, ``self.X_topo_space``
    and the data specs.  When ``self.y`` is set, the data specs also
    include a latent space sized from ``self.latent``.

    Parameters
    ----------
    V : ndarray
        A batch of topological views with axis order `axes`; must not
        contain NaN.
    axes : tuple
        Axis labels of `V`; must contain 0, 1 and 'c'.
    """
    assert not contains_nan(V)

    topo_shape = [V.shape[axes.index(label)] for label in (0, 1, 'c')]
    self.view_converter = DefaultViewConverter(topo_shape, axes=axes)
    self.X = self.view_converter.topo_view_to_design_mat(V)
    # self.X_topo_space stores a "default" topological space that
    # will be used only when self.iterator is called without a
    # data_specs, and with "topo=True", which is deprecated.
    self.X_topo_space = self.view_converter.topo_space
    assert not contains_nan(self.X)

    # Update data specs
    X_space = VectorSpace(dim=self.X.shape[1])
    X_source = 'features'

    if self.y is None:
        self.data_specs = (X_space, X_source)
    else:
        dim = 1 if self.y.ndim == 1 else self.y.shape[-1]

        # This is to support old pickled models
        if getattr(self, 'y_labels', None) is not None:
            y_space = IndexSpace(dim=dim, max_labels=self.y_labels)
        elif getattr(self, 'max_labels', None) is not None:
            y_space = IndexSpace(dim=dim, max_labels=self.max_labels)
        else:
            y_space = VectorSpace(dim=dim)

        latent_space = VectorSpace(dim=self.latent.shape[-1])
        space = CompositeSpace((X_space, y_space, latent_space))
        source = (X_source, 'targets', 'latents')
        self.data_specs = (space, source)

    self.X_space = X_space
    self._iter_data_specs = (X_space, X_source)
def __init__(self, which_set='full', path='train.mat', one_hot=False,
             colorspace='none', step=1, start=None, stop=None,
             center=False, rescale=False, gcn=None, toronto_prepro=False,
             axes=('b', 0, 1, 'c')):
    """Load the data via ``self._load_data`` and apply optional scaling.

    Every constructor argument is stored as an attribute of the same name.

    Parameters
    ----------
    center : bool
        Subtract 127.5 from X.
    rescale : bool
        Divide X by 127.5.
    toronto_prepro : bool
        Divide X by 255 and subtract a per-feature mean.  For
        ``which_set == 'test'`` the mean comes from a freshly built
        ``MATDATA(which_set='train')``.  Incompatible with `center` and
        `gcn`.
    gcn : float, optional
        Scale passed to ``global_contrast_normalize``.
    axes : tuple
        Axis order handed to the view converter.
    """
    # Mirror all arguments onto the instance; must run before any other
    # local variable is created.
    self.__dict__.update(locals())
    del self.self

    self.view_converter = None
    self.path = preprocess(self.path)
    X, y = self._load_data()

    if center:
        X -= 127.5
    if rescale:
        X /= 127.5

    if toronto_prepro:
        assert not center
        assert not gcn
        X = X / 255.
        if which_set == 'test':
            # The test set is centred with the training-set mean.
            train_X = MATDATA(which_set='train').X
            train_X /= 255.
            X = X - train_X.mean(axis=0)
        else:
            X = X - X.mean(axis=0)

    if gcn is not None:
        X = global_contrast_normalize(X, scale=float(gcn),
                                      min_divisor=1e-8)

    topo_shape = (self.windowSize, self.windowSize, self.channels)
    view_converter = DefaultViewConverter(topo_shape, axes)
    super(MATDATA, self).__init__(X=X, y=y, view_converter=view_converter)
def __init__(self, start=None, stop=None, axes=('b', 0, 1, 'c'),
             stdev=0.8, hack=None, preproc='GCN'):
    """Load memory-mapped 96x96x3 face images and their keypoint targets.

    Parameters
    ----------
    start, stop : int, optional
        Example range to keep.  `start` defaults to 0 and `stop` to the
        number of whole examples in the image file.
    axes : tuple
        Axis order handed to the view converter.
    stdev : float
        Stored on ``self.stdev``.
    hack : str, optional
        When given, images are read from
        ``/Tmp/aggarwal/EmotiW_<preproc>_<hack>.npy`` as float32 and
        `hack` is passed to ``self.make_targets``; otherwise the uint8
        ``complete_train_x.npy`` is used and targets are built for
        ``'all'``.
    preproc : str
        Part of the file name used when `hack` is given.
    """
    self.name = hack
    self.stdev = stdev
    self.axes = axes

    n_targets = len(keypoints_names) * 2
    # One row of 0..95 per target column.  The original stacking loop
    # always produced at least one row, hence the max().
    self.pixels = numpy.tile(numpy.arange(0, 96), (max(1, n_targets), 1))

    if hack is not None:
        X = LazyMemmap(preprocess('/Tmp/aggarwal/EmotiW_' + preproc + '_' +
                                  hack + '.npy'),
                       dtype='float32', mode='c')
    else:
        X = LazyMemmap(preprocess(
            '${PYLEARN2_DATA_PATH}/faces/hdf5/complete_train_x.npy'),
            dtype='uint8', mode='c')
    Y = LazyMemmap(preprocess(
        '${PYLEARN2_DATA_PATH}/faces/hdf5/complete_train_y.npy'),
        dtype=numpy.float32, mode='c')

    values_per_image = 96 * 96 * 3
    # Integer count: the previous float division turned the default
    # `stop` into a float, which then fed slice bounds and array shapes.
    num_examples = len(X) // values_per_image
    if stop is None:
        stop = num_examples
    if start is None:
        start = 0

    X = X.view()[start * values_per_image:stop * values_per_image]
    Y = Y.view()[start * n_targets:stop * n_targets]
    X.shape = (stop - start, values_per_image)
    Y.shape = (stop - start, n_targets)

    Y = self.make_targets(Y, hack if hack is not None else 'all')

    super(EmotiwKeypoints, self).__init__(
        X=X, y=Y,
        view_converter=DefaultViewConverter(shape=[96, 96, 3], axes=axes))
def __init__(self, which_set, which_data, start=None, stop=None,
             preprocessor=None):
    """Load ``<which_set><which_data>.npy`` from ``DATA_DIR``.

    Parameters
    ----------
    which_set : str
        'train' or 'test'.  The test set gets all-zero dummy targets of
        width 2; the training targets come from ``targets.npy``.
    which_data : str
        'melspectrum' (topological shape 67x40, 1 channel) or 'specfeat'
        (67x1, 24 channels).
    start, stop : int, optional
        When `start` is given, only examples ``start:stop`` are kept.
    preprocessor : object, optional
        Applied to the dataset after construction.
    """
    assert which_set in ['train', 'test']
    assert which_data in ['melspectrum', 'specfeat']

    features_file = os.path.join(DATA_DIR, which_set + which_data + '.npy')
    X = np.cast['float32'](np.load(features_file))
    # X needs to be 1D per example, shape info is stored in view_converter
    X = np.reshape(X, (X.shape[0], np.prod(X.shape[1:])))

    if which_set == 'test':
        # dummy targets
        y = np.zeros((X.shape[0], 2))
    else:
        y = np.load(os.path.join(DATA_DIR, 'targets.npy'))

    if start is not None:
        assert start >= 0
        assert stop > start
        assert stop <= X.shape[0]
        X = X[start:stop, :]
        y = y[start:stop]
        assert X.shape[0] == y.shape[0]

    if which_data == 'melspectrum':
        # 2D data with 1 channel
        view_converter = DefaultViewConverter((67, 40, 1))
    elif which_data == 'specfeat':
        # 24 channels with 1D data
        view_converter = DefaultViewConverter((67, 1, 24))

    super(Whales, self).__init__(X=X, y=y, view_converter=view_converter)

    assert not np.any(np.isnan(self.X))

    if preprocessor:
        preprocessor.apply(self)
def make_viewer(mat, grid_shape=None, patch_shape=None,
                activation=None, pad=None, is_color=False, rescale=True):
    """
    Build a PatchViewer showing one patch per row (or example) of `mat`.

    Parameters
    ----------
    mat : ndarray
        Either a 2-D matrix with one filter per row, or an array with
        more than two dimensions that is already a topological batch.
        In the latter case `patch_shape` and `is_color` are derived from
        ``mat.shape`` and the passed values are ignored.
    grid_shape : tuple, optional
        Grid layout; defaults to ``PatchViewer.pick_shape(mat.shape[0])``.
    patch_shape : tuple, optional
        Patch size for 2-D `mat`; guessed when omitted.
    activation : sequence, optional
        Per-patch activation.  If its first element is iterable, it is
        treated as several activation sequences and patch ``i`` receives
        the list of their ``i``-th entries.
    pad : optional
        Forwarded to ``PatchViewer``.
    is_color : bool
        For 2-D `mat`, whether rows hold 3 channels instead of 1.
    rescale : bool
        Forwarded to ``PatchViewer.add_patch``.

    Returns
    -------
    PatchViewer
    """
    num_channels = 3 if is_color else 1

    if grid_shape is None:
        grid_shape = PatchViewer.pick_shape(mat.shape[0])

    if mat.ndim > 2:
        patch_shape = mat.shape[1:3]
        topo_view = mat
        num_channels = mat.shape[3]
        is_color = num_channels > 1
    else:
        if patch_shape is None:
            assert mat.shape[1] % num_channels == 0
            # Floor division keeps the pixel count an integer; the assert
            # above guarantees the division is exact.
            patch_shape = PatchViewer.pick_shape(
                mat.shape[1] // num_channels, exact=True)
            assert mat.shape[1] == (patch_shape[0] *
                                    patch_shape[1] *
                                    num_channels)
        topo_shape = (patch_shape[0], patch_shape[1], num_channels)
        view_converter = DefaultViewConverter(topo_shape)
        topo_view = view_converter.design_mat_to_topo_view(mat)

    rval = PatchViewer(grid_shape, patch_shape, pad=pad, is_color=is_color)

    for i in xrange(mat.shape[0]):
        if activation is None:
            act = None
        elif hasattr(activation[0], '__iter__'):
            act = [a[i] for a in activation]
        else:
            act = activation[i]

        patch = topo_view[i, :]
        rval.add_patch(patch, rescale=rescale, activation=act)

    return rval
def __init__(self, ds, ishape, numclass=-1, axes=('b', 0, 1, 'c'),
             fit_preprocessor=True):
    """Wrap an ``(X, y)`` pair as a dense design matrix.

    Parameters
    ----------
    ds : sequence
        ``ds[0]`` is the design matrix and ``ds[1]`` the targets.
    ishape : sequence
        Topological shape handed to ``DefaultViewConverter``.
    numclass : int
        When positive, each target is expanded into a float32 indicator
        row of this length; otherwise targets are passed through as-is.
    axes : tuple
        Axis order handed to the view converter.
    fit_preprocessor : bool
        Accepted but not used here.
    """
    X, y = ds[0], ds[1]

    y_mat = y
    if numclass > 0:
        def indicator_row(label):
            row = np.zeros(numclass)
            row[label] = 1
            return row

        y_mat = np.asarray(
            [indicator_row(label) for label in y]).astype('float32')

    view_converter = DefaultViewConverter(shape=ishape, axes=axes)
    super(DataPylearn2, self).__init__(X=X, y=y_mat,
                                       view_converter=view_converter)
def __init__(self, iterator, num_examples, image_shape):
    """Load `num_examples` images as letterboxed float32 thumbnails.

    Parameters
    ----------
    iterator : iterator
        Yields image paths; ``.next()`` is called `num_examples` times.
    num_examples : int
        Number of images to load.
    image_shape : sequence of 2 ints
        Thumbnail size; every thumbnail is stored with 3 channels.
    """
    assert len(image_shape) == 2
    n_rows, n_cols = image_shape

    T = np.zeros((num_examples, n_rows, n_cols, 3), dtype='float32')
    for i in xrange(num_examples):
        img = image.load(iterator.next())
        T[i, :] = make_letterboxed_thumbnail(img, image_shape)

    converter = DefaultViewConverter(T.shape[1:])
    super(DARPA_ImageNet, self).__init__(topo_view=T,
                                         view_converter=converter)
def __init__(self, X=None, topo_view=None, y=None, view_converter=None,
             axes=('b', 0, 1, 'c'), rng=_default_seed, preprocessor=None,
             fit_preprocessor=False, X_labels=None, y_labels=None,
             block_length=1):
    """Build a time-series dataset, optionally grouping rows into blocks.

    Parameters
    ----------
    X : ndarray
        Data with one time step per row.
    block_length : int
        Must be at least 1.  When it is 1, `X` is used unchanged with a
        ``(1, X.shape[1], 1)`` view converter.  Otherwise trailing rows
        that do not fill a whole block are dropped and every
        `block_length` consecutive rows are flattened into one example;
        the `view_converter` argument is passed through as given.
    y_labels : optional
        When not None and ``block_length != 1``, the last column of `X`
        is treated as the label column: it is removed from the features
        and the label of the first row of each block becomes `y`.
    topo_view, y, view_converter, axes, rng, preprocessor,
    fit_preprocessor, X_labels
        Forwarded to the parent constructor.
    """
    assert block_length >= 1

    if block_length != 1:
        # Number of rows that fill whole blocks.
        usable_rows = X.shape[0] - X.shape[0] % block_length
        n_blocks = usable_rows // block_length

        # `is None` rather than `== None`: equality is evaluated
        # element-wise when y_labels is an array.
        if y_labels is None:
            timeseries = np.reshape(X[0:usable_rows], (n_blocks, -1))
        else:
            timeseries = np.reshape(X[0:usable_rows, :-1], (n_blocks, -1))
            y = np.reshape(X[0:usable_rows, -1], (n_blocks, -1))
            y = y[:, 0].astype(int)

        super(Timeseries, self).__init__(timeseries, topo_view, y,
                                         view_converter, axes, rng,
                                         preprocessor, fit_preprocessor,
                                         X_labels, y_labels)
        self.shape = timeseries.shape
    else:
        view_converter = DefaultViewConverter((1, X.shape[1], 1))
        super(Timeseries, self).__init__(X, topo_view, y, view_converter,
                                         axes, rng, preprocessor,
                                         fit_preprocessor, X_labels,
                                         y_labels)
        self.shape = X.shape
def __init__(self, which_set, base_path, start=None, stop=None,
             preprocessor=None, fit_preprocessor=False,
             axes=('b', 0, 1, 'c'), fit_test_preprocessor=False):
    """Load the 'train' or 'valid' arrays stored under `base_path`.

    Parameters
    ----------
    which_set : str
        'train' (``Train_X.npy`` / ``Train_y.npy``) or 'valid'
        (``Val_X.npy`` / ``Val_y.npy``).
    base_path : str
        Directory holding the .npy files.
    start, stop : int, optional
        When `start` is given, only examples ``start:stop`` are kept.
    preprocessor : object, optional
        Applied to the dataset after construction.
    fit_preprocessor : bool
        Whether `preprocessor` may fit this data.
    axes : tuple
        Axis order handed to the view converter.
    fit_test_preprocessor : bool
        Stored in ``self.test_args`` as the test set's `fit_preprocessor`.

    Raises
    ------
    ValueError
        If `which_set` is neither 'train' nor 'valid'.
    """
    print('%s ?' % (base_path,))

    # Snapshot of the constructor arguments, rewritten for a test set.
    self.test_args = locals()
    self.test_args['which_set'] = 'public_test'
    self.test_args['fit_preprocessor'] = fit_test_preprocessor
    del self.test_args['start']
    del self.test_args['stop']
    del self.test_args['self']

    if which_set == "train":
        X = np.load(base_path + "/Train_X.npy")
        y = np.load(base_path + "/Train_y.npy")
    elif which_set == "valid":
        X = np.load(base_path + "/Val_X.npy")
        y = np.load(base_path + "/Val_y.npy")
    else:
        raise ValueError("Unrecognized dataset name: " + which_set)

    if start is not None:
        assert isinstance(start, int)
        assert isinstance(stop, int)
        assert start >= 0
        assert start < stop
        assert stop <= X.shape[0]
        X = X[start:stop]
        y = y[start:stop]

    # One flat row of 96*96*3 values per example.
    X = X.reshape((X.shape[0], 96 * 96 * 3))

    view_converter = DefaultViewConverter(shape=[96, 96, 3], axes=axes)
    super(AFEWDataset, self).__init__(X=X, y=y,
                                      view_converter=view_converter)

    if preprocessor:
        preprocessor.apply(self, can_fit=fit_preprocessor)
def __init__(self, npy_filename, which_set, one_hot, split):
    """Load one split of an ICML07 dataset stored as .npy files.

    Parameters
    ----------
    npy_filename : str
        Base name; ``_inputs.npy`` and ``_labels.npy`` are appended.
    which_set : str
        'train', 'valid' or 'test'.
    one_hot : bool
        If true, labels are expanded into float32 indicator rows with
        ``labels.max() + 1`` columns (computed on the selected split).
    split : tuple of 3 ints
        ``(n_train, n_valid, n_test)``; the three sets are consecutive
        row ranges in that order.
    """
    assert which_set in ['train', 'valid', 'test']

    self.one_hot = one_hot
    self.split = split

    # Load data from .npy file
    npy_filename_root = os.path.join(preprocess('${PYLEARN2_DATA_PATH}'),
                                     'icml07data', 'npy', npy_filename)
    x_file = datasetCache.cache_file(npy_filename_root + '_inputs.npy')
    y_file = datasetCache.cache_file(npy_filename_root + '_labels.npy')

    data_x = np.load(x_file, mmap_mode='r')
    data_y = np.load(y_file, mmap_mode='r')

    # some sanity checks
    assert np.isfinite(data_x).all()
    assert np.isfinite(data_y).all()
    assert data_x.shape[0] == data_y.shape[0]

    # extract the requested row range
    n_train, n_valid, n_test = split
    bounds = {
        'train': (0, n_train),
        'valid': (n_train, n_train + n_valid),
        'test': (n_train + n_valid, n_train + n_valid + n_test),
    }
    first, last = bounds[which_set]
    data_x = data_x[first:last]
    data_y = data_y[first:last]

    if one_hot:
        n_examples = data_y.shape[0]
        n_classes = data_y.max() + 1

        indicators = np.zeros((n_examples, n_classes), dtype='float32')
        for row in xrange(data_y.shape[0]):
            indicators[row, data_y[row]] = 1.
        data_y = indicators

    view_converter = DefaultViewConverter((28, 28, 1))
    super(ICML07DataSet, self).__init__(X=data_x, y=data_y,
                                        view_converter=view_converter)
def test_make_local_rfs():
    """Check weight shape and sums produced by ``make_local_rfs``."""
    # 10 examples of 10x10x3 = 300 features, all ones.
    converter = DefaultViewConverter((10, 10, 3))
    test_dataset = DenseDesignMatrix(np.ones((10, 300)),
                                     view_converter=converter)

    # With patches drawn from the data.
    layer = make_local_rfs(test_dataset, 4, (5, 5), (5, 5),
                           draw_patches=True)
    weights = layer.get_params()[0].get_value()
    assert weights.shape == (300, 4)
    np.testing.assert_allclose(weights.sum(axis=0), 75 * np.ones(4))
    np.testing.assert_allclose(weights.sum(axis=1), np.ones(300))

    # Default initialisation: only the shape is checked.
    layer = make_local_rfs(test_dataset, 4, (5, 5), (5, 5))
    weights = layer.get_params()[0].get_value()
    assert weights.shape == (300, 4)

    # Asking for 2 units with the same geometry must be rejected.
    np.testing.assert_raises(ValueError, make_local_rfs,
                             test_dataset, 2, (5, 5), (5, 5))
def load_xy_data(npy_fn_x, npy_fn_y, start=0, stop=None, strip_dims=None,
                 reverse=False):
    """
    Load the data from `npy_fn_x` and `npy_fn_y`, pair them, and keep the
    rows from `start` (inclusive) to `stop` (exclusive).

    Parameters
    ----------
    npy_fn_x : str
        Path of the .npy file with the inputs.
    npy_fn_y : str
        Path of the .npy file with the outputs.
    start : int
        First row to keep.
    stop : int
        Useful for only using a part of the dataset. For data with a frame
        every 10 ms, 360000 frames would give 1 hour of data.
    strip_dims : int
        Only keep this many dimensions of each row (useful for stripping
        off deltas).
    reverse : bool
        If set, load the data by first treating `npy_fn_x` as input and
        `npy_fn_y` as output, and then the reverse.  In that case X and Y
        are stacked vertically and no view converter is attached.

    Returns
    -------
    ddm : DenseDesignMatrix
    """
    X = np.load(npy_fn_x)[start:stop, :strip_dims]
    Y = np.load(npy_fn_y)[start:stop, :strip_dims]

    if reverse:
        return DenseDesignMatrix(X=np.vstack([X, Y]), y=np.vstack([Y, X]))

    d_frame = X.shape[1]  # single frame dimension
    # Floor division keeps the shape entry an integer.
    view_converter = DefaultViewConverter(
        (d_frame, X.shape[1] // d_frame, 1))
    return DenseDesignMatrix(X=X, y=Y, view_converter=view_converter)
def set_topological_view(self, topo_view, axes=('b', 0, 1, 'c')):
    '''
    Make the dataset represent `topo_view`, a batch of topological views.

    Only datasets without targets are supported (``self.y`` must be None).

    Parameters
    ----------
    topo_view : ndarray
        A batch of topological views with axis order `axes`; must not
        contain NaN.
    axes : tuple
        Axis labels of `topo_view`; must contain 'b', 0, 1 and 'c'.
    '''
    assert not np.any(np.isnan(topo_view))

    def extent(label):
        return topo_view.shape[axes.index(label)]

    frames = extent('b')  # pretend frames come in as batch dim
    rows = extent(0)
    cols = extent(1)
    channels = extent('c')

    # leave out frames...
    self.view_converter = DefaultViewConverter([rows, cols, channels],
                                               axes=axes)
    self.X = self.view_converter.topo_view_to_design_mat(topo_view)
    # self.X_topo_space stores a "default" topological space that
    # will be used only when self.iterator is called without a
    # data_specs, and with "topo=True", which is deprecated.
    self.X_topo_space = self.view_converter.topo_space
    assert not np.any(np.isnan(self.X))

    # Update data specs; the vector space spans the frames as well.
    X_space = VectorSpace(dim=frames * rows * cols * channels)
    X_source = 'features'

    assert self.y is None, 'y not supported now'
    self.data_specs = (X_space, X_source)
    self.X_space = X_space
    self._iter_data_specs = (X_space, X_source)
def test_split_nfold_datasets():
    """Split the CIFAR-100 training data into 10 folds and check fold 0."""
    # Load and create ddm from cifar100
    path = "/data/lisa/data/cifar100/cifar-100-python/train"
    obj = serial.load(path)

    pixels = obj['data']
    assert pixels.max() == 255.
    assert pixels.min() == 0.
    pixels = np.cast['float32'](pixels)

    # Targets are not implemented yet, so the matrix is built without y.
    ddm = DenseDesignMatrix(X=pixels, y=None,
                            view_converter=DefaultViewConverter((32, 32, 3)))
    assert not np.any(np.isnan(ddm.X))

    ddm.y_fine = np.asarray(obj['fine_labels'])
    ddm.y_coarse = np.asarray(obj['coarse_labels'])

    folds = ddm.split_dataset_nfolds(10)
    expected_rows = np.ceil(ddm.num_examples / 10)
    assert folds[0].shape[0] == expected_rows
def __init__(
        self,
        which_set,
        numclass,
        base_path='/data/vision/billf/manifold-learning/DL/Data/icml_2013_emotions',
        start=0,
        stop=-1,
        preprocessor=None,
        trainindex=0,
        ishape=None,
        fit_preprocessor=False,
        axes=('b', 0, 1, 'c'),
        fit_test_preprocessor=False,
        flip=0):
    """Load a dataset file through ``self.loadFile``.

    Parameters
    ----------
    which_set : str
        'train' (``occ_train.csv``) or 'public_test' (``test.csv``).
    numclass : int
        Passed to ``self.label_id2arr`` together with the labels.
    base_path : str
        Directory containing the .csv files.
    start, stop, trainindex
        Forwarded to ``self.loadFile``.
    preprocessor : object, optional
        Applied to the dataset after construction.
    ishape : object
        Must provide ``.shape`` and ``.num_channels``; both are read to
        build the view converter shape.
    fit_preprocessor : bool
        Whether `preprocessor` may fit this data.
    axes : tuple
        Axis order handed to the view converter.
    fit_test_preprocessor : bool
        Accepted but not used here.
    flip : int
        When truthy, the left-right flipped examples returned by
        ``self.flipData`` are appended to X and the labels are repeated.

    Raises
    ------
    ValueError
        If `which_set` is not a known dataset name.
    """
    files = {'train': 'occ_train.csv', 'public_test': 'test.csv'}
    if which_set not in files:
        raise ValueError("Unrecognized dataset name: " + which_set)
    filename = files[which_set]

    X, y = self.loadFile(base_path + '/' + filename, start, stop,
                         trainindex)

    if flip:
        # Only the left-right flips are used; the up-down ones are dropped.
        flipped_lr, _flipped_ud = self.flipData(X)
        X = X + flipped_lr
        y = y + y

    topo_shape = np.append(ishape.shape, ishape.num_channels)
    view_converter = DefaultViewConverter(shape=topo_shape, axes=axes)
    super(Occ, self).__init__(X=X,
                              y=self.label_id2arr(y, numclass),
                              view_converter=view_converter)

    if preprocessor:
        preprocessor.apply(self, can_fit=fit_preprocessor)
def __init__(self, path='train.mat', start=None, stop=None, center=False,
             rescale=False, axes=('b', 0, 1, 'c'), channels=4):
    """Load the data via ``self._load_data`` and rescale it in place.

    Every constructor argument is stored as an attribute of the same name.

    Parameters
    ----------
    path : str
        Data file; expanded with ``preprocess``.
    center, rescale : bool
        Both set: subtract 127.5, then divide by 127.5.  Only `center`:
        subtract 127.5.  Only `rescale`: divide by 255.
    axes : tuple
        Axis order handed to the view converter.

    Notes
    -----
    The view converter shape is fixed at ``(61, 61, 4)`` and
    ``self.windowSize`` is computed assuming 4 channels, regardless of
    the `channels` argument.
    """
    # Mirror all arguments onto the instance; must run before any other
    # local variable is created.
    self.__dict__.update(locals())
    del self.self

    self.filters = tables.Filters(complib='blosc', complevel=5)
    self.view_converter = None
    self.path = preprocess(self.path)

    X, y = self._load_data()
    self.windowSize = np.uint8(np.sqrt(X.shape[1] / 4))

    if center:
        X[:] -= 127.5
        if rescale:
            X[:] /= 127.5
    elif rescale:
        X[:] /= 255.

    view_converter = DefaultViewConverter((61, 61, 4), axes)
    super(MATDATAPyTables, self).__init__(X=X, y=y,
                                          view_converter=view_converter)

    self.h5file.flush()
def load_data(npy_fn, start=0, stop=None, strip_dims=None, stack_n_frames=1):
    """
    Load the data from `npy_fn` and keep the rows from `start` (inclusive)
    to `stop` (exclusive).

    Parameters
    ----------
    npy_fn : str
        Path of the .npy file.
    start : int
        First row to keep.
    stop : int
        Useful for only using a part of the dataset. For data with a frame
        every 10 ms, 360000 frames would give 1 hour of data.
    strip_dims : int
        Only keep this many dimensions of each row (useful for stripping
        off deltas).
    stack_n_frames : int
        If different from 1, treat this many frames as a window and sweep
        the window across the data (1-frame shift).

    Returns
    -------
    ddm : DenseDesignMatrix
    """
    X = np.load(npy_fn)[start:stop, :strip_dims]
    d_frame = X.shape[1]  # single frame dimension

    # Stack frames
    if stack_n_frames != 1:
        X = stack_overlapping_vectors(X, stack_n_frames, n_rate=1)

    # Floor division keeps the per-row frame count an integer.
    view_converter = DefaultViewConverter(
        (d_frame, X.shape[1] // d_frame, 1))
    return DenseDesignMatrix(X=X, view_converter=view_converter)
def __init__(self, which_set, center=False, gcn=None, toronto_prepro=False,
             axes=('b', 0, 1, 'c'), start=None, stop=None, one_hot=False):
    """Load CIFAR-100 with fine labels.

    Parameters
    ----------
    which_set : str
        'train' or 'test'.
    center : bool
        Subtract 127.5 from every pixel.
    gcn : float, optional
        When given, each example has its mean removed, is scaled to unit
        Euclidean norm and is multiplied by `gcn`.
    toronto_prepro : bool
        Divide by 255 and subtract the per-pixel mean.  Not available for
        the test set; incompatible with `center` and `gcn`.
    axes : tuple
        Axis order handed to the view converter.
    start, stop : int, optional
        When `start` is given, only examples ``start:stop`` are kept.
    one_hot : bool
        Encode the labels as float32 indicator rows of width 100.

    Raises
    ------
    NotImplementedError
        If `toronto_prepro` is requested for the test set.
    """
    assert which_set in ['train', 'test']

    path = "${PYLEARN2_DATA_PATH}/cifar100/cifar-100-python/" + which_set
    obj = serial.load(path)

    X = obj['data']
    assert X.max() == 255.
    assert X.min() == 0.
    X = np.cast['float32'](X)
    y = np.asarray(obj['fine_labels'])

    self.center = center
    self.one_hot = one_hot

    if one_hot:
        indicators = np.zeros((y.shape[0], 100), dtype='float32')
        for row in xrange(y.shape[0]):
            indicators[row, y[row]] = 1.
        y = indicators

    if center:
        X -= 127.5

    if toronto_prepro:
        assert not center
        assert not gcn
        if which_set == 'test':
            raise NotImplementedError("Need to subtract the mean of the "
                                      "*training* set.")
        X = X / 255.
        X = X - X.mean(axis=0)

    self.toronto_prepro = toronto_prepro
    self.gcn = gcn

    if gcn is not None:
        assert isinstance(gcn, float)
        X = (X.T - X.mean(axis=1)).T
        X = (X.T / np.sqrt(np.square(X).sum(axis=1))).T
        X *= gcn

    if start is not None:
        # This needs to come after the prepro so that it doesn't change
        # the pixel means computed above
        assert start >= 0
        assert stop > start
        assert stop <= X.shape[0]
        X = X[start:stop, :]
        y = y[start:stop]
        assert X.shape[0] == y.shape[0]

    self.axes = axes
    view_converter = DefaultViewConverter((32, 32, 3), axes)
    super(CIFAR100, self).__init__(X=X, y=y, view_converter=view_converter)

    assert not N.any(N.isnan(self.X))
feat = H * Mu1 elif feature_type == 'exp_h': feat = H elif feature_type == 'map_hs': feat = (H > 0.5) * Mu1 else: assert False print 'compiling theano function' f = function([V], feat) print 'running theano function' feat = f(X2) feat_dataset = DenseDesignMatrix(X=feat, view_converter=DefaultViewConverter( [1, 1, feat.shape[1]])) print 'reassembling features' ns = 32 - size + 1 depatchifier = ReassembleGridPatches(orig_shape=(ns, ns), patch_shape=(1, 1)) feat_dataset.apply_preprocessor(depatchifier) print 'making topological view' topo_feat = feat_dataset.get_topological_view() assert topo_feat.shape[0] == X.shape[0] print 'assembling visualizer' n = np.ceil(np.sqrt(model.nhid))
def test_init_with_vc():
    """Constructing a DenseDesignMatrix with a view converter must work."""
    rng = np.random.RandomState([4, 5, 6])
    design = rng.randn(12, 5)
    converter = DefaultViewConverter([1, 2, 3])
    d = DenseDesignMatrix(X=design, view_converter=converter)
import sys
from pylearn2.utils import serial
from pylearn2.datasets.preprocessing import ZCA
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix, DefaultViewConverter
from pylearn2.gui.patch_viewer import PatchViewer
import numpy as np

# Usage: <script> <path to a pickled preprocessor pipeline>
# Displays the rows of the ZCA whitening matrix as colour patches.
path = sys.argv[1]

prepro = serial.load(path)

# The ZCA step is expected to be the last item of the pipeline.
zca = prepro.items[-1]
assert isinstance(zca, ZCA)

W = zca.P_

# Each row is interpreted as a square patch with 3 channels.
assert W.shape[1] % 3 == 0
n = int(np.sqrt(W.shape[1] / 3))

converter = DefaultViewConverter((n, n, 3))
d = DenseDesignMatrix(X=W, view_converter=converter)

W = d.get_weights_view(W)

pv = PatchViewer(grid_shape=(n * 3, n), patch_shape=(n, n), is_color=True)

n_patches = n * n * 3
for i in xrange(n_patches):
    pv.add_patch(W[i, ...], rescale=True)

pv.show()
def __call__(self, full_X):
    """Extract pooled features for a single image.

    Parameters
    ----------
    full_X : ndarray
        One 3-D image.  It is reshaped to a batch of one, and a
        single-channel image is replicated to three channels.

    Returns
    -------
    ndarray
        The pooled features for the first entry of
        ``self.pooling_region_counts``, with shape
        ``(1, count, count, model.nhid)``.  Features for the other
        entries are computed but not returned.
    """
    pooling_region_counts = self.pooling_region_counts
    model = self.model
    size = self.size

    nan = 0

    full_X = full_X.reshape(1, full_X.shape[0], full_X.shape[1],
                            full_X.shape[2])
    if full_X.shape[3] == 1:
        full_X = np.concatenate((full_X, full_X, full_X), axis=3)

    print('full_X.shape: ' + str(full_X.shape))

    num_examples = full_X.shape[0]
    assert num_examples == 1

    pipeline = self.preprocessor

    def average_pool(stride):
        # Reads `topo_feat` and `ns` from the enclosing scope; both are
        # assigned further down, before the first call.
        def point(p):
            # Floor division: the result is used as a slice bound and
            # must be an integer.
            return p * ns // stride

        rval = np.zeros(
            (topo_feat.shape[0], stride, stride, topo_feat.shape[3]),
            dtype='float32')

        for i in xrange(stride):
            for j in xrange(stride):
                rval[:, i, j, :] = self.region_features(
                    topo_feat[:, point(i):point(i + 1),
                              point(j):point(j + 1), :])

        return rval

    outputs = [
        np.zeros((num_examples, count, count, model.nhid), dtype='float32')
        for count in pooling_region_counts
    ]

    assert len(outputs) > 0

    # Template dataset; a shallow copy receives the features of each batch.
    fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                           view_converter=DefaultViewConverter(
                               [1, 1, model.nhid]))

    ns = 32 - size + 1
    depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                         patch_shape=(1, 1))

    batch_size = 1

    for i in xrange(0, num_examples - batch_size + 1, batch_size):
        print(i)
        t1 = time.time()

        d = DenseDesignMatrix(
            topo_view=np.cast['float32'](full_X[i:i + batch_size, :]),
            view_converter=DefaultViewConverter((32, 32, 3)))

        t2 = time.time()

        # apply the preprocessing pipeline without fitting it
        d.apply_preprocessor(pipeline, can_fit=False)
        X2 = d.get_design_matrix()

        t3 = time.time()

        # run the compiled feature function
        feat = self.f(X2)

        t4 = time.time()

        assert feat.dtype == 'float32'

        feat_dataset = copy.copy(fd)

        # NaN features are counted and replaced by zero.
        if np.any(np.isnan(feat)):
            nan += np.isnan(feat).sum()
            feat[np.isnan(feat)] = 0

        feat_dataset.set_design_matrix(feat)

        # reassemble the per-patch features into a grid
        feat_dataset.apply_preprocessor(depatchifier)

        # make the topological view
        topo_feat = feat_dataset.get_topological_view()
        assert topo_feat.shape[0] == batch_size

        t5 = time.time()

        # average pooling
        for output, count in zip(outputs, pooling_region_counts):
            output[i:i + batch_size, ...] = average_pool(count)

        t6 = time.time()

        print(t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5)

    return outputs[0]
def _execute(self):
    """Extract pooled features for a whole dataset and save them.

    The dataset comes from ``self.dataset_family[self.which_set][size]``.
    Examples are processed in batches of ``self.batch_size``; a trailing
    partial batch is not processed and its output rows stay zero.
    Features are average-pooled to a 7x7 grid, projected with the
    module-level ``pooling_matrix`` and written with ``np.save`` to
    ``self.save_path``.  When ``self.chunk_size`` is set, a chunk letter
    derived from ``self.chunk_id`` is inserted before ``.npy``.

    Raises
    ------
    NotImplementedError
        If ``self.feature_type`` is not a supported feature type.
    """
    global pooling_matrix

    save_path = self.save_path
    batch_size = self.batch_size
    model = self.model
    size = self.size

    nan = 0

    dataset_descriptor = self.dataset_family[self.which_set][size]

    dataset = dataset_descriptor.dataset_maker()
    expected_num_examples = dataset_descriptor.num_examples

    full_X = dataset.get_design_matrix()
    num_examples = full_X.shape[0]
    assert num_examples == expected_num_examples

    if self.restrict is not None:
        assert self.restrict[1] <= full_X.shape[0]

        print('restricting to examples  %s  through  %s  exclusive'
              % (self.restrict[0], self.restrict[1]))
        full_X = full_X[self.restrict[0]:self.restrict[1], :]

        assert self.restrict[1] > self.restrict[0]

    # update for after restriction
    num_examples = full_X.shape[0]

    assert num_examples > 0

    # The design matrix is held in full_X from here on; the dataset object
    # is only shallow-copied per batch below.
    dataset.X = None
    dataset.design_loc = None
    dataset.compress = False

    # Replace the pipeline's patch extractor with a dense grid extractor.
    patchifier = ExtractGridPatches(patch_shape=(size, size),
                                    patch_stride=(1, 1))

    pipeline = serial.load(dataset_descriptor.pipeline_path)

    assert isinstance(pipeline.items[0], ExtractPatches)
    pipeline.items[0] = patchifier

    print('defining features')
    V = T.matrix('V')

    model.make_pseudoparams()
    d = model.e_step.variational_inference(V=V)

    H = d['H_hat']
    Mu1 = d['S_hat']

    assert H.dtype == 'float32'
    assert Mu1.dtype == 'float32'

    if self.feature_type == 'map_hs':
        feat = (H > 0.5) * Mu1
    elif self.feature_type == 'map_h':
        feat = T.cast(H > 0.5, dtype='float32')
    elif self.feature_type == 'exp_hs':
        feat = H * Mu1
    elif self.feature_type == 'exp_h':
        feat = H
    elif self.feature_type == 'exp_h_thresh':
        feat = H * (H > .01)
    else:
        raise NotImplementedError()

    assert feat.dtype == 'float32'
    print('compiling theano function')
    f = function([V], feat)

    if config.device.startswith('gpu') and model.nhid >= 4000:
        f = halver(f, model.nhid)

    topo_feat_var = T.TensorType(
        broadcastable=(False, False, False, False), dtype='float32')()
    region_features = function([topo_feat_var],
                               topo_feat_var.mean(axis=(1, 2)))

    def average_pool(stride):
        # Reads `topo_feat` and `ns` from the enclosing scope; both are
        # assigned further down, before the first call.
        def point(p):
            # Floor division: the result is used as a slice bound and
            # must be an integer.
            return p * ns // stride

        rval = np.zeros(
            (topo_feat.shape[0], stride, stride, topo_feat.shape[3]),
            dtype='float32')

        for i in xrange(stride):
            for j in xrange(stride):
                rval[:, i, j, :] = region_features(
                    topo_feat[:, point(i):point(i + 1),
                              point(j):point(j + 1), :])

        return rval

    num_superpixels = 7

    output = np.zeros((num_examples, pooling_matrix.shape[0]),
                      dtype='float32')

    # Template dataset; a shallow copy receives the features of each batch.
    fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                           view_converter=DefaultViewConverter(
                               [1, 1, model.nhid]))

    ns = 32 - size + 1
    depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                         patch_shape=(1, 1))

    # Diagnostic output when not even one full batch is available.
    if len(range(0, num_examples - batch_size + 1, batch_size)) <= 0:
        print(num_examples)
        print(batch_size)

    for i in xrange(0, num_examples - batch_size + 1, batch_size):
        print(i)
        t1 = time.time()

        batch = copy.copy(dataset)
        batch.set_design_matrix(full_X[i:i + batch_size, :])

        t2 = time.time()

        # apply the preprocessing pipeline without fitting it
        batch.apply_preprocessor(pipeline, can_fit=False)
        X2 = batch.get_design_matrix()

        t3 = time.time()

        # run the compiled feature function
        feat = f(X2)

        t4 = time.time()

        assert feat.dtype == 'float32'

        feat_dataset = copy.copy(fd)

        # NaN features are counted and replaced by zero.
        if np.any(np.isnan(feat)):
            nan += np.isnan(feat).sum()
            feat[np.isnan(feat)] = 0

        feat_dataset.set_design_matrix(feat)

        # reassemble the per-patch features into a grid
        feat_dataset.apply_preprocessor(depatchifier)

        # make the topological view
        topo_feat = feat_dataset.get_topological_view()
        assert topo_feat.shape[0] == batch_size

        t5 = time.time()

        # average pooling
        superpixels = average_pool(num_superpixels)

        pooled = pooling_matrix.dot(superpixels.T).T

        output[i:i + batch_size, :] = pooled

        t6 = time.time()

        print(t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5)

    if self.chunk_size is not None:
        assert save_path.endswith('.npy')
        save_path_pieces = save_path.split('.npy')
        assert len(save_path_pieces) == 2
        assert save_path_pieces[1] == ''
        # Chunk 0 becomes '_A', chunk 1 '_B', and so on.
        save_path = (save_path_pieces[0] + '_' +
                     chr(ord('A') + self.chunk_id) + '.npy')

    np.save(save_path, output)

    if nan > 0:
        warnings.warn(str(nan) + ' features were nan')