def _gcn(self, X):
    if not self.specs:
        X = global_contrast_normalize(X, scale=True)
        return X
    # Normalize each feature group (delimited by the cumulative column
    # boundaries in feature_ns, defined elsewhere) separately.
    pre = 0
    for n in feature_ns:
        if n <= X.shape[1]:
            X[:, pre:n] = global_contrast_normalize(X[:, pre:n], scale=True)
            pre = n
    return X
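# Hedged demo (added, not from the original source): exercising the same
# group-wise normalization as _gcn above on synthetic data. feature_ns is
# an assumed list of cumulative column boundaries; each slice of columns
# is normalized on its own rather than jointly with the rest of the row.
import numpy as np
from pylearn2.expr.preprocessing import global_contrast_normalize

X_demo = np.random.RandomState(0).rand(4, 10)
feature_ns = [6, 10]  # columns 0:6 and 6:10 form two feature groups
pre = 0
for n in feature_ns:
    X_demo[:, pre:n] = global_contrast_normalize(X_demo[:, pre:n], scale=1.)
    pre = n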
def __init__(self, which_set, data_path=None, center=True, rescale=True,
             gcn=True):
    self.class_name = ['neg', 'pos']
    # load data
    path = "${PYLEARN2_DATA_PATH}/cin/"
    if data_path is None:
        data_path = path + 'feature850-2-1.pkl'
    else:
        data_path = path + data_path
    data_path = serial.preprocess(data_path)
    with open(data_path, 'rb') as f:
        train_set, valid_set, test_set = cPickle.load(f)
    self.train_set = train_set
    self.valid_set = valid_set
    self.test_set = test_set
    if which_set == 'train':
        X, Y = self.train_set
    elif which_set == 'valid':
        X, Y = self.valid_set
    else:
        X, Y = self.test_set
    X = X.astype(float)  # astype returns a copy; the result must be assigned
    axis = 0
    _max = np.max(X, axis=axis)
    _min = np.min(X, axis=axis)
    _mean = np.mean(X, axis=axis)
    _std = np.std(X, axis=axis)
    _scale = _max - _min
    if gcn:
        X = global_contrast_normalize(X, scale=gcn)
    else:
        if center:
            X[:, ] -= _mean
        if rescale:
            X[:, ] /= _scale
    # Encode the binary label as two columns: [Y, 1 - Y].
    y = np.zeros((Y.shape[0], 2))
    y[:, 0] = Y
    y[:, 1] = 1 - Y  # was y[:, 0], which overwrote the first column
    print X.shape, y.shape
    super(CIN_FEATURE2, self).__init__(X=X, y=y)
def apply(self, dataset, can_fit=False):
    if self.skip:
        return
    if self._batch_size is None:
        X = global_contrast_normalize(dataset.get_design_matrix(),
                                      scale=self._scale,
                                      subtract_mean=self._subtract_mean,
                                      use_std=self._use_std,
                                      sqrt_bias=self._sqrt_bias,
                                      min_divisor=self._min_divisor)
        dataset.set_design_matrix(X)
    else:
        X = dataset.get_design_matrix()
        data_size = X.shape[0]
        last = (np.floor(data_size / float(self._batch_size)) *
                self._batch_size)
        for i in xrange(0, data_size, self._batch_size):
            if i >= last:
                stop = i + np.mod(data_size, self._batch_size)
            else:
                stop = i + self._batch_size
            log.info("GCN processing data from %d to %d" % (i, stop))
            data = self.transform(X[i:stop])
            dataset.set_design_matrix(data, start=i)
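# Hedged usage sketch (added, not from the original source): applying a
# batched GCN preprocessor such as the one above to a pylearn2 dataset.
# Assumes the stock pylearn2 GlobalContrastNormalization class, whose
# apply() matches the method shown; the design matrix is synthetic.
import numpy as np
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.datasets.preprocessing import GlobalContrastNormalization

dataset = DenseDesignMatrix(
    X=np.random.RandomState(0).rand(100, 48).astype('float32'))
prep = GlobalContrastNormalization(scale=55., batch_size=32)
prep.apply(dataset, can_fit=False)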
def apply(self, dataset, can_fit=False):
    # check whether normalization has already been run on these patches
    if self.normalized_data_key in dataset.keys():
        print "skipping normalization, this has already been run"
        return
    else:
        print "normalizing patches"
    in_data = dataset[self.data_to_normalize_key]
    data_size = in_data.shape[0]
    dataset.create_dataset(self.normalized_data_key,
                           in_data.shape,
                           chunks=((self.batch_size,) + in_data.shape[1:]))
    out_data = dataset[self.normalized_data_key]
    # iterate over patches
    for patch_index in range(data_size):
        if patch_index % 2000 == 0:
            print str(patch_index) + '/' + str(data_size)
        # iterate over r, g, b, d so each channel is normalized separately
        for channel in range(4):
            out_data[patch_index, :, :, channel] = global_contrast_normalize(
                in_data[patch_index, :, :, channel],
                scale=self.scale,
                subtract_mean=self.subtract_mean,
                use_std=self.use_std,
                sqrt_bias=self.sqrt_bias,
                min_divisor=self.min_divisor)
def test_subtract_mean_false():
    rng = numpy.random.RandomState(0)
    X = abs(rng.randn(50, 70))
    Y = global_contrast_normalize(X, subtract_mean=False, scale=5)
    numpy.testing.assert_allclose(numpy.sqrt((Y ** 2).sum(axis=1)), 5)
    numpy.testing.assert_raises(AssertionError,
                                numpy.testing.assert_allclose,
                                Y.mean(axis=1), 0, atol=1e-10)
def __init__(self, which_set='full', path='train.mat', one_hot=False,
             colorspace='none', step=1, start=None, stop=None,
             center=False, rescale=False, gcn=None, toronto_prepro=False,
             axes=('b', 0, 1, 'c')):
    # stash all constructor arguments as attributes in one go
    self.__dict__.update(locals())
    del self.self
    self.view_converter = None
    self.path = preprocess(self.path)
    X, y = self._load_data()
    if center:
        X -= 127.5
    if rescale:
        X /= 127.5
    if toronto_prepro:
        assert not center
        assert not gcn
        X = X / 255.
        if which_set == 'test':
            other = MATDATA(which_set='train')
            oX = other.X
            oX /= 255.
            X = X - oX.mean(axis=0)
        else:
            X = X - X.mean(axis=0)
    if gcn is not None:
        gcn = float(gcn)
        X = global_contrast_normalize(X, scale=gcn, min_divisor=1e-8)
    view_converter = DefaultViewConverter(
        (self.windowSize, self.windowSize, self.channels), axes)
    super(MATDATA, self).__init__(X=X, y=y, view_converter=view_converter)
def normalize(img, prep, img_shape):
    # prep must be a fitted pylearn2 ZCA preprocessor; all the prep.*
    # calls below rely on it
    img = prep.inverse(img.reshape(1, -1))[0]
    img /= np.abs(img).max()
    img = np.clip(img, -1., 1.)
    img = (img + 1.) / 2.
    img = global_contrast_normalize(img.reshape(1, -1) * 255, scale=55.)
    img = prep._gpu_matrix_dot(img - prep.mean_, prep.P_)
    return img.reshape(img_shape)
def transform(self, X):
    if self.flag_gcn:
        X = global_contrast_normalize(X)
    if self.flag_lcn:
        X = self.lcn_transform(X)
    if self.flag_zca:
        X, _ = centersphere(X, method='ZCA', A=self.zca_mat)
    return X
def real_time_prediction():
    # load new images for classification
    fo = open(save_path, 'rb')  # batch path
    batch1 = pickle.load(fo)
    fo.close()
    xarr = np.array(batch1['data'], dtype='float32')
    xarr = global_contrast_normalize(xarr, scale=55.)
    no_of_row = len(batch1['data'])
    # reshape: first dimension = number of rows in the batch matrix
    xdat = np.array(xarr.reshape((no_of_row, 3, 32, 32)), dtype='float32')
    xdat = np.transpose(xdat[:, :, :, :], (1, 2, 3, 0))
    x = dense_design_matrix.DenseDesignMatrix(topo_view=xdat,
                                              axes=['c', 0, 1, 'b'])
    x.apply_preprocessor(my_pca_preprocessor, can_fit=False)
    tarr = x.get_topological_view()
    y = f(tarr)
    # Find the highest-scoring class per row: j indexes rows of the
    # prediction batch, i indexes the 10 classes.
    # result = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog',
    #           'frog', 'horse', 'ship', 'truck')
    result = ('bottle', 'book', 'toy', 'pen', 'chair',
              'coin', 'phone', 'hand', 'note', 'head')
    resultString = ''
    for j in range(0, no_of_row):
        max_index = 0
        max_no = y[j][0]
        for i in range(0, 10):
            if y[j][i] > max_no:
                max_no = y[j][i]
                max_index = i
        print "======================"
        print 'Photo', j + 1, ' max=', result[max_index]
        if j > 0:
            resultString += ','
        resultString += result[max_index]
    return resultString
def __init__(self, which_set=None, file=None, center=False, rescale=False,
             gcn=None, one_hot=False, start=None, stop=None,
             axes=('b', 0, 1, 'c'), toronto_prepro=False,
             preprocessor=None):
    # note: there is no such thing as the cifar10 validation set;
    # pylearn1 defined one but really it should be user-configurable
    # (as it is here)
    self.axes = axes
    # we define here:
    dtype = 'uint8'
    ntrain = 0
    nvalid = 0  # artefact, we won't use it
    ntest = 300000
    # we also expose the following details:
    self.img_shape = (3, 32, 32)
    self.img_size = N.prod(self.img_shape)
    # load the data from the given file
    X = np.load(file).astype(np.float32)
    if center:
        X -= 127.5
    self.center = center
    if rescale:
        X /= 127.5
    self.rescale = rescale
    self.toronto_prepro = toronto_prepro
    self.gcn = gcn
    if gcn is not None:
        gcn = float(gcn)
        X = global_contrast_normalize(X, scale=gcn)
    view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                              axes)
    super(CIFAR10_TEST, self).__init__(X=X, view_converter=view_converter)
    assert not np.any(np.isnan(self.X))
    if preprocessor:
        preprocessor.apply(self)
def __init__(self, source_directory, axes=('b', 0, 1, 'c'),
             remove_misfits=False):
    self.axes = axes
    # we define here:
    dtype = 'uint8'
    files = self.file_list_of_source(source_directory)
    # we also expose the following details:
    # (this is rather dangerous if the first file is not representative
    # of the remainder)
    self.img_shape = self.determine_shape(files[0])
    self.img_size = np.prod(self.img_shape)
    # prepare loading
    x = np.zeros((len(files), self.img_size), dtype=dtype)
    # load train data
    X = self.flatten_images(x, files)
    X = global_contrast_normalize(X)
    view_converter = dense_design_matrix.DefaultViewConverter(self.img_shape,
                                                              self.axes)
    super(LocalImages, self).__init__(X=X, view_converter=view_converter)
def apply(self, dataset, can_fit=False):
    if self._batch_size is None:
        X = global_contrast_normalize(dataset.get_design_matrix(),
                                      scale=self._scale,
                                      subtract_mean=self._subtract_mean,
                                      use_std=self._use_std,
                                      sqrt_bias=self._sqrt_bias,
                                      min_divisor=self._min_divisor)
        dataset.set_design_matrix(X)
    else:
        X = dataset.get_design_matrix()
        data_size = X.shape[0]
        last = (np.floor(data_size / float(self._batch_size)) *
                self._batch_size)
        for i in xrange(0, data_size, self._batch_size):
            if i >= last:
                stop = i + np.mod(data_size, self._batch_size)
            else:
                stop = i + self._batch_size
            log.info("GCN processing data from {} to {}".format(i, stop))
            data = self.transform(X[i:stop])
            dataset.set_design_matrix(data, start=i)
def __init__(self, which_set, data_path=None, center=True, rescale=True,
             gcn=True, specs=False):
    self.class_name = ['neg', 'pos']
    # load data
    path = "${PYLEARN2_DATA_PATH}/cin/"
    if data_path is None:
        data_path = path + 'feature1406-2-1.pkl'
    else:
        data_path = path + data_path
    data_path = serial.preprocess(data_path)
    with open(data_path, 'rb') as f:
        train_set, valid_set, test_set = cPickle.load(f)
    self.train_set = train_set
    self.valid_set = valid_set
    self.test_set = test_set
    self.specs = specs
    if which_set == 'train':
        X, Y = self.train_set
    elif which_set == 'valid':
        X, Y = self.valid_set
    else:
        X, Y = self.test_set
    X = X.astype(float)  # astype returns a copy; the result must be assigned
    axis = 0
    _max = np.max(X, axis=axis)
    _min = np.min(X, axis=axis)
    _mean = np.mean(X, axis=axis)
    _std = np.std(X, axis=axis)
    _scale = _max - _min

    def features_map_fn(indexes):
        rval = []
        for sequence_index, example_index in self._fetch_index(indexes):
            rval.append(self.samples_sequences[sequence_index][
                example_index:example_index + self.frames_per_example
            ].ravel())
        return rval

    def targets_map_fn(indexes):
        rval = []
        for sequence_index, example_index in self._fetch_index(indexes):
            rval.append(self.samples_sequences[sequence_index][
                example_index + self.frames_per_example
            ].ravel())
        return rval

    map_fn_components = [features_map_fn, targets_map_fn]
    self.map_functions = tuple(map_fn_components)
    self.cumulative_example_indexes = X.shape[0]
    if gcn:
        X = global_contrast_normalize(X, scale=gcn)
    else:
        if center:
            X[:, ] -= _mean
        if rescale:
            X[:, ] /= _scale
    # Encode the binary label as two columns: [Y, 1 - Y].
    y = np.zeros((Y.shape[0], 2))
    y[:, 0] = Y
    y[:, 1] = 1 - Y  # was y[:, 0], which overwrote the first column
    print X.shape, y.shape
    super(CIN_FEATURE2, self).__init__(X=X, y=y)
    if specs:
        # note: 850 + 656 == 1506, which does not match the 1406 columns
        # in feature1406-2-1.pkl; the companion variant asserts 850 + 556
        assert X.shape[1] == (850 + 656)
        self.init_data_specs()
        self.feature850 = X[:, 0:850]
        self.feature656 = X[:, 850:]
        self.y = y
def test_scale():
    rng = numpy.random.RandomState(0)
    X = abs(rng.randn(50, 70))
    Y = global_contrast_normalize(X, scale=5)
    numpy.testing.assert_allclose(numpy.sqrt((Y ** 2).sum(axis=1)), 5)
    numpy.testing.assert_allclose(Y.mean(axis=1), 0, atol=1e-10)
def __init__(self, which_set, data_path=None, center=True, rescale=True,
             gcn=True, specs=True):
    self.class_name = ['neg', 'pos']
    # load data
    path = "${PYLEARN2_DATA_PATH}/cin/"
    if data_path is None:
        data_path = path + 'feature1406-2-1.pkl'
    else:
        data_path = path + data_path
    data_path = serial.preprocess(data_path)
    with open(data_path, 'rb') as f:
        train_set, valid_set, test_set = cPickle.load(f)
    self.train_set = train_set
    self.valid_set = valid_set
    self.test_set = test_set
    self.specs = specs
    if which_set == 'train':
        X, Y = self.train_set
    elif which_set == 'valid':
        X, Y = self.valid_set
    else:
        X, Y = self.test_set
    X = X.astype(float)  # astype returns a copy; the result must be assigned
    axis = 0
    _max = np.max(X, axis=axis)
    _min = np.min(X, axis=axis)
    _mean = np.mean(X, axis=axis)
    _std = np.std(X, axis=axis)
    _scale = _max - _min
    if gcn:
        X = global_contrast_normalize(X, scale=gcn)
    else:
        if center:
            X[:, ] -= _mean
        if rescale:
            X[:, ] /= _scale
    self.raw_X = X
    self.raw_y = Y
    # Encode the binary label as two columns: [Y, 1 - Y].
    y = np.zeros((Y.shape[0], 2))
    y[:, 0] = Y
    y[:, 1] = 1 - Y
    # print "Load CIN_FEATURE2 data: {}, with size X:{}, y:{}".format(
    #     data_path, X.shape, y.shape)
    super(CIN_FEATURE2, self).__init__(X=X, y=y)
    if specs:
        assert X.shape[1] == (850 + 556)
        self.init_data_specs()
        self.feature850 = X[:, 0:850]
        self.feature556 = X[:, 850:]
        self.y = y
def test_min_divisor():
    rng = numpy.random.RandomState(0)
    X = abs(rng.randn(50, 70))
    X[0] *= 1e-15
    Y = global_contrast_normalize(X, subtract_mean=False, use_std=True)
    numpy.testing.assert_array_equal(X[0], Y[0])
def test_basic():
    rng = numpy.random.RandomState(0)
    X = abs(rng.randn(50, 70))
    Y = global_contrast_normalize(X)
    numpy.testing.assert_allclose((Y ** 2).sum(axis=1), 1)
    numpy.testing.assert_allclose(Y.mean(axis=1), 0, atol=1e-10)
def __init__(self, which_set, center=False, rescale=False, gcn=None,
             start=None, stop=None, axes=('b', 0, 1, 'c'),
             toronto_prepro=False, preprocessor=None):
    # note: there is no such thing as the cifar10 validation set;
    # pylearn1 defined one but really it should be user-configurable
    # (as it is here)
    self.axes = axes
    # we define here:
    dtype = 'uint8'
    ntrain = 50000
    nvalid = 0  # artefact, we won't use it
    ntest = 10000
    # we also expose the following details:
    self.img_shape = (3, 32, 32)
    self.img_size = numpy.prod(self.img_shape)
    self.n_classes = 10
    self.label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                        'dog', 'frog', 'horse', 'ship', 'truck']
    # prepare loading
    fnames = ['data_batch_%i' % i for i in range(1, 6)]
    datasets = {}
    datapath = os.path.join(
        string_utils.preprocess('${PYLEARN2_DATA_PATH}'),
        'cifar10', 'cifar-10-batches-py')
    for name in fnames + ['test_batch']:
        fname = os.path.join(datapath, name)
        if not os.path.exists(fname):
            raise IOError(fname + " was not found. You probably need to "
                          "download the CIFAR-10 dataset by using the "
                          "download script in "
                          "pylearn2/scripts/datasets/download_cifar10.sh "
                          "or manually from "
                          "http://www.cs.utoronto.ca/~kriz/cifar.html")
        datasets[name] = cache.datasetCache.cache_file(fname)
    lenx = int(numpy.ceil((ntrain + nvalid) / 10000.) * 10000)
    x = numpy.zeros((lenx, self.img_size), dtype=dtype)
    y = numpy.zeros((lenx, 1), dtype=dtype)
    # load train data
    nloaded = 0
    for i, fname in enumerate(fnames):
        _logger.info('loading file %s' % datasets[fname])
        data = serial.load(datasets[fname])
        x[i * 10000:(i + 1) * 10000, :] = data['data']
        y[i * 10000:(i + 1) * 10000, 0] = data['labels']
        nloaded += 10000
        if nloaded >= ntrain + nvalid + ntest:
            break
    # load test data
    _logger.info('loading file %s' % datasets['test_batch'])
    data = serial.load(datasets['test_batch'])
    # process this data
    Xs = {'train': x[0:ntrain],
          'test': data['data'][0:ntest]}
    Ys = {'train': y[0:ntrain],
          'test': data['labels'][0:ntest]}
    X = numpy.cast['float32'](Xs[which_set])
    y = Ys[which_set]
    if isinstance(y, list):
        y = numpy.asarray(y).astype(dtype)
    if which_set == 'test':
        assert y.shape[0] == 10000
        y = y.reshape((y.shape[0], 1))
    if center:
        X -= 127.5
    self.center = center
    if rescale:
        X /= 127.5
    self.rescale = rescale
    if toronto_prepro:
        assert not center
        assert not gcn
        X = X / 255.
        if which_set == 'test':
            other = CIFAR10(which_set='train')
            oX = other.X
            oX /= 255.
            X = X - oX.mean(axis=0)
        else:
            X = X - X.mean(axis=0)
    self.toronto_prepro = toronto_prepro
    self.gcn = gcn
    if gcn is not None:
        gcn = float(gcn)
        X = global_contrast_normalize(X, scale=gcn)
    if start is not None:
        # This needs to come after the prepro so that it doesn't
        # change the pixel means computed above for toronto_prepro
        assert start >= 0
        assert stop > start
        assert stop <= X.shape[0]
        X = X[start:stop, :]
        y = y[start:stop, :]
        assert X.shape[0] == y.shape[0]
    if which_set == 'test':
        assert X.shape[0] == 10000
    view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                              axes)
    super(CIFAR10, self).__init__(X=X, y=y,
                                  view_converter=view_converter,
                                  y_labels=self.n_classes)
    assert not contains_nan(self.X)
    if preprocessor:
        preprocessor.apply(self)
def __init__(self, lfw_path, filelist_path, embedding_file=None,
             center=False, scale=False, start=None, stop=None, gcn=None,
             shuffle=False, rng=None, seed=132987, axes=("b", 0, 1, "c"),
             img_shape=(3, 250, 250)):
    self.axes = axes
    self.img_shape = img_shape
    C, H, W = img_shape
    self.img_size = np.prod(self.img_shape)
    files = []
    with open(filelist_path, "r") as filelist_f:
        files = [line.strip() for line in filelist_f]
    # Load raw pixel integer values
    dtype = "uint8"
    X = np.zeros((len(files), W, H, C), dtype=dtype)
    img_ids = []
    for i, line in enumerate(files):
        if "\t" in line:
            # New format: contains image IDs
            img_path, img_id = line.strip().split()
            img_ids.append(int(img_id))
        else:
            img_path = line.strip()
        full_path = os.path.join(lfw_path, img_path)
        im = image.load(full_path, rescale_image=False, dtype=dtype)
        # Handle grayscale images which may not have RGB channels
        if len(im.shape) == 2:
            W, H = im.shape
            # Repeat image 3 times across axis 2
            im = im.reshape(W, H, 1).repeat(3, 2)
        X[i] = im
    # Cast to float32, center / scale if necessary
    X = np.cast["float32"](X)
    # Create dense design matrix from topological view
    X = X.reshape(X.shape[0], -1)
    # Prepare img_ids
    if embedding_file is not None:
        if len(img_ids) != len(files):
            raise ValueError("You must provide a filelist with indexes "
                             "into the embedding array for each image.")
        img_ids = np.array(img_ids, dtype="uint32")
    if center and scale:
        X[:] -= 127.5
        X[:] /= 127.5
    elif center:
        X[:] -= 127.5
    elif scale:
        X[:] /= 255.0
    self.gcn = gcn
    if gcn is not None:
        gcn = float(gcn)
        X = global_contrast_normalize(X, scale=gcn)
    if shuffle:
        rng = make_np_rng(rng, seed, which_method="permutation")
        rand_idx = rng.permutation(len(X))
        X = X[rand_idx]
        img_ids = img_ids[rand_idx]
    if start is not None:
        assert start >= 0
        assert stop > start
        assert stop <= X.shape[0]
        X = X[start:stop]
        if len(img_ids) > 0:
            img_ids = img_ids[start:stop]
    # Load embeddings if provided
    Y = None
    if embedding_file is not None:
        embeddings = np.load(embedding_file)["arr_0"]
        assert embeddings.shape[0] >= len(files)
        Y = embeddings[img_ids].astype(theano.config.floatX)
    # Create a view converter for retrieving the topological view
    self.view_converter = dense_design_matrix.DefaultViewConverter(
        (W, H, C), axes)
    # init super class
    super(LFW, self).__init__(X=X, y=Y)
    assert not contains_nan(self.X)
    # Another hack: rename 'targets' to match model expectations
    if embedding_file is not None:
        space, (X_source, y_source) = self.data_specs
        self.data_specs = (space, (X_source, "condition"))
def test_std_norm():
    rng = numpy.random.RandomState(0)
    X = abs(rng.randn(50, 70))
    Y = global_contrast_normalize(X, use_std=True, scale=5)
    numpy.testing.assert_allclose(Y.std(axis=1, ddof=1), 5)
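# Hedged reference sketch (added, not from the original source): a minimal
# implementation of global_contrast_normalize consistent with the tests
# above, following pylearn2.expr.preprocessing. Each row is one example:
# optionally subtract its mean, then divide by its L2 norm (or its
# standard deviation when use_std=True), rescaled by `scale`; rows whose
# divisor falls below min_divisor are left undivided.
import numpy


def gcn_sketch(X, scale=1., subtract_mean=True, use_std=False,
               sqrt_bias=0., min_divisor=1e-8):
    X = numpy.asarray(X, dtype=float)
    if subtract_mean:
        X = X - X.mean(axis=1, keepdims=True)
    if use_std:
        normalizers = numpy.sqrt(sqrt_bias + X.var(axis=1, ddof=1)) / scale
    else:
        normalizers = numpy.sqrt(sqrt_bias + (X ** 2).sum(axis=1)) / scale
    # leave near-zero rows untouched instead of amplifying noise
    normalizers[normalizers < min_divisor] = 1.
    return X / normalizers[:, numpy.newaxis]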
print batch1_data.shape
print batch1_labels.shape
print batch2_data.shape
print batch2_labels.shape
print batch3_data.shape
print batch3_labels.shape

image = paramgraphics.mat_to_img(batch1_data[:100, :].T, dim_input,
                                 colorImg=colorImg, scale=True)
image.save(saveDir + 'svhn_train.png', 'PNG')
image = paramgraphics.mat_to_img(batch2_data[:100, :].T, dim_input,
                                 colorImg=colorImg, scale=True)
image.save(saveDir + 'svhn_valid.png', 'PNG')
image = paramgraphics.mat_to_img(batch3_data[:100, :].T, dim_input,
                                 colorImg=colorImg, scale=True)
image.save(saveDir + 'svhn_test.png', 'PNG')

if preprocessing == 'gcn_var':
    batch1_data = pypp.global_contrast_normalize(batch1_data,
                                                 subtract_mean=True,
                                                 use_std=True)
    batch2_data = pypp.global_contrast_normalize(batch2_data,
                                                 subtract_mean=True,
                                                 use_std=True)
    batch3_data = pypp.global_contrast_normalize(batch3_data,
                                                 subtract_mean=True,
                                                 use_std=True)
elif preprocessing == 'gcn_norm':
    batch1_data = pypp.global_contrast_normalize(batch1_data,
                                                 subtract_mean=True)
    batch2_data = pypp.global_contrast_normalize(batch2_data,
                                                 subtract_mean=True)
    batch3_data = pypp.global_contrast_normalize(batch3_data,
                                                 subtract_mean=True)

print batch1_data.shape
print batch1_data.max()
print batch1_data.min()
print batch2_data.shape
print batch2_data.max()
print batch2_data.min()
print batch3_data.shape
print batch3_data.max()
print batch3_data.min()
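# Hedged illustration (added, not from the original source): the difference
# between the two preprocessing branches above, on synthetic data.
# 'gcn_var' divides each row by its standard deviation (use_std=True),
# while 'gcn_norm' divides by its L2 norm; both subtract the row mean first.
import numpy as np
from pylearn2.expr import preprocessing as pypp

demo = np.random.RandomState(0).rand(5, 12)
var_scaled = pypp.global_contrast_normalize(demo, subtract_mean=True,
                                            use_std=True)
norm_scaled = pypp.global_contrast_normalize(demo, subtract_mean=True)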
def __init__(self, which_set, center=False, rescale=False, gcn=None,
             one_hot=None, start=None, stop=None, axes=('b', 0, 1, 'c'),
             toronto_prepro=False, preprocessor=None, two_image=False):
    # note: there is no such thing as the cifar10 validation set;
    # pylearn1 defined one but really it should be user-configurable
    # (as it is here)
    self.axes = axes
    # we define here:
    dtype = 'uint8'
    ntrain = 50000
    nvalid = 0  # artefact, we won't use it
    ntest = 10000
    # we also expose the following details:
    self.img_shape = (3, 32, 32)
    self.img_shape2 = (32, 32, 3)
    self.img_size = N.prod(self.img_shape)
    self.n_classes = 10
    self.label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                        'dog', 'frog', 'horse', 'ship', 'truck']
    # prepare loading
    fnames = ['data_batch_%i' % i for i in range(1, 6)]
    lenx = N.ceil((ntrain + nvalid) / 10000.) * 10000
    x = N.zeros((lenx, self.img_size), dtype=dtype)
    y = N.zeros((lenx, 1), dtype=dtype)
    # load train data
    nloaded = 0
    for i, fname in enumerate(fnames):
        data = CIFAR10._unpickle(fname)
        x[i * 10000:(i + 1) * 10000, :] = data['data']
        y[i * 10000:(i + 1) * 10000, 0] = data['labels']
        nloaded += 10000
        if nloaded >= ntrain + nvalid + ntest:
            break
    # load test data
    data = CIFAR10._unpickle('test_batch')
    # two-value (binarized) images; no other option can be combined with
    # the two_image option
    print x.shape
    if two_image:
        from PIL import Image
        two_value_x = []
        self.img_shape = (1, 32, 32)
        self.img_shape2 = (32, 32, 1)
        for i, pixel in enumerate(x.reshape(50000, 3, 32, 32)):
            if i % 1000 == 0:
                print i
            pixel = np.transpose(pixel, (1, 2, 0))
            test_img = Image.new("RGB", (32, 32), (255, 0, 0))
            test_img.putdata(
                [tuple(x.tolist()) for x in pixel.reshape(1024, 3)])
            two_value_x.append(
                [x for x in test_img.convert("1").getdata()])
        x = np.asarray(two_value_x)
    # process this data
    Xs = {'train': x[0:ntrain],
          'test': data['data'][0:ntest]}
    Ys = {'train': y[0:ntrain],
          'test': data['labels'][0:ntest]}
    X = N.cast['float32'](Xs[which_set])
    y = Ys[which_set]
    if isinstance(y, list):
        y = np.asarray(y).astype(dtype)
    if which_set == 'test':
        assert y.shape[0] == 10000
        y = y.reshape((y.shape[0], 1))
    max_labels = 10
    if one_hot is not None:
        warnings.warn("the `one_hot` parameter is deprecated. To get "
                      "one-hot encoded targets, request that they "
                      "live in `VectorSpace` through the `data_specs` "
                      "parameter of MNIST's iterator method. "
                      "`one_hot` will be removed on or after "
                      "September 20, 2014.", stacklevel=2)
    if center:
        X -= 127.5
    self.center = center
    if rescale:
        X /= 127.5
    self.rescale = rescale
    if toronto_prepro:
        assert not center
        assert not gcn
        X = X / 255.
        if which_set == 'test':
            other = CIFAR10(which_set='train')
            oX = other.X
            oX /= 255.
            X = X - oX.mean(axis=0)
        else:
            X = X - X.mean(axis=0)
    self.toronto_prepro = toronto_prepro
    self.gcn = gcn
    if gcn is not None:
        gcn = float(gcn)
        X = global_contrast_normalize(X, scale=gcn)
    if start is not None:
        # This needs to come after the prepro so that it doesn't
        # change the pixel means computed above for toronto_prepro
        assert start >= 0
        assert stop > start
        assert stop <= X.shape[0]
        X = X[start:stop, :]
        y = y[start:stop, :]
        assert X.shape[0] == y.shape[0]
    if which_set == 'test':
        assert X.shape[0] == 10000
    # view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
    #                                                           axes)
    view_converter = dense_design_matrix.DefaultViewConverter(
        self.img_shape2, axes)
    super(CIFAR10, self).__init__(X=X, y=y,
                                  view_converter=view_converter,
                                  y_labels=self.n_classes)
    assert not contains_nan(self.X)
    if preprocessor:
        preprocessor.apply(self)
def __init__(self, which_set, center=False, rescale=False, gcn=None,
             one_hot=False, start=None, stop=None, axes=('b', 0, 1, 'c'),
             toronto_prepro=False, preprocessor=None):
    # note: there is no such thing as the cifar10 validation set;
    # pylearn1 defined one but really it should be user-configurable
    # (as it is here)
    self.axes = axes
    # we define here:
    dtype = 'uint8'
    ntrain = 50000
    nvalid = 0  # artefact, we won't use it
    ntest = 10000
    # we also expose the following details:
    self.img_shape = (3, 32, 32)
    self.img_size = np.prod(self.img_shape)
    self.n_classes = 10
    self.label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                        'dog', 'frog', 'horse', 'ship', 'truck']
    # The usual pickled-batch loading code is bypassed here;
    # pre-converted .npy arrays are loaded instead.
    if which_set == 'train':
        X = np.load(os.environ['PYLEARN2_DATA_PATH'] +
                    '/cifar10/train_X.npy')
        y = np.load(os.environ['PYLEARN2_DATA_PATH'] +
                    '/cifar10/train_y.npy')
        X = np.cast['float32'](X)
        y = np.cast['float32'](y)
    elif which_set == 'test':
        X = np.load(os.environ['PYLEARN2_DATA_PATH'] +
                    '/cifar10/test_X.npy')
        y = np.load(os.environ['PYLEARN2_DATA_PATH'] +
                    '/cifar10/test_y.npy')
        X = np.cast['float32'](X)
        y = np.cast['float32'](y)
    if which_set == 'test':
        assert X.shape[0] == 10000
    if isinstance(y, list):
        y = np.asarray(y)
    if center:
        X -= 127.5
    self.center = center
    if rescale:
        X /= 127.5
    self.rescale = rescale
    if toronto_prepro:
        assert not center
        assert not gcn
        X = X / 255.
        if which_set == 'test':
            other = CIFAR10(which_set='train')
            oX = other.X
            oX /= 255.
            X = X - oX.mean(axis=0)
        else:
            X = X - X.mean(axis=0)
    self.toronto_prepro = toronto_prepro
    self.gcn = gcn
    if gcn is not None:
        gcn = float(gcn)
        X = global_contrast_normalize(X, scale=gcn)
    if start is not None:
        # This needs to come after the prepro so that it doesn't change
        # the pixel means computed above for toronto_prepro
        assert start >= 0
        assert stop > start
        assert stop <= X.shape[0]
        X = X[start:stop, :]
        y = y[start:stop]
        assert X.shape[0] == y.shape[0]
    view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                              axes)
    if which_set == 'train':
        # Reorder the examples so that the labels cycle 0, 1, ..., 9, 0, ...
        length = X.shape[0]

        def search_right_label(desired_label, i):
            for idx in xrange(i, length):
                if y[idx] == desired_label:
                    return idx

        def swap_ele(index, i):
            x_tmp = X[i]
            X[i] = X[index]
            X[index] = x_tmp
            y_tmp = y[i]
            y[i] = y[index]
            y[index] = y_tmp

        desired_label = 0
        for i in xrange(length):
            desired_label = i % 10
            if y[i] != desired_label:
                index = search_right_label(desired_label, i)
                swap_ele(index, i)
        for i in xrange(length - 100, length):
            print y[i]
    self.one_hot = one_hot
    if one_hot:
        one_hot = np.zeros((y.shape[0], 10), dtype='float32')
        for i in xrange(y.shape[0]):
            one_hot[i, y[i]] = 1.
        y = one_hot
    super(My_CIFAR10, self).__init__(X=X, y=y,
                                     view_converter=view_converter)
    assert not np.any(np.isnan(self.X))
    if preprocessor:
        preprocessor.apply(self)
def __init__(self, which_set, center=False, rescale=False, gcn=None,
             start=None, stop=None, axes=('b', 0, 1, 'c'),
             toronto_prepro=False, preprocessor=None):
    # note: there is no such thing as the cifar10 validation set;
    # pylearn1 defined one but really it should be user-configurable
    # (as it is here)
    self.axes = axes
    # we define here:
    dtype = 'uint8'
    ntrain = 50000
    nvalid = 0  # artefact, we won't use it
    ntest = 10000
    # we also expose the following details:
    self.img_shape = (3, 32, 32)
    self.img_size = numpy.prod(self.img_shape)
    # make sure that this is working (we can also copy it from meta file)
    self.n_classes = 100
    self.label_names = range(1900, 2000)
    import cPickle
    fo = open('datasets/data_batch')
    dict = cPickle.load(fo)
    fo.close()
    lenx = numpy.ceil((ntrain + nvalid) / 10000.) * 10000
    x = numpy.zeros((lenx, self.img_size), dtype=dtype)
    y = numpy.zeros((lenx, 1), dtype=dtype)
    # load train data (a single 8305-example batch)
    x[0:8305, :] = dict['data']
    y[0:8305, 0] = dict['labels']
    X = numpy.cast['float32'](x[0:8305])
    y = y[0:8305]
    if isinstance(y, list):
        y = numpy.asarray(y).astype(dtype)
    self.center = center
    self.rescale = rescale
    self.toronto_prepro = toronto_prepro
    self.gcn = gcn
    if gcn is not None:
        gcn = float(gcn)
        X = global_contrast_normalize(X, scale=gcn)
    view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                              axes)
    super(Timeliner, self).__init__(X=X, y=y,
                                    view_converter=view_converter,
                                    y_labels=self.n_classes)
    assert not contains_nan(self.X)
    if preprocessor:
        preprocessor.apply(self)
if __name__ == "__main__":
    # Load dataset, split 7:3 into train:test
    train_x, test_x, train_y, test_y = unpack_facedataset()
    # Preprocess images: contrast normalization followed by ZCA whitening
    # (both from pylearn2). The normalized data is only used for
    # inversion, not for training.
    try:
        zca = load("faces/zca.data")
    except Exception as e:
        print("Failed to load preprocessed data from disk, computing zca")
        train_x_normalized = global_contrast_normalize(train_x * 255,
                                                       scale=55.)
        zca = ZCA()
        zca.fit(train_x_normalized)
        save("faces/zca.data", zca)

    x = tf.compat.v1.placeholder(tf.float32, shape=[None, 112 * 92])
    y_ = tf.compat.v1.placeholder(tf.float32, shape=[None, 40])
    model = Model(x, y_)
    session = tf.compat.v1.InteractiveSession()
    session.run(tf.compat.v1.global_variables_initializer())
    model.train(train_x, train_y, session, test_x, test_y, 250)
    perform_inversion(zca, test_x[0::3], model, session)
    # perform_inversion(train_x, test_x[0::3], model, session)
# tail of a batched LCN helper (apply_lcn, used below); the setup that
# defines data_size, batch_size and `last` precedes this snippet
for i in xrange(0, data_size, batch_size):
    stop = (i + numpy.mod(data_size, batch_size) if i >= last
            else i + batch_size)
    data[i:stop, :, :, 0] = lecun_lcn(
        data[i:stop, :, :, 0].astype('float32'), img_shape, kernel_size)
return data


preprocess = True
if preprocess:
    print "Pre-processing the data"
    features = []
    labels = []
    for item, y in zip(data_x, data_y):
        data_shape = item.shape
        item = item / 255.
        item = global_contrast_normalize(
            item.reshape((data_shape[0], 48 * 48))).reshape(data_shape)
        item = apply_lcn(item, img_shape=[48, 48], kernel_size=5)
        features.append(item.astype('float32'))
        labels.append(y)
    print "Done pre-processing"

data = {'data_x': features, 'data_y': labels, 'clip_ids': clip_ids}
#save_path = "/data/lisa/data/faces/EmotiW/preproc/samira/KGL-AFEW/"
save_path = "/data/lisa/data/faces/EmotiW/preproc/samira/KGLIS-AFEWIS/"
serial.save(save_path + 'afew2_valid_prep.pkl', data)
#serial.save(save_path + 'afew2_train_prep.pkl', data)
def __init__(self, which_set, data_path=None, center=True, rescale=True,
             gcn=True, specs=True, foldi=1, foldn=10,
             filestr="feature2086-5-{}.pkl"):
    self.class_name = ['neg', 'cin1', 'cin2', 'cin3', 'cancer']
    # load data
    self.specs = specs
    self.filestr = filestr
    if which_set == 'valid':
        i = foldi % foldn
        filepath = self.filestr.format(str(i + 1))
        filepath = self.dirpath + filepath
        filepath = serial.preprocess(filepath)
        X, Y = self.loadi(filepath)
    elif which_set == 'test':
        i = (foldi - 1) % foldn
        filepath = self.filestr.format(str(i + 1))
        filepath = self.dirpath + filepath
        filepath = serial.preprocess(filepath)
        X, Y = self.loadi(filepath)
    else:
        # training set: every fold except the validation and test folds
        indexs = range(foldn)
        i = foldi % foldn
        indexs.pop(i)
        if i == 0:
            indexs.pop(-1)
        else:
            i = (foldi - 1) % foldn
            indexs.pop(i)
        Xs = []
        Ys = []
        for i in indexs:
            filepath = self.filestr.format(str(i + 1))
            filepath = self.dirpath + filepath
            filepath = serial.preprocess(filepath)
            X, Y = self.loadi(filepath)
            Xs.append(X)
            Ys.append(Y)
        X = np.vstack(Xs)
        Y = np.hstack(Ys)
        print X.shape, Y.shape
    X = X.astype(float)  # astype returns a copy; the result must be assigned
    axis = 0
    _max = np.max(X, axis=axis)
    _min = np.min(X, axis=axis)
    _mean = np.mean(X, axis=axis)
    _std = np.std(X, axis=axis)
    _scale = _max - _min
    if gcn:
        # normalize each of the three feature groups separately
        X[:, :850] = global_contrast_normalize(X[:, :850], scale=gcn)
        X[:, 850:850 + 556] = global_contrast_normalize(
            X[:, 850:850 + 556], scale=gcn)
        X[:, 850 + 556:] = global_contrast_normalize(
            X[:, 850 + 556:], scale=gcn)
    self.raw_X = X
    self.raw_y = Y
    # one-hot encode the five class labels
    y = np.zeros((Y.shape[0], 5))
    for i in range(Y.shape[0]):
        j = Y[i]
        y[i, j] = 1
    print "Load CIN_FEATURE2086_5 data: {}, with size X:{}, y:{}".format(
        data_path, X.shape, y.shape)
    super(CIN_FEATURE2086_5, self).__init__(X=X, y=y)
    if specs:
        assert X.shape[1] == (850 + 556 + 680)
        self.init_data_specs()
        self.feature850 = X[:, 0:850]
        self.feature556 = X[:, 850:850 + 556]
        self.feature680 = X[:, 850 + 556:]
        self.y = y
def __init__(self, which_set, center=False, rescale=False, gcn=None,
             one_hot=False, start=None, stop=None, axes=("b", 0, 1, "c"),
             toronto_prepro=False, preprocessor=None):
    """
    Parameters
    ----------
    which_set : str
        One of 'train', 'test'
    gcn : float, optional
        Multiplicative constant to use for global contrast
        normalization. No global contrast normalization is applied,
        if None

    .. todo::

        WRITEME
    """
    # note: there is no such thing as the cifar10 validation set;
    # pylearn1 defined one but really it should be user-configurable
    # (as it is here)
    self.axes = axes
    # we define here:
    dtype = "uint8"
    ntrain = 50000
    nvalid = 0  # artefact, we won't use it
    ntest = 10000
    # we also expose the following details:
    self.img_shape = (3, 32, 32)
    self.img_size = N.prod(self.img_shape)
    self.n_classes = 10
    self.label_names = ["airplane", "automobile", "bird", "cat", "deer",
                        "dog", "frog", "horse", "ship", "truck"]
    # prepare loading
    fnames = ["data_batch_%i" % i for i in range(1, 6)]
    lenx = N.ceil((ntrain + nvalid) / 10000.0) * 10000
    x = N.zeros((lenx, self.img_size), dtype=dtype)
    y = N.zeros(lenx, dtype=dtype)
    # load train data
    nloaded = 0
    for i, fname in enumerate(fnames):
        data = CIFAR10._unpickle(fname)
        x[i * 10000:(i + 1) * 10000, :] = data["data"]
        y[i * 10000:(i + 1) * 10000] = data["labels"]
        nloaded += 10000
        if nloaded >= ntrain + nvalid + ntest:
            break
    # load test data
    data = CIFAR10._unpickle("test_batch")
    # process this data
    Xs = {"train": x[0:ntrain],
          "test": data["data"][0:ntest]}
    Ys = {"train": y[0:ntrain],
          "test": data["labels"][0:ntest]}
    X = N.cast["float32"](Xs[which_set])
    y = Ys[which_set]
    if isinstance(y, list):
        y = np.asarray(y)
    if which_set == "test":
        assert y.shape[0] == 10000
    self.one_hot = one_hot
    if one_hot:
        one_hot = np.zeros((y.shape[0], 10), dtype="float32")
        for i in xrange(y.shape[0]):
            one_hot[i, y[i]] = 1.0
        y = one_hot
    if center:
        X -= 127.5
    self.center = center
    if rescale:
        X /= 127.5
    self.rescale = rescale
    if toronto_prepro:
        assert not center
        assert not gcn
        X = X / 255.0
        if which_set == "test":
            other = CIFAR10(which_set="train")
            oX = other.X
            oX /= 255.0
            X = X - oX.mean(axis=0)
        else:
            X = X - X.mean(axis=0)
    self.toronto_prepro = toronto_prepro
    self.gcn = gcn
    if gcn is not None:
        gcn = float(gcn)
        X = global_contrast_normalize(X, scale=gcn)
    if start is not None:
        # This needs to come after the prepro so that it doesn't change
        # the pixel means computed above for toronto_prepro
        assert start >= 0
        assert stop > start
        assert stop <= X.shape[0]
        X = X[start:stop, :]
        y = y[start:stop]
        assert X.shape[0] == y.shape[0]
    if which_set == "test":
        assert X.shape[0] == 10000
    view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                              axes)
    super(CIFAR10, self).__init__(X=X, y=y, view_converter=view_converter)
    assert not np.any(np.isnan(self.X))
    if preprocessor:
        preprocessor.apply(self)
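# Hedged usage sketch (added, not from the original source): constructing
# the dataset above with global contrast normalization enabled; scale=55.
# is the value commonly used for CIFAR-10 in pylearn2 configurations.
train = CIFAR10(which_set="train", gcn=55., start=0, stop=40000)
valid = CIFAR10(which_set="train", gcn=55., start=40000, stop=50000)
test = CIFAR10(which_set="test", gcn=55.)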