import pickle
import sys

import numpy

from pylearn2.datasets import norb


def get_data(args):
    """
    Load a SmallNORB split (or a pickled NORB dataset) and return its
    topological view, integer labels, and the name of the split.
    """
    if args.which_set in ('train', 'test'):
        dataset = norb.SmallNORB(args.which_set, True)
    else:
        with open(args.which_set) as norb_file:
            dataset = pickle.load(norb_file)
            if len(dataset.y.shape) < 2 or dataset.y.shape[1] == 1:
                print("This viewer does not support NORB datasets that "
                      "only have classification labels.")
                sys.exit(1)

    # Optionally undo ZCA whitening so the images are viewable again.
    if args.zca is not None:
        with open(args.zca) as zca_file:
            zca = pickle.load(zca_file)
        dataset.X = zca.inverse(dataset.X)

    num_examples = dataset.X.shape[0]

    topo_shape = ((num_examples, ) +
                  tuple(dataset.view_converter.shape))
    assert topo_shape[-1] == 1
    topo_shape = topo_shape[:-1]
    values = dataset.X.reshape(topo_shape)
    labels = numpy.array(dataset.y, 'int')
    return values, labels, dataset.which_set
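
# Usage sketch (not part of the original source): get_data() above only needs an
# object exposing `which_set` and `zca` attributes, so a minimal argparse parser
# like the hypothetical _parse_args() below would be enough to drive it. The flag
# names `--which_set` and `--zca` are assumptions inferred from the attribute
# accesses, not the original script's actual command-line interface.
def _parse_args():
    import argparse
    parser = argparse.ArgumentParser(
        description="Browse a SmallNORB split or a pickled NORB dataset.")
    parser.add_argument('--which_set', default='train',
                        help="'train', 'test', or a path to a pickled dataset")
    parser.add_argument('--zca', default=None,
                        help="path to a pickled ZCA preprocessor to invert (optional)")
    return parser.parse_args()

# values, labels, which_set = get_data(_parse_args())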
def get_data(which_set):
    """
    Alternative loader: pull one full-sized batch through the dataset's
    sequential iterator instead of reshaping dataset.X by hand.
    """
    dataset = norb.SmallNORB(which_set, True)
    num_examples = dataset.get_data()[0].shape[0]
    iterator = dataset.iterator(mode='sequential',
                                batch_size=num_examples,
                                topo=True,
                                targets=True)
    values, labels = iterator.next()
    return values, numpy.array(labels, 'int')
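
# Usage sketch (not in the original source): a minimal sanity check that the
# single-batch iterator returns one row of values and labels per example,
# assuming the SmallNORB files are already available under $PYLEARN2_DATA_PATH.
# Note that if both snippets live in one module, this one-argument get_data()
# shadows the argparse-driven variant above.
def _check_split(which_set='train'):
    values, labels = get_data(which_set)
    assert values.shape[0] == labels.shape[0]
    return values, labels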
import gzip
import logging
import os

import numpy as np
from scipy.misc import imresize  # old SciPy API, consistent with this Python 2-era code

from pylearn2.datasets import norb

# `env_paths` is a project-local helper that resolves a writable data directory;
# its import path depends on the surrounding project layout.

logger = logging.getLogger(__name__)


def _download(normalize=True):
    """
    Download the NORB dataset if it is not present.

    :return: The train, test and validation set.
    """
    def load_data(data_file):
        # Set temporary environment data path for pylearn2.
        os.environ['PYLEARN2_DATA_PATH'] = env_paths.get_data_path("norb")
        data_dir = os.path.join(os.environ['PYLEARN2_DATA_PATH'],
                                'norb_small', 'original')
        if not os.path.exists(data_dir):
            os.makedirs(data_dir)
        dataset = os.path.join(data_dir, data_file)

        if not os.path.isfile(dataset):
            import urllib
            origin = os.path.join(
                'http://www.cs.nyu.edu/~ylclab/data/norb-v1.0-small/',
                data_file)
            logger.info('Downloading data from %s', origin)
            urllib.urlretrieve(origin, dataset)

        return dataset

    def unzip(path):
        # Decompress the .mat.gz archive next to the downloaded file.
        with gzip.open(path, 'rb') as infile:
            with open(path.replace('.gz', ''), 'w') as outfile:
                for line in infile:
                    outfile.write(line)

    def norm(x):
        # Downsample each 96x96 stereo pair to 32x32, add uniform noise,
        # rescale to [0, 1) and centre every feature.
        orig_shape = (96, 96)
        new_shape = (32, 32)
        x = x.reshape((-1, 2, 96 * 96))

        def reshape_digits(x, shape):
            def rebin(_a, shape):
                img = imresize(_a, shape, interp='nearest')
                return img.reshape(-1)

            nrows = x.shape[0]
            ncols = shape[0] * shape[1]
            result = np.zeros((nrows, x.shape[1], ncols))
            for i in range(nrows):
                result[i, 0, :] = rebin(x[i, 0, :].reshape(orig_shape),
                                        shape).reshape((1, ncols))
                result[i, 1, :] = rebin(x[i, 1, :].reshape(orig_shape),
                                        shape).reshape((1, ncols))
            return result

        x = reshape_digits(x, new_shape)
        x = x.reshape((-1, 2 * np.prod(new_shape)))
        x += np.random.uniform(0, 1, size=x.shape).astype('float32')  # add uniform noise
        x /= 256.
        x -= x.mean(axis=0)
        x = np.asarray(x, dtype='float32')
        return x

    unzip(load_data("smallnorb-5x46789x9x18x6x2x96x96-training-dat.mat.gz"))
    unzip(load_data("smallnorb-5x46789x9x18x6x2x96x96-training-cat.mat.gz"))
    train_norb = norb.SmallNORB('train')
    train_x = train_norb.X
    train_t = train_norb.y

    unzip(load_data("smallnorb-5x01235x9x18x6x2x96x96-testing-dat.mat.gz"))
    unzip(load_data("smallnorb-5x01235x9x18x6x2x96x96-testing-cat.mat.gz"))
    test_norb = norb.SmallNORB('test')
    test_x = test_norb.X
    test_t = test_norb.y

    if normalize:
        test_x = norm(test_x)
        train_x = norm(train_x)

    # Dummy validation set. NOTE: still part of the training set.
    idx = np.random.randint(0, train_x.shape[0] - 1, 5000)
    valid_x = train_x[idx, :]
    valid_t = train_t[idx]

    return (train_x, train_t), (test_x, test_t), (valid_x, valid_t)
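
# Usage sketch (not in the original source; assumes env_paths.get_data_path()
# resolves to a writable directory and a network connection is available).
# _download() fetches and unpacks the SmallNORB archives on first use; with
# normalize=True each example comes back as a 2 * 32 * 32 = 2048-dimensional
# float vector, and the "validation" split is re-sampled from the training set.
# (train_x, train_t), (test_x, test_t), (valid_x, valid_t) = _download()
# print(train_x.shape, test_x.shape, valid_x.shape)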