Example #1
import pickle
import sys

import numpy

from pylearn2.datasets import norb


def get_data(args):
    if args.which_set in ('train', 'test'):
        # Built-in split; the second argument requests multi-target labels.
        dataset = norb.SmallNORB(args.which_set, True)
    else:
        # Otherwise treat which_set as a path to a pickled dataset.
        with open(args.which_set, 'rb') as norb_file:
            dataset = pickle.load(norb_file)
            if len(dataset.y.shape) < 2 or dataset.y.shape[1] == 1:
                print("This viewer does not support NORB datasets that "
                      "only have classification labels.")
                sys.exit(1)

        if args.zca is not None:
            # Undo ZCA whitening using the pickled preprocessor.
            with open(args.zca, 'rb') as zca_file:
                zca = pickle.load(zca_file)
                dataset.X = zca.inverse(dataset.X)

    num_examples = dataset.X.shape[0]

    # Reshape the design matrix to its topological (image) layout,
    # dropping the trailing singleton channel axis.
    topo_shape = ((num_examples, ) +
                  tuple(dataset.view_converter.shape))
    assert topo_shape[-1] == 1
    topo_shape = topo_shape[:-1]
    values = dataset.X.reshape(topo_shape)
    labels = numpy.array(dataset.y, 'int')
    return values, labels, dataset.which_set
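For context, a minimal sketch of how a viewer script might call this helper. The argparse flags below are hypothetical stand-ins for whatever CLI the original script defines, not part of the snippet itself:

    import argparse

    # Hypothetical CLI wiring; flag names are assumptions.
    parser = argparse.ArgumentParser()
    parser.add_argument('--which_set', default='train',
                        help="'train', 'test', or a path to a pickled dataset")
    parser.add_argument('--zca', default=None,
                        help="optional path to a pickled ZCA preprocessor")
    args = parser.parse_args()

    values, labels, which_set = get_data(args)
    print(values.shape, labels.shape, which_set)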
Example #2
import numpy

from pylearn2.datasets import norb


def get_data(which_set):
    # Load the requested split with multi-target labels.
    dataset = norb.SmallNORB(which_set, True)
    num_examples = dataset.get_data()[0].shape[0]
    # Fetch the whole set in one sequential batch, in topological
    # (image) layout, with targets included.
    iterator = dataset.iterator(mode='sequential',
                                batch_size=num_examples,
                                topo=True,
                                targets=True)
    values, labels = next(iterator)
    return values, numpy.array(labels, 'int')
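A quick usage sketch, assuming pylearn2 is installed and the small NORB files are already available under PYLEARN2_DATA_PATH:

    values, labels = get_data('train')
    print(values.shape)  # topological layout: (num_examples, rows, cols, channels)
    print(labels.shape)  # one column per label dimension (multi-target)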
Example #3
import gzip
import logging
import os

import numpy as np
from scipy.misc import imresize

from pylearn2.datasets import norb

import env_paths  # project-local helper that resolves dataset paths

logger = logging.getLogger(__name__)


def _download(normalize=True):
    """
    Download the small NORB dataset if it is not already present.

    :return: The train, test, and validation sets.
    """

    def load_data(data_file):
        # Set a temporary environment data path for pylearn2.
        os.environ['PYLEARN2_DATA_PATH'] = env_paths.get_data_path("norb")

        data_dir = os.path.join(os.environ['PYLEARN2_DATA_PATH'], 'norb_small', 'original')
        if not os.path.exists(data_dir):
            os.makedirs(data_dir)
        dataset = os.path.join(data_dir, data_file)

        if not os.path.isfile(dataset):
            try:
                from urllib.request import urlretrieve  # Python 3
            except ImportError:
                from urllib import urlretrieve  # Python 2
            # Build the URL with string concatenation; os.path.join is
            # not safe for URLs on all platforms.
            origin = ('http://www.cs.nyu.edu/~ylclab/data/norb-v1.0-small/'
                      + data_file)
            logger.info('Downloading data from %s', origin)

            urlretrieve(origin, dataset)
        return dataset

    def unzip(path):
        # Decompress the .gz archive next to itself, dropping the suffix.
        # Output must be opened in binary mode to match the gzip stream.
        with gzip.open(path, 'rb') as infile:
            with open(path.replace('.gz', ''), 'wb') as outfile:
                for line in infile:
                    outfile.write(line)

    def norm(x):
        # Downsample each 96x96 stereo view to 32x32 and normalize.
        orig_shape = (96, 96)
        new_shape = (32, 32)
        x = x.reshape((-1, 2, 96 * 96))  # (examples, stereo pair, pixels)

        def reshape_digits(x, shape):
            def rebin(_a, shape):
                # Nearest-neighbour downsampling via scipy.misc.imresize.
                img = imresize(_a, shape, interp='nearest')
                return img.reshape(-1)

            nrows = x.shape[0]
            ncols = shape[0] * shape[1]
            result = np.zeros((nrows, x.shape[1], ncols))
            for i in range(nrows):
                result[i, 0, :] = rebin(x[i, 0, :].reshape(orig_shape), shape).reshape((1, ncols))
                result[i, 1, :] = rebin(x[i, 1, :].reshape(orig_shape), shape).reshape((1, ncols))
            return result

        x = reshape_digits(x, new_shape)
        x = x.reshape((-1, 2 * np.prod(new_shape)))
        x += np.random.uniform(0, 1, size=x.shape).astype('float32')  # dequantize with uniform noise
        x /= 256.
        x -= x.mean(axis=0)  # center each feature

        x = np.asarray(x, dtype='float32')
        return x

    unzip(load_data("smallnorb-5x46789x9x18x6x2x96x96-training-dat.mat.gz"))
    unzip(load_data("smallnorb-5x46789x9x18x6x2x96x96-training-cat.mat.gz"))

    train_norb = norb.SmallNORB('train')
    train_x = train_norb.X
    train_t = train_norb.y

    unzip(load_data("smallnorb-5x01235x9x18x6x2x96x96-testing-dat.mat.gz"))
    unzip(load_data("smallnorb-5x01235x9x18x6x2x96x96-testing-cat.mat.gz"))

    test_norb = norb.SmallNORB('test')
    test_x = test_norb.X
    test_t = test_norb.y

    if normalize:
        test_x = norm(test_x)
        train_x = norm(train_x)

    # Dummy validation set drawn from (and still overlapping) the
    # training set; randint's upper bound is exclusive.
    idx = np.random.randint(0, train_x.shape[0], 5000)
    valid_x = train_x[idx, :]
    valid_t = train_t[idx]

    return (train_x, train_t), (test_x, test_t), (valid_x, valid_t)
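A minimal usage sketch, not part of the original module: with normalize left at its default, each row is a downsampled stereo pair flattened to 2 * 32 * 32 = 2048 float32 features.

    import logging
    logging.basicConfig(level=logging.INFO)  # show the download progress messages

    (train_x, train_t), (test_x, test_t), (valid_x, valid_t) = _download()
    print(train_x.shape, train_x.dtype)  # expected: (n_train, 2048) float32
    print(valid_x.shape)                 # (5000, 2048)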