import pickle
import sys

import numpy

from pylearn2.datasets import norb


def get_data(args):
    """
    Load a SmallNORB split (or a pickled NORB dataset) and return its
    topological view, integer labels, and the name of the split.
    """
    if args.which_set in ('train', 'test'):
        dataset = norb.SmallNORB(args.which_set, True)
    else:
        with open(args.which_set) as norb_file:
            dataset = pickle.load(norb_file)
            if len(dataset.y.shape) < 2 or dataset.y.shape[1] == 1:
                print("This viewer does not support NORB datasets that "
                      "only have classification labels.")
                sys.exit(1)

    # Optionally undo ZCA whitening so the images are viewable again.
    if args.zca is not None:
        with open(args.zca) as zca_file:
            zca = pickle.load(zca_file)
        dataset.X = zca.inverse(dataset.X)

    num_examples = dataset.X.shape[0]

    topo_shape = ((num_examples, ) +
                  tuple(dataset.view_converter.shape))
    assert topo_shape[-1] == 1
    topo_shape = topo_shape[:-1]
    values = dataset.X.reshape(topo_shape)
    labels = numpy.array(dataset.y, 'int')
    return values, labels, dataset.which_set
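
# Usage sketch (not part of the original source): get_data() above only needs an
# object exposing `which_set` and `zca` attributes, so a minimal argparse parser
# like the hypothetical _parse_args() below would be enough to drive it. The flag
# names `--which_set` and `--zca` are assumptions inferred from the attribute
# accesses, not the original script's actual command-line interface.
def _parse_args():
    import argparse
    parser = argparse.ArgumentParser(
        description="Browse a SmallNORB split or a pickled NORB dataset.")
    parser.add_argument('--which_set', default='train',
                        help="'train', 'test', or a path to a pickled dataset")
    parser.add_argument('--zca', default=None,
                        help="path to a pickled ZCA preprocessor to invert (optional)")
    return parser.parse_args()

# values, labels, which_set = get_data(_parse_args())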
def get_data(which_set):
    """
    Alternative loader: pull one full-sized batch through the dataset's
    sequential iterator instead of reshaping dataset.X by hand.
    """
    dataset = norb.SmallNORB(which_set, True)
    num_examples = dataset.get_data()[0].shape[0]
    iterator = dataset.iterator(mode='sequential',
                                batch_size=num_examples,
                                topo=True,
                                targets=True)
    values, labels = iterator.next()
    return values, numpy.array(labels, 'int')
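
# Usage sketch (not in the original source): a minimal sanity check that the
# single-batch iterator returns one row of values and labels per example,
# assuming the SmallNORB files are already available under $PYLEARN2_DATA_PATH.
# Note that if both snippets live in one module, this one-argument get_data()
# shadows the argparse-driven variant above.
def _check_split(which_set='train'):
    values, labels = get_data(which_set)
    assert values.shape[0] == labels.shape[0]
    return values, labels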
import gzip
import logging
import os

import numpy as np
from scipy.misc import imresize  # old SciPy API, consistent with this Python 2-era code

from pylearn2.datasets import norb

# `env_paths` is a project-local helper that resolves a writable data directory;
# its import path depends on the surrounding project layout.

logger = logging.getLogger(__name__)


def _download(normalize=True):
    """
    Download the NORB dataset if it is not present.

    :return: The train, test and validation set.
    """
    def load_data(data_file):
        # Set temporary environment data path for pylearn2.
        os.environ['PYLEARN2_DATA_PATH'] = env_paths.get_data_path("norb")
        data_dir = os.path.join(os.environ['PYLEARN2_DATA_PATH'],
                                'norb_small', 'original')
        if not os.path.exists(data_dir):
            os.makedirs(data_dir)
        dataset = os.path.join(data_dir, data_file)

        if not os.path.isfile(dataset):
            import urllib
            origin = os.path.join(
                'http://www.cs.nyu.edu/~ylclab/data/norb-v1.0-small/',
                data_file)
            logger.info('Downloading data from %s', origin)
            urllib.urlretrieve(origin, dataset)

        return dataset

    def unzip(path):
        # Decompress the .mat.gz archive next to the downloaded file.
        with gzip.open(path, 'rb') as infile:
            with open(path.replace('.gz', ''), 'w') as outfile:
                for line in infile:
                    outfile.write(line)

    def norm(x):
        # Downsample each 96x96 stereo pair to 32x32, add uniform noise,
        # rescale to [0, 1) and centre every feature.
        orig_shape = (96, 96)
        new_shape = (32, 32)
        x = x.reshape((-1, 2, 96 * 96))

        def reshape_digits(x, shape):
            def rebin(_a, shape):
                img = imresize(_a, shape, interp='nearest')
                return img.reshape(-1)

            nrows = x.shape[0]
            ncols = shape[0] * shape[1]
            result = np.zeros((nrows, x.shape[1], ncols))
            for i in range(nrows):
                result[i, 0, :] = rebin(x[i, 0, :].reshape(orig_shape),
                                        shape).reshape((1, ncols))
                result[i, 1, :] = rebin(x[i, 1, :].reshape(orig_shape),
                                        shape).reshape((1, ncols))
            return result

        x = reshape_digits(x, new_shape)
        x = x.reshape((-1, 2 * np.prod(new_shape)))
        x += np.random.uniform(0, 1, size=x.shape).astype('float32')  # add uniform noise
        x /= 256.
        x -= x.mean(axis=0)
        x = np.asarray(x, dtype='float32')
        return x

    unzip(load_data("smallnorb-5x46789x9x18x6x2x96x96-training-dat.mat.gz"))
    unzip(load_data("smallnorb-5x46789x9x18x6x2x96x96-training-cat.mat.gz"))
    train_norb = norb.SmallNORB('train')
    train_x = train_norb.X
    train_t = train_norb.y

    unzip(load_data("smallnorb-5x01235x9x18x6x2x96x96-testing-dat.mat.gz"))
    unzip(load_data("smallnorb-5x01235x9x18x6x2x96x96-testing-cat.mat.gz"))
    test_norb = norb.SmallNORB('test')
    test_x = test_norb.X
    test_t = test_norb.y

    if normalize:
        test_x = norm(test_x)
        train_x = norm(train_x)

    # Dummy validation set. NOTE: still part of the training set.
    idx = np.random.randint(0, train_x.shape[0] - 1, 5000)
    valid_x = train_x[idx, :]
    valid_t = train_t[idx]

    return (train_x, train_t), (test_x, test_t), (valid_x, valid_t)
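
# Usage sketch (not in the original source; assumes env_paths.get_data_path()
# resolves to a writable directory and a network connection is available).
# _download() fetches and unpacks the SmallNORB archives on first use; with
# normalize=True each example comes back as a 2 * 32 * 32 = 2048-dimensional
# float vector, and the "validation" split is re-sampled from the training set.
# (train_x, train_t), (test_x, test_t), (valid_x, valid_t) = _download()
# print(train_x.shape, test_x.shape, valid_x.shape)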