Beispiel #1
0
    def test_download_and_convert(self, size=16):
        """Download the silhouettes data, convert it and check the shapes.

        Before anything is downloaded, conversion is expected to raise
        ``MissingInputFiles``.  Both the downloader and the converter are
        expected to raise ``ValueError`` for ``size=10``.  After a real
        download and conversion, the resulting HDF5 file must expose
        ``features`` of shape ``(8641, 1, size, size)`` and ``targets`` of
        shape ``(8641, 1)``.
        """
        workdir = self.tempdir
        convert = caltech101_silhouettes.convert_silhouettes
        locations = dict(directory=workdir, output_directory=workdir)

        with remember_cwd():
            os.chdir(workdir)

            # Nothing has been downloaded at this point.
            # NOTE(review): this check uses a literal 16, not ``size``.
            assert_raises(MissingInputFiles, convert, size=16, **locations)
            assert_raises(ValueError, silhouettes_downloader,
                          size=10, directory=workdir)

            silhouettes_downloader(size=size, directory=workdir)

            # Size 10 is still expected to be rejected once data is present.
            assert_raises(ValueError, convert, size=10, **locations)

            convert(size=size, **locations)

        output_file = os.path.join(
            workdir, "caltech101_silhouettes{}.hdf5".format(size))

        with h5py.File(output_file, 'r') as h5:
            expected_features = (8641, 1, size, size)
            expected_targets = (8641, 1)
            assert h5['features'].shape == expected_features
            assert h5['targets'].shape == expected_targets
            assert h5['targets'].shape == (8641, 1)
Beispiel #2
0
 def setUp(self):
     """Build a temporary directory containing a mock CIFAR-10 archive.

     Five training batches and one test batch of seeded random ``uint8``
     data are pickled into ``cifar-10-batches-py`` and then packed into
     ``cifar-10-python.tar.gz`` inside ``self.tempdir``.  The generated
     arrays are kept on ``self`` as the ``*_mock`` attributes.
     """
     numpy.random.seed(9 + 5 + 2015)

     def random_uint8(upper, shape):
         # numpy.random.randint(0, upper, shape) cast to uint8.
         return numpy.random.randint(0, upper, shape).astype('uint8')

     # The order of the draws determines the generated values: all
     # training features are drawn before any training targets.
     self.train_features_mock = [
         random_uint8(256, (10, 3, 32, 32)) for _ in range(5)]
     self.train_targets_mock = [
         random_uint8(10, (10,)) for _ in range(5)]
     self.test_features_mock = random_uint8(256, (10, 3, 32, 32))
     self.test_targets_mock = random_uint8(10, (10,))

     self.tempdir = tempfile.mkdtemp()
     batch_dir = 'cifar-10-batches-py'
     with remember_cwd():
         os.chdir(self.tempdir)
         os.mkdir(batch_dir)
         # Training batches are numbered from 1; the test batch goes last.
         batches = [
             ('data_batch_{}'.format(number), features, targets)
             for number, (features, targets) in enumerate(
                 zip(self.train_features_mock, self.train_targets_mock),
                 1)]
         batches.append(
             ('test_batch', self.test_features_mock,
              self.test_targets_mock))
         for batch_name, features, targets in batches:
             with open(os.path.join(batch_dir, batch_name), 'wb') as f:
                 cPickle.dump({'data': features, 'labels': targets}, f)
         with tarfile.open('cifar-10-python.tar.gz', 'w:gz') as tar_file:
             tar_file.add(batch_dir)
Beispiel #3
0
 def setUp(self):
     """Build a temporary directory containing a mock CIFAR-100 archive.

     Seeded random ``uint8`` features with fine and coarse labels are
     generated for a train and a test split.  Each split is pickled to
     ``cifar-100-python/<split>`` with the features reshaped to ten rows,
     and the directory is packed into ``cifar-100-python.tar.gz`` inside
     ``self.tempdir``.
     """
     numpy.random.seed(9 + 5 + 2015)
     image_shape = (10, 3, 32, 32)
     label_shape = (10,)

     # The draws stay in this exact order so the seed yields the same data.
     self.train_features_mock = numpy.random.randint(
         0, 256, image_shape).astype('uint8')
     self.train_fine_labels_mock = numpy.random.randint(
         0, 100, label_shape).astype('uint8')
     self.train_coarse_labels_mock = numpy.random.randint(
         0, 20, label_shape).astype('uint8')
     self.test_features_mock = numpy.random.randint(
         0, 256, image_shape).astype('uint8')
     self.test_fine_labels_mock = numpy.random.randint(
         0, 100, label_shape).astype('uint8')
     self.test_coarse_labels_mock = numpy.random.randint(
         0, 20, label_shape).astype('uint8')

     self.tempdir = tempfile.mkdtemp()
     archive_root = 'cifar-100-python'
     payloads = (
         ('train', self.train_features_mock,
          self.train_fine_labels_mock, self.train_coarse_labels_mock),
         ('test', self.test_features_mock,
          self.test_fine_labels_mock, self.test_coarse_labels_mock),
     )
     with remember_cwd():
         os.chdir(self.tempdir)
         os.mkdir(archive_root)
         for split, features, fine, coarse in payloads:
             with open(os.path.join(archive_root, split), 'wb') as f:
                 cPickle.dump({
                     'data': features.reshape((10, -1)),
                     'fine_labels': fine,
                     'coarse_labels': coarse}, f)
         with tarfile.open('cifar-100-python.tar.gz', 'w:gz') as tar_file:
             tar_file.add(archive_root)
Beispiel #4
0
    def test_convert(self):
        """Download the adult data, convert it and check the HDF5 shapes.

        Before anything is downloaded, ``adult.convert_adult`` is expected
        to raise ``IOError``.  After the download and conversion,
        ``adult.hdf5`` must hold ``features`` of shape
        ``(30162 + 15060, 104)`` and as many ``targets`` rows as features.
        """
        workdir = self.tempdir
        locations = {'directory': workdir, 'output_directory': workdir}
        # (local filename, download URL) pairs, in download order.
        sources = [
            ('adult.data',
             'https://archive.ics.uci.edu/ml/'
             'machine-learning-databases/adult/adult.data'),
            ('adult.test',
             'https://archive.ics.uci.edu/ml/'
             'machine-learning-databases/adult/adult.test'),
        ]

        with remember_cwd():
            os.chdir(workdir)
            assert_raises(IOError, adult.convert_adult, **locations)

            default_downloader(
                directory=workdir,
                urls=[url for _, url in sources],
                filenames=[filename for filename, _ in sources])

            adult.convert_adult(**locations)

        output_file = os.path.join(workdir, "adult.hdf5")

        with h5py.File(output_file, 'r') as h5:
            expected_rows = 30162 + 15060
            assert h5['features'].shape == (expected_rows, 104)
            assert h5['targets'].shape[0] == h5['features'].shape[0]
Beispiel #5
0
    def test_download_and_convert(self, size=16):
        """Run the silhouettes download/convert round trip and check shapes.

        The error paths are checked first: ``MissingInputFiles`` when
        converting before any download, and ``ValueError`` for ``size=10``
        from both the downloader and the converter.  The converted HDF5
        file is then opened and its ``features`` and ``targets`` shapes
        are checked.
        """
        tempdir = self.tempdir
        output_file = os.path.join(
            tempdir, "caltech101_silhouettes{}.hdf5".format(size))
        example_count = 8641

        # Keyword sets for the two failing converter calls.
        # NOTE(review): the first one uses a literal 16, not ``size``.
        missing_kwargs = {
            'size': 16, 'directory': tempdir, 'output_directory': tempdir}
        bad_size_kwargs = {
            'size': 10, 'directory': tempdir, 'output_directory': tempdir}

        with remember_cwd():
            os.chdir(tempdir)

            assert_raises(MissingInputFiles,
                          caltech101_silhouettes.convert_silhouettes,
                          **missing_kwargs)
            assert_raises(ValueError, silhouettes_downloader,
                          size=10, directory=tempdir)

            silhouettes_downloader(size=size, directory=tempdir)

            assert_raises(ValueError,
                          caltech101_silhouettes.convert_silhouettes,
                          **bad_size_kwargs)

            caltech101_silhouettes.convert_silhouettes(
                size=size, directory=tempdir, output_directory=tempdir)

        with h5py.File(output_file, 'r') as h5:
            assert h5['features'].shape == (example_count, 1, size, size)
            assert h5['targets'].shape == (example_count, 1)
Beispiel #6
0
 def setUp(self):
     """Prepare mock CIFAR-10 input files in a fresh temporary directory.

     Seeded random ``uint8`` arrays are generated for five training
     batches and one test batch.  Each batch is pickled as a dict with
     ``data`` and ``labels`` entries under ``cifar-10-batches-py``, which
     is then added to ``cifar-10-python.tar.gz`` in ``self.tempdir``.
     """
     numpy.random.seed(9 + 5 + 2015)
     n_batches = 5
     feature_shape = (10, 3, 32, 32)
     target_shape = (10,)

     # All feature batches are drawn before any target batch; changing
     # this order would change the seeded values.
     self.train_features_mock = []
     for _ in range(n_batches):
         self.train_features_mock.append(
             numpy.random.randint(0, 256, feature_shape).astype('uint8'))
     self.train_targets_mock = []
     for _ in range(n_batches):
         self.train_targets_mock.append(
             numpy.random.randint(0, 10, target_shape).astype('uint8'))
     self.test_features_mock = numpy.random.randint(
         0, 256, feature_shape).astype('uint8')
     self.test_targets_mock = numpy.random.randint(
         0, 10, target_shape).astype('uint8')

     self.tempdir = tempfile.mkdtemp()

     def dump_batch(name, features, targets):
         # Pickle one batch; the path is relative to the current directory.
         path = os.path.join('cifar-10-batches-py', name)
         with open(path, 'wb') as f:
             cPickle.dump({'data': features, 'labels': targets}, f)

     with remember_cwd():
         os.chdir(self.tempdir)
         os.mkdir('cifar-10-batches-py')
         for index, features in enumerate(self.train_features_mock):
             dump_batch('data_batch_{}'.format(index + 1),
                        features, self.train_targets_mock[index])
         dump_batch('test_batch',
                    self.test_features_mock, self.test_targets_mock)
         with tarfile.open('cifar-10-python.tar.gz', 'w:gz') as tar_file:
             tar_file.add('cifar-10-batches-py')
Beispiel #7
0
    def test_convert(self):
        """Check the adult converter end to end against downloaded data.

        ``adult.convert_adult`` is expected to raise ``IOError`` before the
        download.  Once ``adult.data`` and ``adult.test`` are fetched and
        converted, the shapes stored in ``adult.hdf5`` are checked.
        """
        tempdir = self.tempdir
        output_file = os.path.join(tempdir, "adult.hdf5")
        data_url = ('https://archive.ics.uci.edu/ml/'
                    'machine-learning-databases/adult/adult.data')
        test_url = ('https://archive.ics.uci.edu/ml/'
                    'machine-learning-databases/adult/adult.test')

        with remember_cwd():
            os.chdir(tempdir)

            # No input files have been downloaded yet.
            assert_raises(IOError, adult.convert_adult,
                          directory=tempdir, output_directory=tempdir)

            default_downloader(directory=tempdir,
                               urls=[data_url, test_url],
                               filenames=['adult.data', 'adult.test'])

            adult.convert_adult(directory=tempdir, output_directory=tempdir)

        with h5py.File(output_file, 'r') as h5:
            features_shape = h5['features'].shape
            assert features_shape == (30162 + 15060, 104)
            # There must be one target row per feature row.
            assert h5['targets'].shape[0] == features_shape[0]
Beispiel #8
0
 def setUp(self):
     """Create mock CelebA input files in a fresh temporary directory.

     A seeded random ``uint8`` array of shape ``(10, 218, 178, 3)`` is
     stored on ``self.images``.  Each of its ten entries is saved as an
     image and added to ``img_align_celeba.zip`` under a six-digit,
     one-based name.  ``list_attr_celeba.txt`` receives two ``mock``
     lines followed by one line per image name with forty `` 1`` entries.
     """
     numpy.random.seed(21 + 1 + 2016)
     self.tempdir = tempfile.mkdtemp()
     with remember_cwd():
         os.chdir(self.tempdir)
         self.images = numpy.random.randint(
             0, 256, (10, 218, 178, 3)).astype('uint8')
         with zipfile.ZipFile('img_align_celeba.zip', 'w') as image_file:
             for number, pixels in enumerate(self.images, 1):
                 # Each image is written to the same scratch file, then
                 # added to the archive under its numbered name.
                 Image.fromarray(pixels).save('img.jpeg')
                 image_file.write(
                     'img.jpeg',
                     'img_align_celeba/{:06d}.jpg'.format(number))
         attribute_rows = [
             '\n{:06d}.jpg'.format(number) + (' 1' * 40)
             for number in range(1, 11)]
         with open('list_attr_celeba.txt', 'w') as attr_file:
             attr_file.write('mock\nmock' + ''.join(attribute_rows))
Beispiel #9
0
 def setUp(self):
     """Populate a temporary directory with a mock CelebA image archive.

     Writes ``img_align_celeba.zip`` with ten images generated from
     seeded random data, and ``list_attr_celeba.txt`` with two ``mock``
     lines plus one attribute line per image.
     """
     numpy.random.seed(21 + 1 + 2016)
     self.tempdir = tempfile.mkdtemp()
     scratch_image = 'img.jpeg'
     with remember_cwd():
         os.chdir(self.tempdir)
         random_pixels = numpy.random.randint(0, 256, (10, 218, 178, 3))
         self.images = random_pixels.astype('uint8')
         with zipfile.ZipFile('img_align_celeba.zip', 'w') as image_file:
             for i, image in enumerate(self.images):
                 archive_name = 'img_align_celeba/{:06d}.jpg'.format(i + 1)
                 # The scratch file is overwritten on every iteration.
                 Image.fromarray(image).save(scratch_image)
                 image_file.write(scratch_image, archive_name)
         with open('list_attr_celeba.txt', 'w') as attr_file:
             attr_file.write('mock\nmock')
             # One line per image: its name followed by forty ' 1' fields.
             attr_file.writelines(
                 '\n{:06d}.jpg'.format(i) + (' 1' * 40)
                 for i in range(1, 11))
Beispiel #10
0
    def test_download_and_convert(self):
        """Download ``iris.data``, convert it and compare with a direct parse.

        ``iris.convert_iris`` is expected to raise ``IOError`` before the
        download.  The downloaded file is then parsed with
        ``numpy.loadtxt`` to build reference features (``float32``) and
        targets (``uint8`` column), which must match the ``features`` and
        ``targets`` stored in the converted ``iris.hdf5``.
        """
        tempdir = self.tempdir

        with remember_cwd():
            os.chdir(tempdir)

            assert_raises(IOError,
                          iris.convert_iris,
                          directory=tempdir,
                          output_directory=tempdir)

            default_downloader(
                directory=tempdir,
                urls=['https://archive.ics.uci.edu/ml/'
                      'machine-learning-databases/iris/iris.data'],
                filenames=['iris.data'])

            classes = {
                'Iris-setosa': 0,
                'Iris-versicolor': 1,
                'Iris-virginica': 2}

            def class_index(label):
                # numpy.loadtxt passes converters ``bytes`` or ``str``
                # depending on the NumPy version (the default ``encoding``
                # changed in NumPy 2.0), so normalise before the lookup.
                if isinstance(label, bytes):
                    label = label.decode('ascii')
                return classes[label]

            data = numpy.loadtxt(
                os.path.join(tempdir, 'iris.data'),
                converters={4: class_index},
                delimiter=',')
            # The last column holds the class index; the rest are features.
            features = data[:, :-1].astype('float32')
            targets = data[:, -1].astype('uint8').reshape((-1, 1))

            iris.convert_iris(directory=tempdir,
                              output_directory=tempdir)

        output_file = "iris.hdf5"
        output_file = os.path.join(tempdir, output_file)
        with h5py.File(output_file, 'r') as h5:
            assert numpy.allclose(h5['features'], features)
            assert numpy.allclose(h5['targets'], targets)
Beispiel #11
0
    def test_download_and_convert(self):
        """Check the iris converter against a reference parse of the data.

        Conversion is expected to raise ``IOError`` before ``iris.data``
        is downloaded.  Afterwards the file is parsed with
        ``numpy.loadtxt`` and the resulting features and targets are
        compared with the datasets stored in the converted ``iris.hdf5``.
        """
        tempdir = self.tempdir

        with remember_cwd():
            os.chdir(tempdir)

            assert_raises(IOError,
                          iris.convert_iris,
                          directory=tempdir,
                          output_directory=tempdir)

            default_downloader(directory=tempdir,
                               urls=[
                                   'https://archive.ics.uci.edu/ml/'
                                   'machine-learning-databases/iris/iris.data'
                               ],
                               filenames=['iris.data'])

            classes = {
                'Iris-setosa': 0,
                'Iris-versicolor': 1,
                'Iris-virginica': 2
            }

            def class_index(label):
                # Converters receive ``bytes`` on older NumPy and ``str``
                # from NumPy 2.0 on (default ``encoding`` changed), so a
                # bytes-only lookup table would raise KeyError there.
                if isinstance(label, bytes):
                    label = label.decode('ascii')
                return classes[label]

            data = numpy.loadtxt(os.path.join(tempdir, 'iris.data'),
                                 converters={4: class_index},
                                 delimiter=',')
            # Column 4 is the class index; columns before it are features.
            features = data[:, :-1].astype('float32')
            targets = data[:, -1].astype('uint8').reshape((-1, 1))

            iris.convert_iris(directory=tempdir, output_directory=tempdir)

        output_file = "iris.hdf5"
        output_file = os.path.join(tempdir, output_file)
        with h5py.File(output_file, 'r') as h5:
            assert numpy.allclose(h5['features'], features)
            assert numpy.allclose(h5['targets'], targets)
Beispiel #12
0
 def setUp(self):
     """Prepare mock CIFAR-100 input files in a fresh temporary directory.

     Random ``uint8`` features and fine/coarse labels are drawn from a
     seeded generator for the train and test splits and kept on ``self``.
     Each split is pickled as a dict with ``data``, ``fine_labels`` and
     ``coarse_labels`` entries under ``cifar-100-python``, which is then
     added to ``cifar-100-python.tar.gz`` in ``self.tempdir``.
     """
     numpy.random.seed(9 + 5 + 2015)

     def uint8_draw(upper, shape):
         # numpy.random.randint(0, upper, shape) cast to uint8.
         return numpy.random.randint(0, upper, shape).astype('uint8')

     # Draw order is significant for the seeded values: features, fine
     # labels, coarse labels -- first for train, then for test.
     self.train_features_mock = uint8_draw(256, (10, 3, 32, 32))
     self.train_fine_labels_mock = uint8_draw(100, (10,))
     self.train_coarse_labels_mock = uint8_draw(20, (10,))
     self.test_features_mock = uint8_draw(256, (10, 3, 32, 32))
     self.test_fine_labels_mock = uint8_draw(100, (10,))
     self.test_coarse_labels_mock = uint8_draw(20, (10,))

     self.tempdir = tempfile.mkdtemp()

     def pickle_split(split, features, fine_labels, coarse_labels):
         # Features are stored with one flattened row per example.
         path = os.path.join('cifar-100-python', split)
         with open(path, 'wb') as f:
             cPickle.dump(
                 {
                     'data': features.reshape((10, -1)),
                     'fine_labels': fine_labels,
                     'coarse_labels': coarse_labels
                 }, f)

     with remember_cwd():
         os.chdir(self.tempdir)
         os.mkdir('cifar-100-python')
         pickle_split('train', self.train_features_mock,
                      self.train_fine_labels_mock,
                      self.train_coarse_labels_mock)
         pickle_split('test', self.test_features_mock,
                      self.test_fine_labels_mock,
                      self.test_coarse_labels_mock)
         with tarfile.open('cifar-100-python.tar.gz', 'w:gz') as tar_file:
             tar_file.add('cifar-100-python')
Beispiel #13
0
    def setUp(self):
        """Create mock SVHN input files of both formats in a temp directory.

        Format 1: for each of the ``train``, ``test`` and ``extra`` splits,
        a ``<split>.tar.gz`` archive containing two PNG images and an HDF5
        ``digitStruct.mat`` file; the mock data is kept in
        ``self.f1_mock[split]``.  Format 2: ``<split>_32x32.mat`` files
        written with ``savemat``; the arrays are kept on the
        ``self.f2_*_mock`` attributes.
        """
        numpy.random.seed(9 + 5 + 2015)

        self.tempdir = tempfile.mkdtemp()
        with remember_cwd():
            os.chdir(self.tempdir)

            self.f1_mock = {}

            def make_mock_format_1(split):
                # Both names refer to the same per-split dict.
                mock = self.f1_mock[split] = {}
                mock['image'] = [
                    numpy.random.randint(0, 256, shape).astype('uint8')
                    for shape in ((6, 6, 3), (5, 5, 3))]
                other_sources = ('label', 'height', 'width', 'left', 'top')
                for source in other_sources:
                    mock[source] = [
                        numpy.random.randint(0, 4, (2,)).astype('uint8'),
                        # This ensures that label '10' is converted to '1'.
                        10 * numpy.ones((1,)).astype('uint8')]

                archive_name = '{}.tar.gz'.format(split)
                with tarfile.open(archive_name, 'w:gz') as tar_file:
                    os.mkdir(split)
                    # Images are saved under one-based file names.
                    for number, image in enumerate(mock['image'], 1):
                        png_path = os.path.join(
                            split, '{}.png'.format(number))
                        Image.fromarray(image).save(png_path)
                    struct_path = os.path.join(split, 'digitStruct.mat')
                    with h5py.File(struct_path, 'w') as f:
                        for source in other_sources:
                            first_values, second_values = mock[source]
                            suffixes = []
                            # Each value of the first image gets its own
                            # dataset; image_1 stores the dataset names.
                            for i, value in enumerate(first_values):
                                suffix = 'i1{}{}'.format(source, i)
                                suffixes.append([suffix.encode('utf8')])
                                name = 'digitStruct/{}'.format(suffix)
                                f[name] = [[value]]
                            name = 'digitStruct/image_1/{}'.format(source)
                            f[name] = suffixes
                            # The second image stores its value directly.
                            name = 'digitStruct/image_2/{}'.format(source)
                            f[name] = [[second_values[0]]]
                        ref_dtype = h5py.special_dtype(ref=h5py.Reference)
                        bbox = f.create_dataset(
                            'digitStruct/bbox', (2, 1), dtype=ref_dtype)
                        first_ref = f['digitStruct/image_1'].ref
                        second_ref = f['digitStruct/image_2'].ref
                        bbox[...] = [[first_ref], [second_ref]]
                    tar_file.add(split)

            for split in ('train', 'test', 'extra'):
                make_mock_format_1(split)

            def draw(low, high, shape):
                return numpy.random.randint(low, high, shape).astype('uint8')

            # Draw order (features then targets, per split) is kept so the
            # seeded values do not change.
            self.f2_train_features_mock = draw(0, 256, (32, 32, 3, 10))
            self.f2_train_targets_mock = draw(1, 11, (10, 1))
            self.f2_test_features_mock = draw(0, 256, (32, 32, 3, 10))
            self.f2_test_targets_mock = draw(1, 11, (10, 1))
            self.f2_extra_features_mock = draw(0, 256, (32, 32, 3, 10))
            self.f2_extra_targets_mock = draw(1, 11, (10, 1))

            format_2_files = (
                ('train_32x32.mat', self.f2_train_features_mock,
                 self.f2_train_targets_mock),
                ('test_32x32.mat', self.f2_test_features_mock,
                 self.f2_test_targets_mock),
                ('extra_32x32.mat', self.f2_extra_features_mock,
                 self.f2_extra_targets_mock),
            )
            for mat_name, features, targets in format_2_files:
                savemat(mat_name, {'X': features, 'y': targets})
Beispiel #14
0
    def setUp(self):
        """Populate a temporary directory with mock SVHN data files.

        For every split (``train``, ``test``, ``extra``) a format-1 archive
        ``<split>.tar.gz`` is built from two random images plus an HDF5
        ``digitStruct.mat`` describing them.  Format-2 ``<split>_32x32.mat``
        files are then written with ``savemat``.  All generated arrays are
        stored on ``self`` (``f1_mock`` and the ``f2_*_mock`` attributes).
        """
        numpy.random.seed(9 + 5 + 2015)

        self.tempdir = tempfile.mkdtemp()
        other_sources = ('label', 'height', 'width', 'left', 'top')

        def draw_uint8(low, high, shape):
            # numpy.random.randint(low, high, shape) cast to uint8.
            return numpy.random.randint(low, high, shape).astype('uint8')

        def write_digit_struct(path, sources):
            # Write the per-source datasets and the bbox reference table.
            with h5py.File(path, 'w') as f:
                for source in other_sources:
                    suffixes = []
                    for i in range(2):
                        suffix = 'i1{}{}'.format(source, i)
                        suffixes.append([suffix.encode('utf8')])
                        f['digitStruct/{}'.format(suffix)] = [
                            [sources[source][0][i]]]
                    f['digitStruct/image_1/{}'.format(source)] = suffixes
                    f['digitStruct/image_2/{}'.format(source)] = [
                        [sources[source][1][0]]]
                ref_dtype = h5py.special_dtype(ref=h5py.Reference)
                bbox = f.create_dataset('digitStruct/bbox', (2, 1),
                                        dtype=ref_dtype)
                bbox[...] = [[f['digitStruct/image_1'].ref],
                             [f['digitStruct/image_2'].ref]]

        def make_mock_format_1(split):
            # Random draws happen first, in a fixed order, then file output.
            sources = {}
            self.f1_mock[split] = sources
            sources['image'] = [
                draw_uint8(0, 256, (6, 6, 3)),
                draw_uint8(0, 256, (5, 5, 3))
            ]
            for source in other_sources:
                sources[source] = [
                    draw_uint8(0, 4, (2, )),
                    # This ensures that label '10' is converted to '1'.
                    10 * numpy.ones((1, )).astype('uint8')
                ]

            with tarfile.open('{}.tar.gz'.format(split),
                              'w:gz') as tar_file:
                os.mkdir(split)
                for i, image in enumerate(sources['image']):
                    Image.fromarray(image).save(
                        os.path.join(split, '{}.png'.format(i + 1)))
                write_digit_struct(
                    os.path.join(split, 'digitStruct.mat'), sources)
                tar_file.add(split)

        with remember_cwd():
            os.chdir(self.tempdir)

            self.f1_mock = {}
            for split in ('train', 'test', 'extra'):
                make_mock_format_1(split)

            # Features are drawn before targets for each split, and the
            # splits are drawn in train, test, extra order.
            self.f2_train_features_mock = draw_uint8(0, 256, (32, 32, 3, 10))
            self.f2_train_targets_mock = draw_uint8(1, 11, (10, 1))
            self.f2_test_features_mock = draw_uint8(0, 256, (32, 32, 3, 10))
            self.f2_test_targets_mock = draw_uint8(1, 11, (10, 1))
            self.f2_extra_features_mock = draw_uint8(0, 256, (32, 32, 3, 10))
            self.f2_extra_targets_mock = draw_uint8(1, 11, (10, 1))

            savemat('train_32x32.mat',
                    dict(X=self.f2_train_features_mock,
                         y=self.f2_train_targets_mock))
            savemat('test_32x32.mat',
                    dict(X=self.f2_test_features_mock,
                         y=self.f2_test_targets_mock))
            savemat('extra_32x32.mat',
                    dict(X=self.f2_extra_features_mock,
                         y=self.f2_extra_targets_mock))