def test_download_and_convert(self, size=16):
    """Download the Caltech 101 Silhouettes files and convert them to HDF5.

    Parameters
    ----------
    size : int, optional
        Image side length forwarded to the downloader and the converter.
        It is also used to build the expected output file name and the
        expected feature shape. Defaults to 16.

    """
    tempdir = self.tempdir
    with remember_cwd():
        os.chdir(tempdir)

        # Before anything is downloaded the converter is expected to
        # complain about missing inputs. The requested ``size`` is
        # forwarded here (it used to be hard-coded to 16) so that every
        # step of the test exercises the same variant.
        assert_raises(MissingInputFiles,
                      caltech101_silhouettes.convert_silhouettes,
                      size=size, directory=tempdir,
                      output_directory=tempdir)

        # The test expects size=10 to be rejected by the downloader ...
        assert_raises(ValueError, silhouettes_downloader,
                      size=10, directory=tempdir)

        silhouettes_downloader(size=size, directory=tempdir)

        # ... and by the converter.
        assert_raises(ValueError,
                      caltech101_silhouettes.convert_silhouettes,
                      size=10, directory=tempdir,
                      output_directory=tempdir)

        caltech101_silhouettes.convert_silhouettes(
            size=size, directory=tempdir, output_directory=tempdir)

        output_file = "caltech101_silhouettes{}.hdf5".format(size)
        output_file = os.path.join(tempdir, output_file)
        with h5py.File(output_file, 'r') as h5:
            # NOTE(review): the example count 8641 is hard-coded; confirm
            # it also holds for sizes other than the default.
            assert h5['features'].shape == (8641, 1, size, size)
            assert h5['targets'].shape == (8641, 1)
def setUp(self):
    """Create a mock ``cifar-10-python.tar.gz`` in a new temp directory.

    The random arrays that are pickled into the archive are kept on the
    instance (``*_features_mock`` / ``*_targets_mock``).
    """
    numpy.random.seed(9 + 5 + 2015)

    def random_images():
        return numpy.random.randint(
            0, 256, (10, 3, 32, 32)).astype('uint8')

    def random_labels():
        return numpy.random.randint(0, 10, (10,)).astype('uint8')

    # The draw order (all training features, all training targets, test
    # features, test targets) is part of the seeded fixture.
    self.train_features_mock = [random_images() for _ in range(5)]
    self.train_targets_mock = [random_labels() for _ in range(5)]
    self.test_features_mock = random_images()
    self.test_targets_mock = random_labels()

    self.tempdir = tempfile.mkdtemp()
    batch_dir = 'cifar-10-batches-py'

    # (file name, pickled payload) pairs, in the order they are written.
    payloads = [
        ('data_batch_{}'.format(number),
         {'data': images, 'labels': labels})
        for number, (images, labels) in enumerate(
            zip(self.train_features_mock, self.train_targets_mock), 1)]
    payloads.append(
        ('test_batch', {'data': self.test_features_mock,
                        'labels': self.test_targets_mock}))

    with remember_cwd():
        os.chdir(self.tempdir)
        os.mkdir(batch_dir)
        for basename, payload in payloads:
            with open(os.path.join(batch_dir, basename), 'wb') as f:
                cPickle.dump(payload, f)
        with tarfile.open('cifar-10-python.tar.gz', 'w:gz') as tar_file:
            tar_file.add(batch_dir)
def setUp(self):
    """Create a mock ``cifar-100-python.tar.gz`` in a new temp directory.

    The random features and fine/coarse labels that are pickled into the
    archive are kept on the instance for later comparison.
    """
    numpy.random.seed(9 + 5 + 2015)

    def draw(upper, shape):
        # uint8 array of random integers in [0, upper).
        return numpy.random.randint(0, upper, shape).astype('uint8')

    # The draw order below is part of the seeded fixture.
    self.train_features_mock = draw(256, (10, 3, 32, 32))
    self.train_fine_labels_mock = draw(100, (10,))
    self.train_coarse_labels_mock = draw(20, (10,))
    self.test_features_mock = draw(256, (10, 3, 32, 32))
    self.test_fine_labels_mock = draw(100, (10,))
    self.test_coarse_labels_mock = draw(20, (10,))

    self.tempdir = tempfile.mkdtemp()
    subsets = (
        ('train', self.train_features_mock,
         self.train_fine_labels_mock, self.train_coarse_labels_mock),
        ('test', self.test_features_mock,
         self.test_fine_labels_mock, self.test_coarse_labels_mock),
    )

    with remember_cwd():
        os.chdir(self.tempdir)
        os.mkdir('cifar-100-python')
        for subset, features, fine, coarse in subsets:
            filename = os.path.join('cifar-100-python', subset)
            with open(filename, 'wb') as f:
                # Features are flattened to one row per example.
                cPickle.dump({
                    'data': features.reshape((10, -1)),
                    'fine_labels': fine,
                    'coarse_labels': coarse}, f)
        with tarfile.open('cifar-100-python.tar.gz', 'w:gz') as tar_file:
            tar_file.add('cifar-100-python')
def test_convert(self):
    """Download the adult data files, convert them and check the shapes."""
    workdir = self.tempdir
    # (file name, URL) pairs handed to the downloader.
    sources = [
        ('adult.data',
         'https://archive.ics.uci.edu/ml/'
         'machine-learning-databases/adult/adult.data'),
        ('adult.test',
         'https://archive.ics.uci.edu/ml/'
         'machine-learning-databases/adult/adult.test'),
    ]
    with remember_cwd():
        os.chdir(workdir)

        # The converter is expected to raise IOError before the download.
        assert_raises(IOError, adult.convert_adult,
                      directory=workdir, output_directory=workdir)

        default_downloader(
            directory=workdir,
            urls=[url for _, url in sources],
            filenames=[name for name, _ in sources])

        adult.convert_adult(directory=workdir, output_directory=workdir)

        hdf5_path = os.path.join(workdir, "adult.hdf5")
        with h5py.File(hdf5_path, 'r') as h5:
            assert h5['features'].shape == (30162 + 15060, 104)
            # One target row per feature row.
            assert h5['targets'].shape[0] == h5['features'].shape[0]
def setUp(self):
    """Prepare a temp directory holding a mock CIFAR-10 style archive.

    Five training batches and one test batch of random ``uint8`` data are
    pickled under ``cifar-10-batches-py`` and packed into
    ``cifar-10-python.tar.gz``; the arrays stay available on ``self``.
    """
    numpy.random.seed(9 + 5 + 2015)
    image_shape = (10, 3, 32, 32)
    label_shape = (10, )

    # Keep this draw order: it determines the seeded mock values.
    self.train_features_mock = []
    for _ in range(5):
        self.train_features_mock.append(
            numpy.random.randint(0, 256, image_shape).astype('uint8'))
    self.train_targets_mock = []
    for _ in range(5):
        self.train_targets_mock.append(
            numpy.random.randint(0, 10, label_shape).astype('uint8'))
    self.test_features_mock = numpy.random.randint(
        0, 256, image_shape).astype('uint8')
    self.test_targets_mock = numpy.random.randint(
        0, 10, label_shape).astype('uint8')

    self.tempdir = tempfile.mkdtemp()

    def dump_batch(basename, features, targets):
        # Pickle one batch below the (relative) batch directory.
        path = os.path.join('cifar-10-batches-py', basename)
        with open(path, 'wb') as handle:
            cPickle.dump({'data': features, 'labels': targets}, handle)

    with remember_cwd():
        os.chdir(self.tempdir)
        os.mkdir('cifar-10-batches-py')

        training_pairs = zip(self.train_features_mock,
                             self.train_targets_mock)
        for number, (features, targets) in enumerate(training_pairs, 1):
            dump_batch('data_batch_{}'.format(number), features, targets)
        dump_batch('test_batch',
                   self.test_features_mock, self.test_targets_mock)

        with tarfile.open('cifar-10-python.tar.gz', 'w:gz') as tar_file:
            tar_file.add('cifar-10-batches-py')
def test_convert(self):
    """Fetch ``adult.data``/``adult.test``, convert, and verify shapes."""
    target_dir = self.tempdir
    data_url = ('https://archive.ics.uci.edu/ml/'
                'machine-learning-databases/adult/adult.data')
    test_url = ('https://archive.ics.uci.edu/ml/'
                'machine-learning-databases/adult/adult.test')

    with remember_cwd():
        os.chdir(target_dir)

        # Without the raw files the converter is expected to raise.
        assert_raises(IOError,
                      adult.convert_adult,
                      directory=target_dir,
                      output_directory=target_dir)

        default_downloader(directory=target_dir,
                           urls=[data_url, test_url],
                           filenames=['adult.data', 'adult.test'])
        adult.convert_adult(directory=target_dir,
                            output_directory=target_dir)

        with h5py.File(os.path.join(target_dir, "adult.hdf5"), 'r') as h5:
            features = h5['features']
            targets = h5['targets']
            assert features.shape == (30162 + 15060, 104)
            # Targets must have as many rows as the features.
            assert targets.shape[0] == features.shape[0]
def setUp(self):
    """Create a mock image zip and attribute list in a new temp directory.

    Ten random ``uint8`` images are saved as JPEG files inside
    ``img_align_celeba.zip`` and ``list_attr_celeba.txt`` gets two
    ``mock`` lines followed by one line per image.
    """
    numpy.random.seed(21 + 1 + 2016)
    self.tempdir = tempfile.mkdtemp()

    # One attribute line per image: the file name followed by forty ' 1'.
    attribute_rows = ''.join(
        '\n{:06d}.jpg'.format(number) + (' 1' * 40)
        for number in range(1, 11))

    with remember_cwd():
        os.chdir(self.tempdir)
        self.images = numpy.random.randint(
            0, 256, (10, 218, 178, 3)).astype('uint8')

        with zipfile.ZipFile('img_align_celeba.zip', 'w') as archive:
            for number, pixels in enumerate(self.images, 1):
                # Each image is saved to the same scratch file, which is
                # then stored in the zip under its numbered name.
                Image.fromarray(pixels).save('img.jpeg')
                archive.write(
                    'img.jpeg',
                    'img_align_celeba/{:06d}.jpg'.format(number))

        with open('list_attr_celeba.txt', 'w') as attr_file:
            attr_file.write('mock\nmock' + attribute_rows)
def test_download_and_convert(self):
    """Download ``iris.data``, convert it and compare with a direct parse.

    The reference features and targets are parsed from the downloaded
    file with ``numpy.loadtxt`` and compared against the contents of the
    ``iris.hdf5`` file written by the converter.
    """
    tempdir = self.tempdir
    with remember_cwd():
        os.chdir(tempdir)

        # The converter is expected to raise IOError before the download.
        assert_raises(IOError, iris.convert_iris,
                      directory=tempdir, output_directory=tempdir)

        default_downloader(
            directory=tempdir,
            urls=['https://archive.ics.uci.edu/ml/'
                  'machine-learning-databases/iris/iris.data'],
            filenames=['iris.data'])

        classes = {
            b'Iris-setosa': 0, b'Iris-versicolor': 1, b'Iris-virginica': 2}

        def class_index(label):
            # Depending on the NumPy version (and its default
            # ``encoding``), loadtxt hands converters either bytes or
            # str. Normalise to bytes so the lookup works in both cases
            # instead of raising KeyError for str input.
            if not isinstance(label, bytes):
                label = label.encode('ascii')
            return classes[label]

        data = numpy.loadtxt(
            os.path.join(tempdir, 'iris.data'),
            converters={4: class_index},
            delimiter=',')
        # Last column is the class index, the rest are the features.
        features = data[:, :-1].astype('float32')
        targets = data[:, -1].astype('uint8').reshape((-1, 1))

        iris.convert_iris(directory=tempdir, output_directory=tempdir)

        output_file = "iris.hdf5"
        output_file = os.path.join(tempdir, output_file)
        with h5py.File(output_file, 'r') as h5:
            assert numpy.allclose(h5['features'], features)
            assert numpy.allclose(h5['targets'], targets)
def test_download_and_convert(self):
    """Download ``iris.data``, convert it and compare with a direct parse.

    Reference features and targets are read from the downloaded file via
    ``numpy.loadtxt`` and checked against the ``iris.hdf5`` file produced
    by the converter.
    """
    tempdir = self.tempdir
    with remember_cwd():
        os.chdir(tempdir)

        # Converting before the download is expected to raise IOError.
        assert_raises(IOError,
                      iris.convert_iris,
                      directory=tempdir,
                      output_directory=tempdir)

        default_downloader(directory=tempdir,
                           urls=[
                               'https://archive.ics.uci.edu/ml/'
                               'machine-learning-databases/iris/iris.data'
                           ],
                           filenames=['iris.data'])

        classes = {
            b'Iris-setosa': 0,
            b'Iris-versicolor': 1,
            b'Iris-virginica': 2
        }

        def to_class_index(raw_label):
            # loadtxt passes converters bytes or str depending on the
            # NumPy version / ``encoding`` default; the table is keyed by
            # bytes, so str labels are encoded first rather than failing
            # with KeyError.
            if isinstance(raw_label, bytes):
                return classes[raw_label]
            return classes[raw_label.encode('ascii')]

        data = numpy.loadtxt(os.path.join(tempdir, 'iris.data'),
                             converters={4: to_class_index},
                             delimiter=',')
        # All but the last column are features; the last is the class.
        features = data[:, :-1].astype('float32')
        targets = data[:, -1].astype('uint8').reshape((-1, 1))

        iris.convert_iris(directory=tempdir, output_directory=tempdir)

        output_file = "iris.hdf5"
        output_file = os.path.join(tempdir, output_file)
        with h5py.File(output_file, 'r') as h5:
            assert numpy.allclose(h5['features'], features)
            assert numpy.allclose(h5['targets'], targets)
def setUp(self):
    """Prepare a temp directory holding a mock CIFAR-100 style archive.

    ``train`` and ``test`` pickles with random features plus fine and
    coarse labels are written under ``cifar-100-python`` and packed into
    ``cifar-100-python.tar.gz``; the arrays stay available on ``self``.
    """
    numpy.random.seed(9 + 5 + 2015)
    feature_shape = (10, 3, 32, 32)
    label_shape = (10, )

    # Keep this draw order: it determines the seeded mock values.
    self.train_features_mock = numpy.random.randint(
        0, 256, feature_shape).astype('uint8')
    self.train_fine_labels_mock = numpy.random.randint(
        0, 100, label_shape).astype('uint8')
    self.train_coarse_labels_mock = numpy.random.randint(
        0, 20, label_shape).astype('uint8')
    self.test_features_mock = numpy.random.randint(
        0, 256, feature_shape).astype('uint8')
    self.test_fine_labels_mock = numpy.random.randint(
        0, 100, label_shape).astype('uint8')
    self.test_coarse_labels_mock = numpy.random.randint(
        0, 20, label_shape).astype('uint8')

    self.tempdir = tempfile.mkdtemp()

    def write_subset(basename, features, fine_labels, coarse_labels):
        # Pickle one subset; features are flattened to one row each.
        payload = {
            'data': features.reshape((10, -1)),
            'fine_labels': fine_labels,
            'coarse_labels': coarse_labels
        }
        path = os.path.join('cifar-100-python', basename)
        with open(path, 'wb') as handle:
            cPickle.dump(payload, handle)

    with remember_cwd():
        os.chdir(self.tempdir)
        os.mkdir('cifar-100-python')
        write_subset('train', self.train_features_mock,
                     self.train_fine_labels_mock,
                     self.train_coarse_labels_mock)
        write_subset('test', self.test_features_mock,
                     self.test_fine_labels_mock,
                     self.test_coarse_labels_mock)
        with tarfile.open('cifar-100-python.tar.gz', 'w:gz') as tar_file:
            tar_file.add('cifar-100-python')
def setUp(self):
    """Create mock archives and ``.mat`` files in a new temp directory.

    For each of the 'train', 'test' and 'extra' splits two fixtures are
    written: a ``<split>.tar.gz`` archive holding two PNG images and an
    HDF5 ``digitStruct.mat`` file (values mirrored in ``self.f1_mock``),
    and a ``<split>_32x32.mat`` file written with ``savemat`` (values
    mirrored in the ``self.f2_*_mock`` attributes).
    """
    # Fixed seed so every run draws the same mock data; the order of the
    # random draws below therefore matters.
    numpy.random.seed(9 + 5 + 2015)

    self.tempdir = tempfile.mkdtemp()
    # NOTE(review): remember_cwd is defined elsewhere; presumably it
    # restores the working directory on exit -- confirm.
    with remember_cwd():
        os.chdir(self.tempdir)

        # Maps split name -> source name -> list of mock values.
        self.f1_mock = {}

        def make_mock_format_1(split):
            # Build the '<split>.tar.gz' fixture and record its contents
            # in self.f1_mock[split].
            self.f1_mock[split] = {}
            # Two images with different shapes.
            self.f1_mock[split]['image'] = [
                numpy.random.randint(0, 256, (6, 6, 3)).astype('uint8'),
                numpy.random.randint(0, 256, (5, 5, 3)).astype('uint8')]
            other_sources = ('label', 'height', 'width', 'left', 'top')
            # Per source: two random values for the first image and the
            # single value 10 for the second image.
            for source in other_sources:
                self.f1_mock[split][source] = [
                    numpy.random.randint(0, 4, (2,)).astype('uint8'),
                    # This ensures that label '10' is converted to '1'.
                    10 * numpy.ones((1,)).astype('uint8')]
            with tarfile.open('{}.tar.gz'.format(split), 'w:gz') as tar_file:
                os.mkdir(split)
                # Images are saved as 1.png, 2.png inside the split dir.
                for i, image in enumerate(self.f1_mock[split]['image']):
                    Image.fromarray(image).save(
                        os.path.join(split, '{}.png'.format(i + 1)))
                struct_path = os.path.join(split, 'digitStruct.mat')
                with h5py.File(struct_path, 'w') as f:
                    for source in other_sources:
                        # First image: each of its two values is stored
                        # in its own dataset 'digitStruct/i1<source><i>',
                        # and 'image_1/<source>' stores the byte-encoded
                        # names of those datasets.
                        suffixes = []
                        for i in range(2):
                            suffix = 'i1{}{}'.format(source, i)
                            suffixes.append([suffix.encode('utf8')])
                            name = 'digitStruct/{}'.format(suffix)
                            f[name] = [[self.f1_mock[split][source][0][i]]]
                        name = 'digitStruct/image_1/{}'.format(source)
                        f[name] = suffixes
                        # Second image: its single value is stored
                        # directly under 'image_2/<source>'.
                        name = 'digitStruct/image_2/{}'.format(source)
                        f[name] = [[self.f1_mock[split][source][1][0]]]
                    # 'bbox' is a (2, 1) dataset of HDF5 object references
                    # pointing at the image_1 and image_2 groups.
                    ref_dtype = h5py.special_dtype(ref=h5py.Reference)
                    bbox = f.create_dataset(
                        'digitStruct/bbox', (2, 1), dtype=ref_dtype)
                    bbox[...] = [[f['digitStruct/image_1'].ref],
                                 [f['digitStruct/image_2'].ref]]
                # Pack the whole split directory into the archive.
                tar_file.add(split)

        for split in ('train', 'test', 'extra'):
            make_mock_format_1(split)

        # Second fixture kind: random arrays of shape (32, 32, 3, 10) and
        # targets of shape (10, 1) with values drawn from 1..10.
        self.f2_train_features_mock = numpy.random.randint(
            0, 256, (32, 32, 3, 10)).astype('uint8')
        self.f2_train_targets_mock = numpy.random.randint(
            1, 11, (10, 1)).astype('uint8')
        self.f2_test_features_mock = numpy.random.randint(
            0, 256, (32, 32, 3, 10)).astype('uint8')
        self.f2_test_targets_mock = numpy.random.randint(
            1, 11, (10, 1)).astype('uint8')
        self.f2_extra_features_mock = numpy.random.randint(
            0, 256, (32, 32, 3, 10)).astype('uint8')
        self.f2_extra_targets_mock = numpy.random.randint(
            1, 11, (10, 1)).astype('uint8')
        # Each split is saved with the features under 'X' and the targets
        # under 'y'.
        savemat('train_32x32.mat', {'X': self.f2_train_features_mock,
                                    'y': self.f2_train_targets_mock})
        savemat('test_32x32.mat', {'X': self.f2_test_features_mock,
                                   'y': self.f2_test_targets_mock})
        savemat('extra_32x32.mat', {'X': self.f2_extra_features_mock,
                                    'y': self.f2_extra_targets_mock})
def setUp(self):
    """Create mock archives and ``.mat`` files in a new temp directory.

    For each of the 'train', 'test' and 'extra' splits two fixtures are
    written: a ``<split>.tar.gz`` archive holding two PNG images and an
    HDF5 ``digitStruct.mat`` file (values mirrored in ``self.f1_mock``),
    and a ``<split>_32x32.mat`` file written with ``savemat`` (values
    mirrored in the ``self.f2_*_mock`` attributes).
    """
    # Fixed seed so every run draws the same mock data; the order of the
    # random draws below therefore matters.
    numpy.random.seed(9 + 5 + 2015)

    self.tempdir = tempfile.mkdtemp()
    # NOTE(review): remember_cwd is defined elsewhere; presumably it
    # restores the working directory on exit -- confirm.
    with remember_cwd():
        os.chdir(self.tempdir)

        # Maps split name -> source name -> list of mock values.
        self.f1_mock = {}

        def make_mock_format_1(split):
            # Build the '<split>.tar.gz' fixture and record its contents
            # in self.f1_mock[split].
            self.f1_mock[split] = {}
            # Two images with different shapes.
            self.f1_mock[split]['image'] = [
                numpy.random.randint(0, 256, (6, 6, 3)).astype('uint8'),
                numpy.random.randint(0, 256, (5, 5, 3)).astype('uint8')
            ]
            other_sources = ('label', 'height', 'width', 'left', 'top')
            # Per source: two random values for the first image and the
            # single value 10 for the second image.
            for source in other_sources:
                self.f1_mock[split][source] = [
                    numpy.random.randint(0, 4, (2, )).astype('uint8'),
                    # This ensures that label '10' is converted to '1'.
                    10 * numpy.ones((1, )).astype('uint8')
                ]
            with tarfile.open('{}.tar.gz'.format(split), 'w:gz') as tar_file:
                os.mkdir(split)
                # Images are saved as 1.png, 2.png inside the split dir.
                for i, image in enumerate(self.f1_mock[split]['image']):
                    Image.fromarray(image).save(
                        os.path.join(split, '{}.png'.format(i + 1)))
                struct_path = os.path.join(split, 'digitStruct.mat')
                with h5py.File(struct_path, 'w') as f:
                    for source in other_sources:
                        # First image: each of its two values is stored
                        # in its own dataset 'digitStruct/i1<source><i>',
                        # and 'image_1/<source>' stores the byte-encoded
                        # names of those datasets.
                        suffixes = []
                        for i in range(2):
                            suffix = 'i1{}{}'.format(source, i)
                            suffixes.append([suffix.encode('utf8')])
                            name = 'digitStruct/{}'.format(suffix)
                            f[name] = [[self.f1_mock[split][source][0][i]]]
                        name = 'digitStruct/image_1/{}'.format(source)
                        f[name] = suffixes
                        # Second image: its single value is stored
                        # directly under 'image_2/<source>'.
                        name = 'digitStruct/image_2/{}'.format(source)
                        f[name] = [[self.f1_mock[split][source][1][0]]]
                    # 'bbox' is a (2, 1) dataset of HDF5 object references
                    # pointing at the image_1 and image_2 groups.
                    ref_dtype = h5py.special_dtype(ref=h5py.Reference)
                    bbox = f.create_dataset('digitStruct/bbox', (2, 1),
                                            dtype=ref_dtype)
                    bbox[...] = [[f['digitStruct/image_1'].ref],
                                 [f['digitStruct/image_2'].ref]]
                # Pack the whole split directory into the archive.
                tar_file.add(split)

        for split in ('train', 'test', 'extra'):
            make_mock_format_1(split)

        # Second fixture kind: random arrays of shape (32, 32, 3, 10) and
        # targets of shape (10, 1) with values drawn from 1..10.
        self.f2_train_features_mock = numpy.random.randint(
            0, 256, (32, 32, 3, 10)).astype('uint8')
        self.f2_train_targets_mock = numpy.random.randint(
            1, 11, (10, 1)).astype('uint8')
        self.f2_test_features_mock = numpy.random.randint(
            0, 256, (32, 32, 3, 10)).astype('uint8')
        self.f2_test_targets_mock = numpy.random.randint(
            1, 11, (10, 1)).astype('uint8')
        self.f2_extra_features_mock = numpy.random.randint(
            0, 256, (32, 32, 3, 10)).astype('uint8')
        self.f2_extra_targets_mock = numpy.random.randint(
            1, 11, (10, 1)).astype('uint8')
        # Each split is saved with the features under 'X' and the targets
        # under 'y'.
        savemat('train_32x32.mat', {
            'X': self.f2_train_features_mock,
            'y': self.f2_train_targets_mock
        })
        savemat('test_32x32.mat', {
            'X': self.f2_test_features_mock,
            'y': self.f2_test_targets_mock
        })
        savemat('extra_32x32.mat', {
            'X': self.f2_extra_features_mock,
            'y': self.f2_extra_targets_mock
        })