def test_dataframe_iterator_n(all_test_images, tmpdir):
    """Check the ``n`` attribute of iterators built from a dataframe.

    All test images are written to ``tmpdir``, but the dataframes list only
    the filenames after the first two, so both iterators are expected to
    report ``n_files - 2`` items.
    """
    # Write every image to tmpdir under a sequential name.
    filenames = []
    flat_images = (im for group in all_test_images for im in group)
    for idx, im in enumerate(flat_images):
        name = "image-{}.png".format(idx)
        filenames.append(name)
        im.save(str(tmpdir / name))

    # Leave the first two files out of the dataframes.
    n_files = len(filenames)
    input_filenames = filenames[2:]

    # One dataframe without labels, one with random binary labels.
    labels = [
        str(c) for c in np.random.randint(2, size=len(input_filenames))
    ]
    unlabeled_df = pd.DataFrame({"filename": input_filenames})
    labeled_df = pd.DataFrame({
        "filename": input_filenames,
        "class": labels
    })

    generator = image_data_generator.ImageDataGenerator()
    df_iterator = generator.flow_from_dataframe(
        unlabeled_df, str(tmpdir), class_mode=None)
    df2_iterator = generator.flow_from_dataframe(
        labeled_df, str(tmpdir), class_mode='binary')

    # Both iterators must only see the files listed in the dataframes.
    expected_n = n_files - 2
    assert df_iterator.n == expected_n
    assert df2_iterator.n == expected_n
def test_image_data_generator(all_test_images):
    """Smoke test: build an ImageDataGenerator with many options set."""
    options = dict(
        featurewise_center=True,
        samplewise_center=True,
        featurewise_std_normalization=True,
        samplewise_std_normalization=True,
        zca_whitening=True,
        rotation_range=90.,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.5,
        zoom_range=0.2,
        channel_shift_range=0.,
        brightness_range=(1, 5),
        fill_mode='nearest',
        cval=0.5,
        horizontal_flip=True,
        vertical_flip=True,
        interpolation_order=1,
    )
    for test_images in all_test_images:
        # The arrays are not used afterwards; only the conversion is run.
        img_list = [utils.img_to_array(im)[None, ...] for im in test_images]

        image_data_generator.ImageDataGenerator(**options)
def test_batch_standardize(all_test_images):
    """ImageDataGenerator.standardize should work on whole batches."""
    for test_images in all_test_images:
        images = np.vstack(
            [utils.img_to_array(im)[None, ...] for im in test_images])

        generator = image_data_generator.ImageDataGenerator(
            featurewise_center=True,
            samplewise_center=True,
            featurewise_std_normalization=True,
            samplewise_std_normalization=True,
            zca_whitening=True,
            rotation_range=90.,
            width_shift_range=0.1,
            height_shift_range=0.1,
            shear_range=0.5,
            zoom_range=0.2,
            channel_shift_range=0.,
            brightness_range=(1, 5),
            fill_mode='nearest',
            cval=0.5,
            horizontal_flip=True,
            vertical_flip=True)
        generator.fit(images, augment=True)

        # Randomly transform each sample of a copy, then standardize the
        # whole batch in a single call.
        transformed = np.copy(images)
        for position, sample in enumerate(transformed):
            transformed[position] = generator.random_transform(sample)
        transformed = generator.standardize(transformed)
def test_dataframe_iterator_class_mode_categorical_multi_label(
        all_test_images, tmpdir):
    """Check multi-label targets produced by ``flow_from_dataframe``.

    The ``class`` column mixes plain strings and lists of strings; each
    batch of targets is expected to be a 2D array of 0/1 values with one
    column per distinct label.
    """

    def check_batch(batch_x, batch_y, n_labels):
        # Shared shape/type checks for one (x, y) batch.
        assert isinstance(batch_x, np.ndarray)
        assert batch_x.ndim == 4
        assert isinstance(batch_y, np.ndarray)
        assert batch_y.shape == (len(batch_x), n_labels)
        for row in batch_y:
            assert all(value in {0, 1} for value in row)

    # Write every image to tmpdir under a sequential name.
    filenames = []
    flat_images = (im for group in all_test_images for im in group)
    for idx, im in enumerate(flat_images):
        name = 'image-{}.png'.format(idx)
        im.save(str(tmpdir / name))
        filenames.append(name)

    label_opt = ['a', 'b', ['a'], ['b'], ['a', 'b'], ['b', 'a']]

    # Two labels ('a' and 'b'), default shuffling.
    random_labels = [random.choice(label_opt) for _ in filenames[:-2]]
    df = pd.DataFrame({
        "filename": filenames,
        "class": random_labels + ['b', 'a']
    })
    generator = image_data_generator.ImageDataGenerator()
    df_iterator = generator.flow_from_dataframe(df, str(tmpdir))
    batch_x, batch_y = next(df_iterator)
    check_batch(batch_x, batch_y, 2)

    # Three labels with a fixed head and no shuffling, so the first three
    # rows of the first batch are known.
    fixed_head = [['b', 'a']] + ['b'] + [['c']]
    random_tail = [random.choice(label_opt) for _ in filenames[:-3]]
    df = pd.DataFrame({
        "filename": filenames,
        "class": fixed_head + random_tail
    })
    generator = image_data_generator.ImageDataGenerator()
    df_iterator = generator.flow_from_dataframe(df, str(tmpdir),
                                                shuffle=False)
    batch_x, batch_y = next(df_iterator)
    check_batch(batch_x, batch_y, 3)
    assert (batch_y[0] == np.array([1, 1, 0])).all()
    assert (batch_y[1] == np.array([0, 1, 0])).all()
    assert (batch_y[2] == np.array([0, 0, 1])).all()
def test_random_transforms():
    """Check ``get_random_transform`` with and without random ranges set."""
    x = np.random.random((2, 28, 28))
    seed = 1

    # With every range set: parameters must be non-zero and must differ
    # between two different seeds.
    generator = image_data_generator.ImageDataGenerator(
        rotation_range=90.,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.5,
        zoom_range=0.2,
        channel_shift_range=0.1,
        brightness_range=(1, 5),
        horizontal_flip=True,
        vertical_flip=True)
    transform_dict = generator.get_random_transform(x.shape, seed)
    transform_dict2 = generator.get_random_transform(x.shape, seed * 2)
    randomized_keys = ('theta', 'tx', 'ty', 'shear', 'zx', 'zy',
                       'channel_shift_intensity', 'brightness')
    for key in randomized_keys:
        assert transform_dict[key] != 0
        assert transform_dict[key] != transform_dict2[key]

    # Without any randomness: the expected values are the neutral ones.
    generator = image_data_generator.ImageDataGenerator()
    transform_dict = generator.get_random_transform(x.shape, seed)
    neutral_values = (('theta', 0), ('tx', 0), ('ty', 0), ('shear', 0),
                      ('zx', 1), ('zy', 1))
    for key, neutral in neutral_values:
        assert transform_dict[key] == neutral
    assert transform_dict['channel_shift_intensity'] is None
    assert transform_dict['brightness'] is None
def test_fit_rescale(all_test_images):
    """Check fitted statistics when ``rescale`` is combined with fitting."""
    rescale = 1. / 255

    def fitted_batch(images, **options):
        # Build a float64 generator with rescaling, fit it on `images` and
        # return the first batch of 8 samples it yields.
        generator = image_data_generator.ImageDataGenerator(
            rescale=rescale, dtype='float64', **options)
        generator.fit(images)
        return generator.flow(images, batch_size=8).next()

    for test_images in all_test_images:
        images = np.vstack(
            [utils.img_to_array(im)[None, ...] for im in test_images])

        # featurewise_center test
        batch = fitted_batch(images, featurewise_center=True)
        assert abs(np.mean(batch)) < 1e-6

        # featurewise_std_normalization test
        batch = fitted_batch(images,
                             featurewise_center=True,
                             featurewise_std_normalization=True)
        assert abs(np.mean(batch)) < 1e-6
        assert abs(1 - np.std(batch)) < 1e-5

        # zca_whitening test
        batch = fitted_batch(images,
                             featurewise_center=True,
                             zca_whitening=True)
        batch = np.reshape(batch, (batch.shape[0], -1))
        # Y * Y_T = n * I, where Y = W * X
        identity = np.dot(batch, batch.T) / batch.shape[0]
        # NOTE(review): this only bounds |identity| - I from above, so
        # entries far below the identity would still pass - confirm
        # whether a two-sided check was intended.
        assert ((np.abs(identity) - np.identity(identity.shape[0]))
                < 1e-6).all()
def test_dataframe_iterator_classes_indices_order(all_test_images, tmpdir):
    """Check how ``class_indices`` depends on label order and ``classes``.

    Without a ``classes`` argument the mapping is expected to be the same
    whatever order the labels appear in; with an explicit ``classes`` list
    the mapping is expected to follow that list's order.
    """
    # Write every image to tmpdir under a sequential name.
    filenames = []
    flat_images = (im for group in all_test_images for im in group)
    for idx, im in enumerate(flat_images):
        name = 'image-{}.png'.format(idx)
        im.save(str(tmpdir / name))
        filenames.append(name)

    # Test the class_indices without classes input
    generator = image_data_generator.ImageDataGenerator()
    label_opt = ['a', 'b', ['a'], ['b'], ['a', 'b'], ['b', 'a']]
    forward_labels = ['a', 'b'] + [
        random.choice(label_opt) for _ in filenames[:-2]
    ]
    df_f = pd.DataFrame({"filename": filenames, "class": forward_labels})
    flow_forward_iter = generator.flow_from_dataframe(df_f, str(tmpdir))

    label_rev = ['b', 'a', ['b'], ['a'], ['b', 'a'], ['a', 'b']]
    reversed_labels = ['b', 'a'] + [
        random.choice(label_rev) for _ in filenames[:-2]
    ]
    df_r = pd.DataFrame({"filename": filenames, "class": reversed_labels})
    flow_backward_iter = generator.flow_from_dataframe(df_r, str(tmpdir))

    # check class_indices
    assert (flow_forward_iter.class_indices ==
            flow_backward_iter.class_indices)

    # Test the class_indices with classes input
    generator_2 = image_data_generator.ImageDataGenerator()
    rows = [['data/A.jpg', 'A'], ['data/B.jpg', 'B']]
    columns = ['filename', 'class']
    df_f2 = pd.DataFrame(rows, columns=columns)
    flow_forward = generator_2.flow_from_dataframe(df_f2,
                                                   classes=['A', 'B'])
    df_b2 = pd.DataFrame(rows, columns=columns)
    flow_backward = generator_2.flow_from_dataframe(df_b2,
                                                    classes=['B', 'A'])

    # check class_indices
    assert flow_forward.class_indices != flow_backward.class_indices
def test_image_data_generator_with_validation_split(all_test_images):
    """Check ``flow`` subsets when ``validation_split=0.5`` is set."""
    for test_images in all_test_images:
        images = np.vstack(
            [utils.img_to_array(im)[None, ...] for im in test_images])

        half = int(len(images) / 2)
        labels = np.concatenate([np.zeros((half, )), np.ones((half, ))])
        generator = image_data_generator.ImageDataGenerator(
            validation_split=0.5)

        # training and validation sets would have different
        # number of classes, because labels are sorted
        expected_message = ('Training and validation subsets '
                            'have different number of classes after '
                            'the split.*')
        with pytest.raises(ValueError, match=expected_message):
            generator.flow(images, labels,
                           shuffle=False, batch_size=10,
                           subset='validation')

        # Interleave the labels so that each half contains both classes.
        quarter = int(len(images) / 4)
        labels = np.concatenate([
            np.zeros((quarter, )),
            np.ones((quarter, )),
            np.zeros((quarter, )),
            np.ones((quarter, ))
        ])

        seq = generator.flow(images, labels,
                             shuffle=False, batch_size=10,
                             subset='validation')
        x, y = seq[0]
        assert 2 == len(np.unique(y))

        seq = generator.flow(images, labels,
                             shuffle=False, batch_size=10,
                             subset='training')
        x2, y2 = seq[0]
        assert 2 == len(np.unique(y2))

        # An unknown subset name must be rejected.
        with pytest.raises(ValueError):
            generator.flow(images, np.arange(images.shape[0]),
                           shuffle=False, batch_size=3,
                           subset='foo')
def test_directory_iterator_with_validation_split(all_test_images,
                                                  validation_split,
                                                  num_training):
    """Check training/validation sample counts of ``flow_from_directory``.

    Images are spread over two class folders (each with nested subfolders)
    inside a temporary directory. The directory is removed in a ``finally``
    clause so it is cleaned up even when an assertion fails.

    Args:
        all_test_images: groups of images providing a ``save(path)`` method.
        validation_split: value passed to ``ImageDataGenerator``.
        num_training: expected number of samples in the training subset.
    """
    num_classes = 2
    tmp_folder = tempfile.mkdtemp(prefix='test_images')
    try:
        # create folders and subfolders
        paths = []
        for cl in range(num_classes):
            class_directory = 'class-{}'.format(cl)
            classpaths = [
                class_directory,
                os.path.join(class_directory, 'subfolder-1'),
                os.path.join(class_directory, 'subfolder-2'),
                os.path.join(class_directory, 'subfolder-1',
                             'sub-subfolder')
            ]
            for path in classpaths:
                os.mkdir(os.path.join(tmp_folder, path))
            paths.append(classpaths)

        # save the images in the paths
        count = 0
        filenames = []
        for test_images in all_test_images:
            for im in test_images:
                # rotate image class
                im_class = count % num_classes
                # rotate subfolders
                classpaths = paths[im_class]
                filename = os.path.join(
                    classpaths[count % len(classpaths)],
                    'image-{}.png'.format(count))
                filenames.append(filename)
                im.save(os.path.join(tmp_folder, filename))
                count += 1

        # create iterator
        generator = image_data_generator.ImageDataGenerator(
            validation_split=validation_split)

        # An unknown subset name must be rejected.
        with pytest.raises(ValueError):
            generator.flow_from_directory(tmp_folder, subset='foo')

        train_iterator = generator.flow_from_directory(tmp_folder,
                                                       subset='training')
        assert train_iterator.samples == num_training

        valid_iterator = generator.flow_from_directory(tmp_folder,
                                                       subset='validation')
        assert valid_iterator.samples == count - num_training

        # check number of classes and images
        assert len(train_iterator.class_indices) == num_classes
        assert len(train_iterator.classes) == num_training
        assert len(set(train_iterator.filenames)
                   & set(filenames)) == num_training
    finally:
        # Always remove the temporary tree, including on test failure.
        shutil.rmtree(tmp_folder)
def test_dataframe_iterator_class_mode_raw(all_test_images, tmpdir):
    """Check ``class_mode='raw'`` targets for one and two ``y_col`` columns.

    With shuffling disabled and ``batch_size=3`` the targets of the first
    batch are expected to equal the first three rows of the selected
    dataframe column(s).
    """
    # Write every image to tmpdir under a sequential name.
    filenames = []
    flat_images = (im for group in all_test_images for im in group)
    for idx, im in enumerate(flat_images):
        name = 'image-{}.png'.format(idx)
        im.save(str(tmpdir / name))
        filenames.append(name)

    df = pd.DataFrame({"filename": filenames}).assign(
        output_0=np.random.uniform(size=len(filenames)),
        output_1=np.random.uniform(size=len(filenames)))

    cases = (
        # case for 1D output
        ('output_0', (3, )),
        # case with a 2D output
        (['output_0', 'output_1'], (3, 2)),
    )
    for y_col, expected_shape in cases:
        generator = image_data_generator.ImageDataGenerator()
        df_iterator = generator.flow_from_dataframe(
            df,
            y_col=y_col,
            directory=str(tmpdir),
            batch_size=3,
            shuffle=False,
            class_mode='raw')
        batch_x, batch_y = next(df_iterator)
        assert isinstance(batch_x, np.ndarray)
        assert len(batch_x.shape) == 4
        assert isinstance(batch_y, np.ndarray)
        assert batch_y.shape == expected_shape
        assert np.array_equal(batch_y, df[y_col].values[:3])
def test_dataframe_iterator_sample_weights(all_test_images, tmpdir):
    """Check ``weight_col`` handling of ``flow_from_dataframe``.

    With ``class_mode="input"`` and a weight column, a batch is expected to
    be an ``(x, y, weights)`` triple in which ``y`` holds the same values as
    ``x`` in a separate array. String-valued weights are expected to raise
    ``TypeError``.
    """
    # save the images in the paths
    count = 0
    filenames = []
    for test_images in all_test_images:
        for im in test_images:
            filename = 'image-{}.png'.format(count)
            im.save(str(tmpdir / filename))
            filenames.append(filename)
            count += 1

    df = pd.DataFrame({"filename": filenames})
    # Alternate the weights 2, 5, 2, 5, ... over the rows.
    df['weight'] = ([2, 5] * len(df))[:len(df)]
    generator = image_data_generator.ImageDataGenerator()
    df_iterator = generator.flow_from_dataframe(df, str(tmpdir),
                                                x_col="filename",
                                                y_col=None,
                                                shuffle=False,
                                                batch_size=5,
                                                weight_col='weight',
                                                class_mode="input")

    batch = next(df_iterator)
    assert len(batch) == 3  # (x, y, weights)

    # check if input and output have the same shape and they're the same
    # (compare the arrays themselves; `a.all() == b.all()` would only
    # compare two booleans)
    assert np.allclose(batch[0], batch[1])

    # check if the input and output images are not the same numpy array
    input_img = batch[0][0]
    output_img = batch[1][0]
    output_img[0][0][0] += 1
    assert input_img[0][0][0] != output_img[0][0][0]
    assert np.array_equal(np.array([2, 5, 2, 5, 2]), batch[2])

    # fail: weights given as strings
    df['weight'] = (['2', '5'] * len(df))[:len(df)]
    with pytest.raises(TypeError):
        image_data_generator.ImageDataGenerator().flow_from_dataframe(
            df,
            weight_col='weight',
            class_mode="input"
        )
def test_deterministic_transform():
    """Check ``apply_transform`` with explicit transform parameters.

    Vertical and horizontal flips are compared with reversed slices of the
    input, and a 45 degree rotation of a 3x3x3 array of ones is compared
    with a hard-coded expected array.
    """
    generator = image_data_generator.ImageDataGenerator(
        rotation_range=90,
        fill_mode='constant')

    # Flips must match plain reversed slicing along the flipped axis.
    x = np.random.random((32, 32, 3))
    assert np.allclose(generator.apply_transform(x, {'flip_vertical': True}),
                       x[::-1, :, :])
    assert np.allclose(
        generator.apply_transform(x, {'flip_horizontal': True}),
        x[:, ::-1, :])

    # Rotation of an all-ones image by 45 degrees.
    x = np.ones((3, 3, 3))
    x_rotated = np.array([[[0., 0., 0.],
                           [0., 0., 0.],
                           [1., 1., 1.]],
                          [[0., 0., 0.],
                           [1., 1., 1.],
                           [1., 1., 1.]],
                          [[0., 0., 0.],
                           [0., 0., 0.],
                           [1., 1., 1.]]])
    assert np.allclose(generator.apply_transform(x, {'theta': 45}),
                       x_rotated)
def test_dataframe_iterator_with_validation_split(all_test_images,
                                                  validation_split,
                                                  num_training, tmpdir):
    """Check training/validation sample counts of ``flow_from_dataframe``.

    Args:
        all_test_images: groups of images providing a ``save(path)`` method.
        validation_split: value passed to ``ImageDataGenerator``.
        num_training: expected number of samples in the training subset.
        tmpdir: directory object the images are written to.

    Raises:
        ValueError: if the first sparse batch contains NaN targets.
    """
    num_classes = 2

    # save the images in the tmpdir
    count = 0
    filenames = []
    for test_images in all_test_images:
        for im in test_images:
            filename = "image-{}.png".format(count)
            filenames.append(filename)
            im.save(str(tmpdir / filename))
            count += 1

    df = pd.DataFrame({
        "filename": filenames,
        "class": [str(random.randint(0, 1)) for _ in filenames]
    })

    # create iterator
    generator = image_data_generator.ImageDataGenerator(
        validation_split=validation_split)
    df_sparse_iterator = generator.flow_from_dataframe(df,
                                                       str(tmpdir),
                                                       class_mode="sparse")
    # The targets are the second element of the yielded batch.
    if np.isnan(next(df_sparse_iterator)[1]).any():
        raise ValueError('Invalid values.')

    # An unknown subset name must be rejected.
    with pytest.raises(ValueError):
        generator.flow_from_dataframe(df, str(tmpdir), subset='foo')

    train_iterator = generator.flow_from_dataframe(df, str(tmpdir),
                                                   subset='training')
    assert train_iterator.samples == num_training

    valid_iterator = generator.flow_from_dataframe(df, str(tmpdir),
                                                   subset='validation')
    assert valid_iterator.samples == count - num_training

    # check number of classes and images
    assert len(train_iterator.class_indices) == num_classes
    assert len(train_iterator.classes) == num_training
    assert len(set(train_iterator.filenames)
               & set(filenames)) == num_training
def test_dataframe_iterator_with_subdirs(all_test_images, tmpdir):
    """Check ``flow_from_dataframe`` with filenames inside subdirectories.

    Images are spread over two class folders with nested subfolders and the
    dataframe lists them by relative path; the iterator is expected to
    report every listed file.
    """
    num_classes = 2

    # Create the folder tree: one directory per class, each with nested
    # subfolders.
    paths = []
    for class_id in range(num_classes):
        root = 'class-{}'.format(class_id)
        folders = [
            root,
            os.path.join(root, 'subfolder-1'),
            os.path.join(root, 'subfolder-2'),
            os.path.join(root, 'subfolder-1', 'sub-subfolder')
        ]
        for folder in folders:
            tmpdir.join(folder).mkdir()
        paths.append(folders)

    # Save the images, rotating over the classes and their subfolders.
    filenames = []
    flat_images = (im for group in all_test_images for im in group)
    for idx, im in enumerate(flat_images):
        folders = paths[idx % num_classes]
        relative_name = os.path.join(folders[idx % len(folders)],
                                     'image-{}.png'.format(idx))
        filenames.append(relative_name)
        im.save(str(tmpdir / relative_name))

    # Dataframe with random class labels.
    labels = [
        str(c) for c in np.random.randint(num_classes, size=len(filenames))
    ]
    df = pd.DataFrame({"filename": filenames, "class": labels})

    generator = image_data_generator.ImageDataGenerator()
    df_iterator = generator.flow_from_dataframe(df, str(tmpdir),
                                                class_mode='binary')

    # Test the number of items in iterator
    assert df_iterator.n == len(filenames)
    assert set(df_iterator.filenames) == set(filenames)
def test_image_data_generator_invalid_data():
    """``fit`` and ``flow`` are expected to reject non-4D input."""
    generator = image_data_generator.ImageDataGenerator(
        featurewise_center=True,
        samplewise_center=True,
        featurewise_std_normalization=True,
        samplewise_std_normalization=True,
        zca_whitening=True,
        data_format='channels_last')

    # Test fit with invalid data
    rank3_input = np.random.random((3, 10, 10))
    with pytest.raises(ValueError):
        generator.fit(rank3_input)

    # Test flow with invalid data
    rank3_input = np.random.random((32, 10, 10))
    rank1_input = np.arange(rank3_input.shape[0])
    with pytest.raises(ValueError):
        generator.flow(rank1_input)
def test_dataframe_iterator_with_custom_indexed_dataframe(
        all_test_images, tmpdir):
    """Batches are expected not to depend on the dataframe's index.

    The same data is wrapped in three dataframes (default index, 1-based
    integer index, filename index) and iterated with the same seed; every
    step is expected to yield identical arrays from all three iterators.
    """
    num_classes = 2

    # Write every image to tmpdir under a sequential name.
    filenames = []
    flat_images = (im for group in all_test_images for im in group)
    for idx, im in enumerate(flat_images):
        name = "image-{}.png".format(idx)
        filenames.append(name)
        im.save(str(tmpdir / name))

    # Same columns, three different indexes.
    labels = [
        str(c) for c in np.random.randint(num_classes, size=len(filenames))
    ]
    df = pd.DataFrame({"filename": filenames, "class": labels})
    df2 = pd.DataFrame({"filename": filenames, "class": labels},
                       index=np.arange(1, len(filenames) + 1))
    df3 = pd.DataFrame({"filename": filenames, "class": labels},
                       index=filenames)

    # create iterators
    seed = 1
    generator = image_data_generator.ImageDataGenerator()
    df_iterator = generator.flow_from_dataframe(df, str(tmpdir), seed=seed)
    df2_iterator = generator.flow_from_dataframe(df2, str(tmpdir),
                                                 seed=seed)
    df3_iterator = generator.flow_from_dataframe(df3, str(tmpdir),
                                                 seed=seed)

    # Test all iterators return same pairs of arrays
    for _ in filenames:
        a1, c1 = next(df_iterator)
        a2, c2 = next(df2_iterator)
        a3, c3 = next(df3_iterator)
        assert np.array_equal(a1, a2)
        assert np.array_equal(a1, a3)
        assert np.array_equal(c1, c2)
        assert np.array_equal(c1, c3)
def test_dataframe_iterator_class_mode_input(all_test_images, tmpdir):
    """Check ``class_mode="input"`` of ``flow_from_dataframe``.

    The target of each batch is expected to hold the same values as the
    input while being a separate array. This is checked once with
    ``y_col=None`` and once with ``y_col="class"``.
    """
    # save the images in the paths
    count = 0
    filenames = []
    for test_images in all_test_images:
        for im in test_images:
            filename = 'image-{}.png'.format(count)
            im.save(str(tmpdir / filename))
            filenames.append(filename)
            count += 1

    df = pd.DataFrame({"filename": filenames})
    generator = image_data_generator.ImageDataGenerator()

    for y_col in (None, "class"):
        df_autoencoder_iterator = generator.flow_from_dataframe(
            df, str(tmpdir),
            x_col="filename",
            y_col=y_col,
            class_mode="input")

        batch = next(df_autoencoder_iterator)

        # check if input and output have the same shape and they're the
        # same (compare the arrays themselves; `a.all() == b.all()` would
        # only compare two booleans)
        assert np.allclose(batch[0], batch[1])

        # check if the input and output images are not the same numpy array
        input_img = batch[0][0]
        output_img = batch[1][0]
        output_img[0][0][0] += 1
        assert (input_img[0][0][0] != output_img[0][0][0])
def test_dataframe_iterator_validate_filenames(all_test_images, tmpdir):
    """Check the ``validate_filenames`` switch of ``flow_from_dataframe``.

    Two entries that are never written to ``tmpdir`` are appended to the
    dataframe. They are expected to be dropped by default and kept when
    ``validate_filenames=False``.
    """
    # Write every image to tmpdir under a sequential name.
    filenames = []
    flat_images = (im for group in all_test_images for im in group)
    for idx, im in enumerate(flat_images):
        name = 'image-{}.png'.format(idx)
        im.save(str(tmpdir / name))
        filenames.append(name)

    df = pd.DataFrame({"filename": filenames + ['test.jpp', 'test.jpg']})
    n_rows = len(df['filename'])
    generator = image_data_generator.ImageDataGenerator()

    # Default behaviour: the two extra entries are filtered out.
    df_iterator = generator.flow_from_dataframe(df, str(tmpdir),
                                                class_mode="input")
    assert len(df_iterator.filenames) == n_rows - 2

    # Validation disabled: every row is kept.
    df_iterator = generator.flow_from_dataframe(df, str(tmpdir),
                                                class_mode="input",
                                                validate_filenames=False)
    assert len(df_iterator.filenames) == n_rows
def test_directory_iterator_class_mode_input(all_test_images, tmpdir):
    """Check ``class_mode='input'`` of ``flow_from_directory``.

    The target of a batch is expected to have the input's shape while being
    a separate array.
    """
    tmpdir.join('class-1').mkdir()

    # Write every image into the single class folder.
    flat_images = (im for group in all_test_images for im in group)
    for idx, im in enumerate(flat_images):
        target = str(tmpdir / 'class-1' / 'image-{}.png'.format(idx))
        im.save(target)

    # create iterator
    generator = image_data_generator.ImageDataGenerator()
    dir_iterator = generator.flow_from_directory(str(tmpdir),
                                                 class_mode='input')
    inputs, targets = next(dir_iterator)[:2]

    # check if input and output have the same shape
    assert (inputs.shape == targets.shape)

    # check if the input and output images are not the same numpy array
    input_img = inputs[0]
    output_img = targets[0]
    output_img[0][0][0] += 1
    assert (input_img[0][0][0] != output_img[0][0][0])
img_name = img_n.format(img_counter) imwrite(img_name, frame) print("{} written!".format(img_name)) img_counter += 1 break cam.release() destroyAllWindows() # load the image img = load_img(img_n) # convert to numpy array data = img_to_array(img) # expand dimension to one sample samples = expand_dims(data, 0) # create image data augmentation generator datagen = image_data_generator.ImageDataGenerator(zoom_range=[0.5, 1.0]) # prepare iterator it = datagen.flow(samples, batch_size=1, save_to_dir=dir) # create image data augmentation generator for i in range(10): # generate batch of images batch = it.next() # convert to unsigned integers for viewing image = batch[0].astype('uint8') datagen = image_data_generator.ImageDataGenerator(rotation_range=90) # prepare iterator it = datagen.flow(samples, batch_size=1, save_to_dir=dir) # create image data augmentation generator for i in range(10): # generate batch of images batch = it.next()
def test_dataframe_iterator_absolute_path(all_test_images, tmpdir):
    """Check ``flow_from_dataframe`` with absolute paths and no directory.

    Besides the plain and the labelled dataframe, one dataframe contains a
    non-existent file and one a file with a forbidden extension; all four
    iterators are expected to report only the valid files and to yield the
    same images in the same order.
    """
    # Write every image to tmpdir and remember its absolute path.
    file_paths = []
    flat_images = (im for group in all_test_images for im in group)
    for idx, im in enumerate(flat_images):
        file_path = str(tmpdir / "image-{:0>5}.png".format(idx))
        file_paths.append(file_path)
        im.save(file_path)

    # prepare an image with a forbidden extension (copy of the last image).
    file_path_fbd = str(tmpdir / 'image-forbid.fbd')
    shutil.copy(file_path, file_path_fbd)

    # create dataframes
    labels = [str(c) for c in np.random.randint(2, size=len(file_paths))]
    df = pd.DataFrame({"filename": file_paths})
    df2 = pd.DataFrame({"filename": file_paths, "class": labels})
    df3 = pd.DataFrame({"filename": ['image-not-exist.png'] + file_paths})
    df4 = pd.DataFrame({"filename": file_paths + [file_path_fbd]})

    # create iterators
    generator = image_data_generator.ImageDataGenerator()
    df_iterator = generator.flow_from_dataframe(
        df, None, class_mode=None, shuffle=False, batch_size=1)
    df2_iterator = generator.flow_from_dataframe(
        df2, None, class_mode='binary', shuffle=False, batch_size=1)
    df3_iterator = generator.flow_from_dataframe(
        df3, None, class_mode=None, shuffle=False, batch_size=1)
    df4_iterator = generator.flow_from_dataframe(
        df4, None, class_mode=None, shuffle=False, batch_size=1)

    validation_split = 0.2
    generator_split = image_data_generator.ImageDataGenerator(
        validation_split=validation_split)
    df_train_iterator = generator_split.flow_from_dataframe(
        df, None, class_mode=None, shuffle=False, subset='training',
        batch_size=1)
    df_val_iterator = generator_split.flow_from_dataframe(
        df, None, class_mode=None, shuffle=False, subset='validation',
        batch_size=1)

    # Test the number of items in iterators
    n_valid = len(file_paths)
    assert df_iterator.n == n_valid
    assert df2_iterator.n == n_valid
    assert df3_iterator.n == n_valid
    assert df4_iterator.n == n_valid
    assert df_val_iterator.n == int(validation_split * len(file_paths))
    assert df_train_iterator.n == len(file_paths) - df_val_iterator.n

    # Test flow_from_dataframe
    for step, _ in enumerate(file_paths):
        a1 = next(df_iterator)
        a2, _ = next(df2_iterator)
        a3 = next(df3_iterator)
        a4 = next(df4_iterator)

        # The first steps are served by the validation subset, the
        # remaining ones by the training subset.
        if step < df_val_iterator.n:
            split_iterator = df_val_iterator
        else:
            split_iterator = df_train_iterator
        a5 = next(split_iterator)

        assert np.array_equal(a1, a2)
        assert np.array_equal(a1, a3)
        assert np.array_equal(a1, a4)
        assert np.array_equal(a1, a5)
def test_image_data_generator_fit():
    """Run ``fit`` on random data for both data formats.

    For each data format the generator is fitted once with augmentation and
    then on grayscale data, RGB data and data with more samples than
    dimensions.
    """
    shared_options = dict(
        featurewise_center=True,
        samplewise_center=True,
        featurewise_std_normalization=True,
        samplewise_std_normalization=True,
        zca_whitening=True,
        rotation_range=90.,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.5,
        zoom_range=(0.2, 0.2),
        channel_shift_range=0.,
        brightness_range=(1, 5),
        fill_mode='nearest',
        cval=0.5,
        horizontal_flip=True,
        vertical_flip=True,
        interpolation_order=1,
    )

    # channels_last
    generator = image_data_generator.ImageDataGenerator(
        data_format='channels_last', **shared_options)
    generator.fit(np.random.random((32, 10, 10, 3)), augment=True)
    channels_last_shapes = (
        (32, 10, 10, 1),  # grayscale
        (32, 10, 10, 3),  # RGB
        (32, 4, 4, 1),  # more samples than dims
    )
    for shape in channels_last_shapes:
        generator.fit(np.random.random(shape))

    # channels_first
    generator = image_data_generator.ImageDataGenerator(
        data_format='channels_first', **shared_options)
    # NOTE(review): this augmented fit uses a channels_last-shaped array
    # although the generator is channels_first; kept exactly as in the
    # original - confirm whether this is intentional.
    generator.fit(np.random.random((32, 10, 10, 3)), augment=True)
    channels_first_shapes = (
        (32, 1, 10, 10),  # grayscale
        (32, 3, 10, 10),  # RGB
        (32, 1, 4, 4),  # more samples than dims
    )
    for shape in channels_first_shapes:
        generator.fit(np.random.random(shape))
def test_image_data_generator_with_split_value_error():
    """A ``validation_split`` of 5 is expected to raise ``ValueError``."""
    invalid_split = 5
    with pytest.raises(ValueError):
        image_data_generator.ImageDataGenerator(
            validation_split=invalid_split)
def test_dataframe_iterator(all_test_images, tmpdir):
    """End-to-end checks of ``DataFrameIterator`` / ``flow_from_dataframe``.

    Covers direct construction of the iterator, relative and absolute
    filename columns, sparse targets, invalid arguments, the ``has_ext``
    deprecation warning and index-based access to batches.
    """
    num_classes = 2

    # save the images in the tmpdir
    count = 0
    filenames = []
    filepaths = []
    for test_images in all_test_images:
        for im in test_images:
            filename = "image-{}.png".format(count)
            filenames.append(filename)
            filepaths.append(os.path.join(str(tmpdir), filename))
            im.save(str(tmpdir / filename))
            count += 1

    df = pd.DataFrame({
        "filename": filenames,
        "class": [str(random.randint(0, 1)) for _ in filenames],
        "filepaths": filepaths
    })

    # create iterator
    iterator = dataframe_iterator.DataFrameIterator(df, str(tmpdir))
    batch = next(iterator)
    assert len(batch) == 2
    assert isinstance(batch[0], np.ndarray)
    assert isinstance(batch[1], np.ndarray)

    generator = image_data_generator.ImageDataGenerator()
    # Absolute paths in `filepaths` (no directory) vs. relative filenames
    # resolved against tmpdir.
    df_iterator = generator.flow_from_dataframe(df, x_col='filepaths')
    df_iterator_dir = generator.flow_from_dataframe(df, str(tmpdir))
    df_sparse_iterator = generator.flow_from_dataframe(df, str(tmpdir),
                                                       class_mode="sparse")
    assert not np.isnan(df_sparse_iterator.classes).any()

    # check number of classes and images
    assert len(df_iterator.class_indices) == num_classes
    assert len(df_iterator.classes) == count
    assert set(df_iterator.filenames) == set(filepaths)
    assert len(df_iterator_dir.class_indices) == num_classes
    assert len(df_iterator_dir.classes) == count
    assert set(df_iterator_dir.filenames) == set(filenames)

    # test without shuffle
    _, batch_y = next(
        generator.flow_from_dataframe(df, str(tmpdir),
                                      shuffle=False,
                                      class_mode="sparse"))
    assert (batch_y == df['class'].astype('float')[:len(batch_y)]).all()

    # Test invalid use cases
    with pytest.raises(ValueError):
        generator.flow_from_dataframe(df, str(tmpdir), color_mode='cmyk')
    with pytest.raises(ValueError):
        generator.flow_from_dataframe(df, str(tmpdir), class_mode='output')
    with pytest.warns(DeprecationWarning):
        generator.flow_from_dataframe(df, str(tmpdir), has_ext=True)
    with pytest.warns(DeprecationWarning):
        generator.flow_from_dataframe(df, str(tmpdir), has_ext=False)

    def preprocessing_function(x):
        """This will fail if not provided by a Numpy array.
        Note: This is made to enforce backward compatibility.
        """
        assert x.shape == (26, 26, 3)
        assert type(x) is np.ndarray

        return np.zeros_like(x)

    # Test usage as Sequence
    generator = image_data_generator.ImageDataGenerator(
        preprocessing_function=preprocessing_function)
    dir_seq = generator.flow_from_dataframe(df, str(tmpdir),
                                            target_size=(26, 26),
                                            color_mode='rgb',
                                            batch_size=3,
                                            class_mode='categorical')
    assert len(dir_seq) == np.ceil(count / 3)
    x1, y1 = dir_seq[1]
    assert x1.shape == (3, 26, 26, 3)
    assert y1.shape == (3, num_classes)
    x1, y1 = dir_seq[5]
    # The preprocessing function zeroes every image.
    assert (x1 == 0).all()

    # Index 9 is expected to be rejected with ValueError.
    with pytest.raises(ValueError):
        x1, y1 = dir_seq[9]
def test_image_data_generator_flow(all_test_images, tmpdir):
    """Call ``ImageDataGenerator.flow`` with many argument combinations.

    Most calls only check that building the iterator does not raise; the
    two failure cases additionally check the error message.
    """
    save_dir = str(tmpdir)
    for test_images in all_test_images:
        images = np.vstack(
            [utils.img_to_array(im)[None, ...] for im in test_images])
        dsize = images.shape[0]

        generator = image_data_generator.ImageDataGenerator(
            featurewise_center=True,
            samplewise_center=True,
            featurewise_std_normalization=True,
            samplewise_std_normalization=True,
            zca_whitening=True,
            rotation_range=90.,
            width_shift_range=0.1,
            height_shift_range=0.1,
            shear_range=0.5,
            zoom_range=0.2,
            channel_shift_range=0.,
            brightness_range=(1, 5),
            fill_mode='nearest',
            cval=0.5,
            horizontal_flip=True,
            vertical_flip=True,
            interpolation_order=1)

        # Labels only, then labels plus sample weights.
        generator.flow(images, np.arange(dsize),
                       shuffle=False,
                       save_to_dir=save_dir,
                       batch_size=3)
        generator.flow(images, np.arange(dsize),
                       shuffle=False,
                       sample_weight=np.arange(dsize) + 1,
                       save_to_dir=save_dir,
                       batch_size=3)

        # Test with `shuffle=True`
        generator.flow(images, np.arange(dsize),
                       shuffle=True,
                       save_to_dir=save_dir,
                       batch_size=3,
                       seed=42)

        # Test without y
        generator.flow(images, None,
                       shuffle=True,
                       save_to_dir=save_dir,
                       batch_size=3)

        # Test with a single miscellaneous input data array
        x_misc1 = np.random.random(dsize)
        generator.flow((images, x_misc1), np.arange(dsize),
                       shuffle=False,
                       batch_size=2)

        # Test with two miscellaneous inputs
        x_misc2 = np.random.random((dsize, 3, 3))
        generator.flow((images, [x_misc1, x_misc2]), np.arange(dsize),
                       shuffle=False,
                       batch_size=2)

        # Test cases with `y = None`
        generator.flow(images, None, batch_size=3)
        generator.flow((images, x_misc1), None,
                       batch_size=3,
                       shuffle=False)
        generator.flow((images, [x_misc1, x_misc2]), None,
                       batch_size=3,
                       shuffle=False)

        generator = image_data_generator.ImageDataGenerator(
            validation_split=0.2)
        generator.flow(images, batch_size=3)

        # Test some failure cases:
        # a miscellaneous input with one sample too many ...
        x_misc_err = np.random.random((dsize + 1, 3, 3))
        with pytest.raises(ValueError) as e_info:
            generator.flow((images, x_misc_err), np.arange(dsize),
                           batch_size=3)
        assert 'All of the arrays in' in str(e_info.value)

        # ... and labels with one entry too many.
        with pytest.raises(ValueError) as e_info:
            generator.flow((images, x_misc1), np.arange(dsize + 1),
                           batch_size=3)
        assert '`x` (images tensor) and `y` (labels) ' in str(e_info.value)

        # Test `flow` behavior as Sequence
        generator.flow(images, np.arange(dsize),
                       shuffle=False,
                       save_to_dir=save_dir,
                       batch_size=3)

        # Test with `shuffle=True`
        generator.flow(images, np.arange(dsize),
                       shuffle=True,
                       save_to_dir=save_dir,
                       batch_size=3,
                       seed=123)

    # test order_interpolation
    labels = np.array([[2, 2, 0, 2, 2],
                       [1, 3, 2, 3, 1],
                       [2, 1, 0, 1, 2],
                       [3, 1, 0, 2, 0],
                       [3, 1, 3, 2, 1]])
    label_generator = image_data_generator.ImageDataGenerator(
        rotation_range=90.,
        interpolation_order=0)
    # Add a leading batch axis and a trailing channel axis.
    label_generator.flow(x=labels[np.newaxis, ..., np.newaxis], seed=123)
def test_dataframe_iterator_class_mode_multi_output(all_test_images, tmpdir):
    """Check `flow_from_dataframe` with `class_mode='multi_output'`.

    The fixture images are written to `tmpdir`, then a dataframe with two
    target columns is fed to the iterator three times: with scalar targets,
    with flattened-array targets and with (2, 2, 1)-shaped array targets in
    the second column. Each time the first batch must be a 4-D image array
    plus a two-element list matching the first three rows of each target
    column. Passing a single column name as `y_col` must raise `TypeError`.
    """
    directory = str(tmpdir)
    target_columns = ['output_0', 'output_1']

    # save the images in the paths, numbering them in order of appearance
    filenames = []
    for image_group in all_test_images:
        for img in image_group:
            name = 'image-{}.png'.format(len(filenames))
            img.save(str(tmpdir / name))
            filenames.append(name)

    def check_first_batch(frame):
        """Build an unshuffled iterator over `frame` and verify batch 0."""
        iterator = image_data_generator.ImageDataGenerator(
        ).flow_from_dataframe(frame,
                              y_col=['output_0', 'output_1'],
                              directory=directory,
                              batch_size=3,
                              shuffle=False,
                              class_mode='multi_output')
        batch_x, batch_y = next(iterator)
        assert isinstance(batch_x, np.ndarray)
        assert len(batch_x.shape) == 4
        assert isinstance(batch_y, list)
        assert len(batch_y) == 2
        # With shuffle disabled the batch corresponds to the first 3 rows.
        for position, column in enumerate(target_columns):
            expected = np.array(frame[column].tolist()[:3])
            assert np.array_equal(batch_y[position], expected)

    # fit both outputs are a single number
    n_rows = len(filenames)
    df = pd.DataFrame({"filename": filenames}).assign(
        output_0=np.random.uniform(size=n_rows),
        output_1=np.random.uniform(size=n_rows))
    check_first_batch(df)

    # if one of the outputs is a 1D array
    df['output_1'] = [
        np.random.uniform(size=(2, 2, 1)).flatten() for _ in range(len(df))
    ]
    check_first_batch(df)

    # if one of the outputs is a multi-dimensional array of shape (2, 2, 1)
    df['output_1'] = [
        np.random.uniform(size=(2, 2, 1)) for _ in range(len(df))
    ]
    check_first_batch(df)

    # fail if single column
    with pytest.raises(TypeError):
        image_data_generator.ImageDataGenerator().flow_from_dataframe(
            df,
            y_col='output_0',
            directory=directory,
            class_mode='multi_output')
def test_valid_args():
    """A scalar `brightness_range` must be rejected with `ValueError`."""
    invalid_kwargs = {'brightness_range': 0.1}
    with pytest.raises(ValueError):
        image_data_generator.ImageDataGenerator(**invalid_kwargs)
def test_directory_iterator(all_test_images, tmpdir):
    """Check `flow_from_directory` on a nested two-class directory tree.

    The fixture images are distributed round-robin over two class folders
    and their nested subfolders under `tmpdir`. The resulting iterator must
    report the right number of classes, samples and relative filenames,
    reject unsupported `color_mode` / `class_mode` values, and support
    index-based access with a preprocessing function applied.
    """
    num_classes = 2
    root = str(tmpdir)

    # create folders and subfolders (parents are listed before children so
    # each mkdir finds its parent already in place)
    paths = []
    for class_index in range(num_classes):
        class_dir = 'class-{}'.format(class_index)
        nested_dirs = [
            class_dir,
            os.path.join(class_dir, 'subfolder-1'),
            os.path.join(class_dir, 'subfolder-2'),
            os.path.join(class_dir, 'subfolder-1', 'sub-subfolder'),
        ]
        for relative_dir in nested_dirs:
            tmpdir.join(relative_dir).mkdir()
        paths.append(nested_dirs)

    # save the images in the paths
    filenames = []
    flat_images = (img for group in all_test_images for img in group)
    for index, img in enumerate(flat_images):
        # rotate image class, then rotate subfolders within that class
        class_dirs = paths[index % num_classes]
        relative_file = os.path.join(class_dirs[index % len(class_dirs)],
                                     'image-{}.png'.format(index))
        filenames.append(relative_file)
        img.save(str(tmpdir / relative_file))
    count = len(filenames)

    # create iterator
    generator = image_data_generator.ImageDataGenerator()
    dir_iterator = generator.flow_from_directory(root)

    # check number of classes and images
    assert len(dir_iterator.class_indices) == num_classes
    assert len(dir_iterator.classes) == count
    assert set(dir_iterator.filenames) == set(filenames)

    # Test invalid use cases
    with pytest.raises(ValueError):
        generator.flow_from_directory(root, color_mode='cmyk')
    with pytest.raises(ValueError):
        generator.flow_from_directory(root, class_mode='output')

    def zero_out(x):
        """Require a plain 26x26x3 NumPy array and return zeros like it.

        Note: the strict type check is made to enforce backward
        compatibility.
        """
        assert x.shape == (26, 26, 3)
        assert type(x) is np.ndarray
        return np.zeros_like(x)

    # Test usage as Sequence
    generator = image_data_generator.ImageDataGenerator(
        preprocessing_function=zero_out)
    dir_seq = generator.flow_from_directory(root,
                                            target_size=(26, 26),
                                            color_mode='rgb',
                                            batch_size=3,
                                            class_mode='categorical')
    assert len(dir_seq) == np.ceil(count / 3.)

    x1, y1 = dir_seq[1]
    assert x1.shape == (3, 26, 26, 3)
    assert y1.shape == (3, num_classes)

    # The preprocessing function zeroes every image in the batch.
    x1, y1 = dir_seq[5]
    assert (x1 == 0).all()

    with pytest.raises(ValueError):
        x1, y1 = dir_seq[14]  # there are 40 images and batch size is 3