コード例 #1
0
def test_dataframe_iterator_n(all_test_images, tmpdir):
    """Check the sample count reported by ``flow_from_dataframe`` iterators.

    All fixture images are written to ``tmpdir``, but the dataframes list
    only the files after the first two, so each iterator's ``n`` must be the
    number of saved files minus two.
    """
    # write every fixture image to disk under a sequential name
    flat_images = (im for group in all_test_images for im in group)
    filenames = []
    for index, im in enumerate(flat_images):
        name = "image-{}.png".format(index)
        filenames.append(name)
        im.save(str(tmpdir / name))

    # the dataframes leave out the first two files
    n_skipped = 2
    input_filenames = filenames[n_skipped:]
    expected_n = len(filenames) - n_skipped

    # one unlabeled dataframe and one with random labels drawn from {0, 1}
    labels = [str(c) for c in np.random.randint(2, size=len(input_filenames))]
    unlabeled_df = pd.DataFrame({"filename": input_filenames})
    labeled_df = pd.DataFrame({"filename": input_filenames, "class": labels})

    # build one iterator per dataframe
    generator = image_data_generator.ImageDataGenerator()
    unlabeled_iterator = generator.flow_from_dataframe(
        unlabeled_df, str(tmpdir), class_mode=None)
    labeled_iterator = generator.flow_from_dataframe(
        labeled_df, str(tmpdir), class_mode='binary')

    # both iterators must see exactly the listed files
    for iterator in (unlabeled_iterator, labeled_iterator):
        assert iterator.n == expected_n
コード例 #2
0
def test_image_data_generator(all_test_images):
    """Smoke test: convert the fixture images and build a generator.

    There are no assertions; the test passes as long as converting each
    image to an array and constructing an ``ImageDataGenerator`` with a broad
    set of normalization and augmentation options does not raise.
    """
    options = dict(
        featurewise_center=True,
        samplewise_center=True,
        featurewise_std_normalization=True,
        samplewise_std_normalization=True,
        zca_whitening=True,
        rotation_range=90.,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.5,
        zoom_range=0.2,
        channel_shift_range=0.,
        brightness_range=(1, 5),
        fill_mode='nearest',
        cval=0.5,
        horizontal_flip=True,
        vertical_flip=True,
        interpolation_order=1,
    )
    for test_images in all_test_images:
        # the arrays are not used afterwards; the conversion itself is the
        # part being exercised
        img_list = [utils.img_to_array(im)[None, ...] for im in test_images]

        image_data_generator.ImageDataGenerator(**options)
コード例 #3
0
def test_batch_standardize(all_test_images):
    """Run ``fit``, ``random_transform`` and ``standardize`` on image batches.

    ``ImageDataGenerator.standardize`` should work on batches. The test has
    no assertions; it passes as long as none of the calls raise.
    """
    options = dict(
        featurewise_center=True,
        samplewise_center=True,
        featurewise_std_normalization=True,
        samplewise_std_normalization=True,
        zca_whitening=True,
        rotation_range=90.,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.5,
        zoom_range=0.2,
        channel_shift_range=0.,
        brightness_range=(1, 5),
        fill_mode='nearest',
        cval=0.5,
        horizontal_flip=True,
        vertical_flip=True,
    )
    for test_images in all_test_images:
        # stack the per-image arrays (each given a leading axis) into a batch
        images = np.vstack(
            [utils.img_to_array(im)[None, ...] for im in test_images])

        generator = image_data_generator.ImageDataGenerator(**options)
        generator.fit(images, augment=True)

        # transform each sample in place on a copy, then standardize the
        # whole batch in one call
        transformed = np.copy(images)
        for position, sample in enumerate(transformed):
            transformed[position] = generator.random_transform(sample)
        transformed = generator.standardize(transformed)
コード例 #4
0
def test_dataframe_iterator_class_mode_categorical_multi_label(
        all_test_images, tmpdir):
    """Check multi-label targets produced by ``flow_from_dataframe``.

    The ``class`` column mixes plain strings and lists of strings. The
    iterator is created without an explicit ``class_mode`` (categorical,
    going by the test name), and every batch of targets must be a 0/1 matrix
    with one column per distinct label.
    """
    # write every fixture image to disk under a sequential name
    flat_images = (im for group in all_test_images for im in group)
    filenames = []
    for index, im in enumerate(flat_images):
        name = 'image-{}.png'.format(index)
        im.save(str(tmpdir / name))
        filenames.append(name)

    label_opt = ['a', 'b', ['a'], ['b'], ['a', 'b'], ['b', 'a']]

    # two labels overall: random picks, with 'b' and 'a' forced at the end so
    # both labels are guaranteed to occur
    random_labels = [random.choice(label_opt) for _ in filenames[:-2]]
    df = pd.DataFrame({
        "filename": filenames,
        "class": random_labels + ['b', 'a'],
    })
    generator = image_data_generator.ImageDataGenerator()
    df_iterator = generator.flow_from_dataframe(df, str(tmpdir))
    batch_x, batch_y = next(df_iterator)
    assert isinstance(batch_x, np.ndarray)
    assert batch_x.ndim == 4
    assert isinstance(batch_y, np.ndarray)
    assert batch_y.shape == (len(batch_x), 2)
    assert all(value in {0, 1} for row in batch_y for value in row)

    # three labels overall: the first three rows are fixed so their encoded
    # targets can be checked exactly (shuffling is disabled below)
    fixed_labels = [['b', 'a']] + ['b'] + [['c']]
    random_labels = [random.choice(label_opt) for _ in filenames[:-3]]
    df = pd.DataFrame({
        "filename": filenames,
        "class": fixed_labels + random_labels,
    })
    generator = image_data_generator.ImageDataGenerator()
    df_iterator = generator.flow_from_dataframe(df, str(tmpdir), shuffle=False)
    batch_x, batch_y = next(df_iterator)
    assert isinstance(batch_x, np.ndarray)
    assert batch_x.ndim == 4
    assert isinstance(batch_y, np.ndarray)
    assert batch_y.shape == (len(batch_x), 3)
    assert all(value in {0, 1} for row in batch_y for value in row)

    expected_rows = ([1, 1, 0], [0, 1, 0], [0, 0, 1])
    for position, expected in enumerate(expected_rows):
        assert (batch_y[position] == np.array(expected)).all()
コード例 #5
0
def test_random_transforms():
    """Check the parameter dict returned by ``get_random_transform``.

    With every augmentation enabled, each parameter must be non-zero and
    must differ between two different seeds. With a default generator the
    parameters must take their neutral values.
    """
    x = np.random.random((2, 28, 28))
    seed = 1

    # Test get_random_transform with predefined seed
    augmenting_options = dict(
        rotation_range=90.,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.5,
        zoom_range=0.2,
        channel_shift_range=0.1,
        brightness_range=(1, 5),
        horizontal_flip=True,
        vertical_flip=True,
    )
    generator = image_data_generator.ImageDataGenerator(**augmenting_options)
    first = generator.get_random_transform(x.shape, seed)
    second = generator.get_random_transform(x.shape, seed * 2)

    randomized_keys = (
        'theta',
        'tx',
        'ty',
        'shear',
        'zx',
        'zy',
        'channel_shift_intensity',
        'brightness',
    )
    for key in randomized_keys:
        assert first[key] != 0
        assert first[key] != second[key]

    # Test get_random_transform without any randomness
    generator = image_data_generator.ImageDataGenerator()
    neutral = generator.get_random_transform(x.shape, seed)
    for key in ('theta', 'tx', 'ty', 'shear'):
        assert neutral[key] == 0
    for key in ('zx', 'zy'):
        assert neutral[key] == 1
    for key in ('channel_shift_intensity', 'brightness'):
        assert neutral[key] is None
コード例 #6
0
def test_fit_rescale(all_test_images):
    """Check featurewise statistics when ``rescale`` is combined with ``fit``.

    For each group of fixture images, three generators are fitted on the
    images and one batch of up to 8 samples is drawn from each to verify
    centering, std-normalization and ZCA whitening.
    """
    rescale = 1. / 255

    for test_images in all_test_images:
        images = np.vstack(
            [utils.img_to_array(im)[None, ...] for im in test_images])

        # featurewise_center test
        centering = image_data_generator.ImageDataGenerator(
            rescale=rescale, featurewise_center=True, dtype='float64')
        centering.fit(images)
        batch = centering.flow(images, batch_size=8).next()
        batch_mean = np.mean(batch)
        assert abs(batch_mean) < 1e-6

        # featurewise_std_normalization test
        normalizing = image_data_generator.ImageDataGenerator(
            rescale=rescale,
            featurewise_center=True,
            featurewise_std_normalization=True,
            dtype='float64')
        normalizing.fit(images)
        batch = normalizing.flow(images, batch_size=8).next()
        assert abs(np.mean(batch)) < 1e-6
        assert abs(1 - np.std(batch)) < 1e-5

        # zca_whitening test
        whitening = image_data_generator.ImageDataGenerator(
            rescale=rescale,
            featurewise_center=True,
            zca_whitening=True,
            dtype='float64')
        whitening.fit(images)
        batch = whitening.flow(images, batch_size=8).next()
        # flatten every sample into a single row vector
        n_features = batch.shape[1] * batch.shape[2] * batch.shape[3]
        flat = np.reshape(batch, (batch.shape[0], n_features))
        # Y * Y_T = n * I, where Y = W * X
        identity = np.dot(flat, flat.T) / flat.shape[0]
        # NOTE(review): this bound is one-sided - it only requires
        # |identity| - I to stay below the tolerance, so entries smaller
        # than expected are not detected.
        excess = np.abs(identity) - np.identity(identity.shape[0])
        assert (excess < 1e-6).all()
コード例 #7
0
def test_dataframe_iterator_classes_indices_order(all_test_images, tmpdir):
    """Check how ``class_indices`` depends on label order.

    Without a ``classes`` argument, the mapping must be the same whether
    the labels first appear as 'a', 'b' or as 'b', 'a'. With an explicit
    ``classes`` list, reversing the list must change the mapping.
    """
    # write every fixture image to disk under a sequential name
    flat_images = (im for group in all_test_images for im in group)
    filenames = []
    for index, im in enumerate(flat_images):
        name = 'image-{}.png'.format(index)
        im.save(str(tmpdir / name))
        filenames.append(name)

    # Test the class_indices without classes input
    generator = image_data_generator.ImageDataGenerator()

    label_opt = ['a', 'b', ['a'], ['b'], ['a', 'b'], ['b', 'a']]
    forward_labels = ['a', 'b'] + [
        random.choice(label_opt) for _ in filenames[:-2]
    ]
    df_f = pd.DataFrame({"filename": filenames, "class": forward_labels})
    flow_forward_iter = generator.flow_from_dataframe(df_f, str(tmpdir))

    label_rev = ['b', 'a', ['b'], ['a'], ['b', 'a'], ['a', 'b']]
    reversed_labels = ['b', 'a'] + [
        random.choice(label_rev) for _ in filenames[:-2]
    ]
    df_r = pd.DataFrame({"filename": filenames, "class": reversed_labels})
    flow_backward_iter = generator.flow_from_dataframe(df_r, str(tmpdir))

    # check class_indices
    assert flow_forward_iter.class_indices == flow_backward_iter.class_indices

    # Test the class_indices with classes input
    generator_2 = image_data_generator.ImageDataGenerator()
    rows = [['data/A.jpg', 'A'], ['data/B.jpg', 'B']]
    column_names = ['filename', 'class']

    df_f2 = pd.DataFrame(rows, columns=column_names)
    flow_forward = generator_2.flow_from_dataframe(df_f2, classes=['A', 'B'])
    df_b2 = pd.DataFrame(rows, columns=column_names)
    flow_backward = generator_2.flow_from_dataframe(df_b2, classes=['B', 'A'])

    # check class_indices
    assert flow_forward.class_indices != flow_backward.class_indices
コード例 #8
0
def test_image_data_generator_with_validation_split(all_test_images):
    """Check ``flow`` with ``validation_split=0.5`` on in-memory arrays.

    Sorted labels must be rejected because the two subsets would contain
    different classes; interleaved labels must yield both classes in each
    subset; an unknown ``subset`` name must raise ``ValueError``.
    """
    flow_options = dict(shuffle=False, batch_size=10)

    for test_images in all_test_images:
        images = np.vstack(
            [utils.img_to_array(im)[None, ...] for im in test_images])
        generator = image_data_generator.ImageDataGenerator(
            validation_split=0.5)

        # training and validation sets would have different
        # number of classes, because labels are sorted
        half = len(images) // 2
        labels = np.concatenate([np.zeros((half, )), np.ones((half, ))])
        expected_message = ('Training and validation subsets '
                            'have different number of classes after '
                            'the split.*')
        with pytest.raises(ValueError, match=expected_message):
            generator.flow(images, labels, subset='validation',
                           **flow_options)

        # alternate the classes in quarters so each half holds both of them
        quarter = len(images) // 4
        labels = np.concatenate([
            np.zeros((quarter, )),
            np.ones((quarter, )),
            np.zeros((quarter, )),
            np.ones((quarter, )),
        ])

        seq = generator.flow(images, labels, subset='validation',
                             **flow_options)
        x, y = seq[0]
        assert len(np.unique(y)) == 2

        seq = generator.flow(images, labels, subset='training',
                             **flow_options)
        x2, y2 = seq[0]
        assert len(np.unique(y2)) == 2

        # an unknown subset name is rejected
        with pytest.raises(ValueError):
            generator.flow(images,
                           np.arange(images.shape[0]),
                           shuffle=False,
                           batch_size=3,
                           subset='foo')
コード例 #9
0
def test_directory_iterator_with_validation_split(all_test_images,
                                                  validation_split,
                                                  num_training):
    """Check ``flow_from_directory`` training/validation subsets.

    The fixture images are spread over two class folders (each with nested
    subfolders) inside a fresh temporary directory. The training iterator
    must report ``num_training`` samples and the validation iterator the
    remainder; an unknown ``subset`` name must raise ``ValueError``.

    The temporary directory is removed in a ``finally`` clause so that it is
    cleaned up even when an assertion fails.
    """
    num_classes = 2
    tmp_folder = tempfile.mkdtemp(prefix='test_images')

    try:
        # create folders and subfolders; parents are listed before their
        # children because os.mkdir does not create intermediate folders
        paths = []
        for cl in range(num_classes):
            class_directory = 'class-{}'.format(cl)
            classpaths = [
                class_directory,
                os.path.join(class_directory, 'subfolder-1'),
                os.path.join(class_directory, 'subfolder-2'),
                os.path.join(class_directory, 'subfolder-1', 'sub-subfolder')
            ]
            for path in classpaths:
                os.mkdir(os.path.join(tmp_folder, path))
            paths.append(classpaths)

        # save the images in the paths
        count = 0
        filenames = []
        for test_images in all_test_images:
            for im in test_images:
                # rotate image class
                im_class = count % num_classes
                # rotate subfolders
                classpaths = paths[im_class]
                filename = os.path.join(classpaths[count % len(classpaths)],
                                        'image-{}.png'.format(count))
                filenames.append(filename)
                im.save(os.path.join(tmp_folder, filename))
                count += 1

        # create iterator
        generator = image_data_generator.ImageDataGenerator(
            validation_split=validation_split)

        # an unknown subset name is rejected
        with pytest.raises(ValueError):
            generator.flow_from_directory(tmp_folder, subset='foo')

        train_iterator = generator.flow_from_directory(tmp_folder,
                                                       subset='training')
        assert train_iterator.samples == num_training

        valid_iterator = generator.flow_from_directory(tmp_folder,
                                                       subset='validation')
        assert valid_iterator.samples == count - num_training

        # check number of classes and images
        assert len(train_iterator.class_indices) == num_classes
        assert len(train_iterator.classes) == num_training
        assert len(set(train_iterator.filenames)
                   & set(filenames)) == num_training
    finally:
        # always remove the temporary tree, even after a failed assertion
        shutil.rmtree(tmp_folder)
コード例 #10
0
def test_dataframe_iterator_class_mode_raw(all_test_images, tmpdir):
    """Check that ``class_mode='raw'`` returns the target columns unchanged.

    Two cases are covered: a single target column (1D targets) and a list of
    two target columns (2D targets). With shuffling disabled, the first
    batch of three targets must equal the first three dataframe rows.
    """
    # write every fixture image to disk under a sequential name
    flat_images = (im for group in all_test_images for im in group)
    filenames = []
    for index, im in enumerate(flat_images):
        name = 'image-{}.png'.format(index)
        im.save(str(tmpdir / name))
        filenames.append(name)

    # two columns of random float targets
    df = pd.DataFrame({"filename": filenames})
    df = df.assign(output_0=np.random.uniform(size=len(filenames)),
                   output_1=np.random.uniform(size=len(filenames)))

    cases = (
        # case for 1D output
        ('output_0', (3, )),
        # case with a 2D output
        (['output_0', 'output_1'], (3, 2)),
    )
    for y_col, expected_shape in cases:
        generator = image_data_generator.ImageDataGenerator()
        df_iterator = generator.flow_from_dataframe(df,
                                                    y_col=y_col,
                                                    directory=str(tmpdir),
                                                    batch_size=3,
                                                    shuffle=False,
                                                    class_mode='raw')
        batch_x, batch_y = next(df_iterator)
        assert isinstance(batch_x, np.ndarray)
        assert batch_x.ndim == 4
        assert isinstance(batch_y, np.ndarray)
        assert batch_y.shape == expected_shape
        assert np.array_equal(batch_y, df[y_col].values[:3])
コード例 #11
0
def test_dataframe_iterator_sample_weights(all_test_images, tmpdir):
    """Check ``weight_col`` handling in ``flow_from_dataframe``.

    With ``class_mode="input"`` and a numeric weight column, each batch must
    be an ``(x, y, weights)`` triple in which ``y`` is an independent copy of
    ``x`` and ``weights`` follows the dataframe order. A weight column of
    strings must raise ``TypeError``.
    """
    # save the images in the paths
    count = 0
    filenames = []
    for test_images in all_test_images:
        for im in test_images:
            filename = 'image-{}.png'.format(count)
            im.save(str(tmpdir / filename))
            filenames.append(filename)
            count += 1
    df = pd.DataFrame({"filename": filenames})
    # alternating weights 2, 5, 2, 5, ... truncated to the dataframe length
    df['weight'] = ([2, 5] * len(df))[:len(df)]
    generator = image_data_generator.ImageDataGenerator()
    df_iterator = generator.flow_from_dataframe(df, str(tmpdir),
                                                x_col="filename",
                                                y_col=None,
                                                shuffle=False,
                                                batch_size=5,
                                                weight_col='weight',
                                                class_mode="input")

    batch = next(df_iterator)
    assert len(batch) == 3  # (x, y, weights)
    # check if input and output have the same shape and they're the same;
    # compare the arrays element-wise (comparing ``.all()`` results would
    # only compare two booleans)
    assert batch[0].shape == batch[1].shape
    assert np.allclose(batch[0], batch[1])
    # check if the input and output images are not the same numpy array:
    # mutating the output must leave the input untouched
    input_img = batch[0][0]
    output_img = batch[1][0]
    output_img[0][0][0] += 1
    assert input_img[0][0][0] != output_img[0][0][0]
    assert np.array_equal(np.array([2, 5, 2, 5, 2]), batch[2])

    # fail: string weights are not accepted
    df['weight'] = (['2', '5'] * len(df))[:len(df)]
    with pytest.raises(TypeError):
        image_data_generator.ImageDataGenerator().flow_from_dataframe(
            df,
            weight_col='weight',
            class_mode="input"
        )
コード例 #12
0
def test_deterministic_transform():
    """Check ``apply_transform`` for flips and a 45-degree rotation.

    Vertical and horizontal flips of a random image must match reversing the
    first and second axis respectively. Rotating an all-ones 3x3x3 image by
    ``theta=45`` with ``fill_mode='constant'`` must give the hard-coded
    expected array below.
    """
    generator = image_data_generator.ImageDataGenerator(rotation_range=90,
                                                        fill_mode='constant')
    # flips: compare against plain axis reversal
    x = np.random.random((32, 32, 3))
    assert np.allclose(generator.apply_transform(x, {'flip_vertical': True}),
                       x[::-1, :, :])
    assert np.allclose(generator.apply_transform(x, {'flip_horizontal': True}),
                       x[:, ::-1, :])
    # rotation: compare against the precomputed result for an all-ones input
    x = np.ones((3, 3, 3))
    x_rotated = np.array([[[0., 0., 0.], [0., 0., 0.], [1., 1., 1.]],
                          [[0., 0., 0.], [1., 1., 1.], [1., 1., 1.]],
                          [[0., 0., 0.], [0., 0., 0.], [1., 1., 1.]]])
    assert np.allclose(generator.apply_transform(x, {'theta': 45}), x_rotated)
コード例 #13
0
def test_dataframe_iterator_with_validation_split(all_test_images,
                                                  validation_split,
                                                  num_training, tmpdir):
    """Check ``flow_from_dataframe`` training/validation subsets.

    The fixture images are saved to ``tmpdir`` and labeled randomly with
    '0' or '1'. The training iterator must report ``num_training`` samples
    and the validation iterator the remainder; an unknown ``subset`` name
    must raise ``ValueError``.
    """
    num_classes = 2

    # save the images in the tmpdir
    count = 0
    filenames = []
    for test_images in all_test_images:
        for im in test_images:
            filename = "image-{}.png".format(count)
            filenames.append(filename)
            im.save(str(tmpdir / filename))
            count += 1

    df = pd.DataFrame({
        "filename": filenames,
        "class": [str(random.randint(0, 1)) for _ in filenames]
    })
    # create iterator
    generator = image_data_generator.ImageDataGenerator(
        validation_split=validation_split)
    df_sparse_iterator = generator.flow_from_dataframe(df,
                                                       str(tmpdir),
                                                       class_mode="sparse")
    # the targets (second element of the batch) must not contain NaN
    if np.isnan(next(df_sparse_iterator)[1]).any():
        raise ValueError('Invalid values.')

    # an unknown subset name is rejected; the directory is passed as a
    # string, like in every other call of this test
    with pytest.raises(ValueError):
        generator.flow_from_dataframe(df, str(tmpdir), subset='foo')

    train_iterator = generator.flow_from_dataframe(df,
                                                   str(tmpdir),
                                                   subset='training')
    assert train_iterator.samples == num_training

    valid_iterator = generator.flow_from_dataframe(df,
                                                   str(tmpdir),
                                                   subset='validation')
    assert valid_iterator.samples == count - num_training

    # check number of classes and images
    assert len(train_iterator.class_indices) == num_classes
    assert len(train_iterator.classes) == num_training
    assert len(set(train_iterator.filenames) & set(filenames)) == num_training
コード例 #14
0
def test_dataframe_iterator_with_subdirs(all_test_images, tmpdir):
    """Check ``flow_from_dataframe`` with filenames inside nested folders.

    The fixture images are spread over two class folders with nested
    subfolders under ``tmpdir``. The dataframe lists them by relative path,
    and the iterator must report exactly those paths.
    """
    num_classes = 2

    # create folders and subfolders; parents are listed before their children
    paths = []
    for cl in range(num_classes):
        class_directory = 'class-{}'.format(cl)
        first_subfolder = os.path.join(class_directory, 'subfolder-1')
        classpaths = [
            class_directory,
            first_subfolder,
            os.path.join(class_directory, 'subfolder-2'),
            os.path.join(first_subfolder, 'sub-subfolder'),
        ]
        for path in classpaths:
            tmpdir.join(path).mkdir()
        paths.append(classpaths)

    # save the images, rotating through the classes and their subfolders
    flat_images = (im for group in all_test_images for im in group)
    filenames = []
    for index, im in enumerate(flat_images):
        classpaths = paths[index % num_classes]
        folder = classpaths[index % len(classpaths)]
        relative_path = os.path.join(folder, 'image-{}.png'.format(index))
        filenames.append(relative_path)
        im.save(str(tmpdir / relative_path))

    # create dataframe with random class labels
    labels = [
        str(c) for c in np.random.randint(num_classes, size=len(filenames))
    ]
    df = pd.DataFrame({"filename": filenames, "class": labels})

    # create iterator
    generator = image_data_generator.ImageDataGenerator()
    df_iterator = generator.flow_from_dataframe(df,
                                                str(tmpdir),
                                                class_mode='binary')

    # Test the number of items in iterator
    assert df_iterator.n == len(filenames)
    assert set(df_iterator.filenames) == set(filenames)
コード例 #15
0
def test_image_data_generator_invalid_data():
    """Check that ``fit`` and ``flow`` raise ``ValueError`` on invalid input.

    ``fit`` is given a 3-D array and ``flow`` a 1-D index array.
    """
    options = dict(
        featurewise_center=True,
        samplewise_center=True,
        featurewise_std_normalization=True,
        samplewise_std_normalization=True,
        zca_whitening=True,
        data_format='channels_last',
    )
    generator = image_data_generator.ImageDataGenerator(**options)

    # Test fit with invalid data
    x = np.random.random((3, 10, 10))
    with pytest.raises(ValueError):
        generator.fit(x)

    # Test flow with invalid data
    x = np.random.random((32, 10, 10))
    with pytest.raises(ValueError):
        generator.flow(np.arange(x.shape[0]))
コード例 #16
0
def test_dataframe_iterator_with_custom_indexed_dataframe(
        all_test_images, tmpdir):
    """Check that the dataframe index does not affect the batches.

    Three dataframes with identical columns but different indexes (default,
    integers starting at 1, and the filenames themselves) are iterated with
    the same seed and must produce identical batches.
    """
    num_classes = 2

    # save the images in the tmpdir
    flat_images = (im for group in all_test_images for im in group)
    filenames = []
    for position, im in enumerate(flat_images):
        name = "image-{}.png".format(position)
        filenames.append(name)
        im.save(str(tmpdir / name))

    # create dataframes: same columns, three different indexes
    classes = [
        str(c) for c in np.random.randint(num_classes, size=len(filenames))
    ]
    columns = {"filename": filenames, "class": classes}
    df = pd.DataFrame(columns)
    df2 = pd.DataFrame(columns, index=np.arange(1, len(filenames) + 1))
    df3 = pd.DataFrame(columns, index=filenames)

    # create iterators sharing one seed
    seed = 1
    generator = image_data_generator.ImageDataGenerator()
    iterators = [
        generator.flow_from_dataframe(frame, str(tmpdir), seed=seed)
        for frame in (df, df2, df3)
    ]

    # Test all iterators return same pairs of arrays
    for _ in range(len(filenames)):
        (a1, c1), (a2, c2), (a3, c3) = [next(it) for it in iterators]
        assert np.array_equal(a1, a2)
        assert np.array_equal(a1, a3)
        assert np.array_equal(c1, c2)
        assert np.array_equal(c1, c3)
コード例 #17
0
def test_dataframe_iterator_class_mode_input(all_test_images, tmpdir):
    """Check ``class_mode="input"`` in ``flow_from_dataframe``.

    The targets of each batch must equal the inputs while being a separate
    array. This is checked once with ``y_col=None`` and once with a
    ``y_col`` name that is not a column of the dataframe.
    """
    # save the images in the paths
    count = 0
    filenames = []
    for test_images in all_test_images:
        for im in test_images:
            filename = 'image-{}.png'.format(count)
            im.save(str(tmpdir / filename))
            filenames.append(filename)
            count += 1
    df = pd.DataFrame({"filename": filenames})
    generator = image_data_generator.ImageDataGenerator()
    df_autoencoder_iterator = generator.flow_from_dataframe(df,
                                                            str(tmpdir),
                                                            x_col="filename",
                                                            y_col=None,
                                                            class_mode="input")

    batch = next(df_autoencoder_iterator)

    # check if input and output have the same shape and they're the same
    assert batch[0].shape == batch[1].shape
    assert np.allclose(batch[0], batch[1])
    # check if the input and output images are not the same numpy array:
    # mutating the output must leave the input untouched
    input_img = batch[0][0]
    output_img = batch[1][0]
    output_img[0][0][0] += 1
    assert (input_img[0][0][0] != output_img[0][0][0])

    # same checks with a y_col name that the dataframe does not contain
    df_autoencoder_iterator = generator.flow_from_dataframe(df,
                                                            str(tmpdir),
                                                            x_col="filename",
                                                            y_col="class",
                                                            class_mode="input")

    batch = next(df_autoencoder_iterator)

    # check if input and output have the same shape and they're the same;
    # compare the arrays element-wise (comparing ``.all()`` results would
    # only compare two booleans)
    assert batch[0].shape == batch[1].shape
    assert np.allclose(batch[0], batch[1])
    # check if the input and output images are not the same numpy array
    input_img = batch[0][0]
    output_img = batch[1][0]
    output_img[0][0][0] += 1
    assert (input_img[0][0][0] != output_img[0][0][0])
コード例 #18
0
def test_dataframe_iterator_validate_filenames(all_test_images, tmpdir):
    """Check the effect of ``validate_filenames`` in ``flow_from_dataframe``.

    The dataframe lists every saved image plus two names that were never
    written to disk. By default those two entries must be dropped; with
    ``validate_filenames=False`` every row must be kept.
    """
    # write every fixture image to disk under a sequential name
    flat_images = (im for group in all_test_images for im in group)
    filenames = []
    for index, im in enumerate(flat_images):
        name = 'image-{}.png'.format(index)
        im.save(str(tmpdir / name))
        filenames.append(name)

    extra_names = ['test.jpp', 'test.jpg']
    df = pd.DataFrame({"filename": filenames + extra_names})
    n_rows = len(df['filename'])
    generator = image_data_generator.ImageDataGenerator()

    # default behaviour: the two extra entries are filtered out
    df_iterator = generator.flow_from_dataframe(df,
                                                str(tmpdir),
                                                class_mode="input")
    assert len(df_iterator.filenames) == n_rows - 2

    # validation disabled: every row is kept
    df_iterator = generator.flow_from_dataframe(df,
                                                str(tmpdir),
                                                class_mode="input",
                                                validate_filenames=False)
    assert len(df_iterator.filenames) == n_rows
コード例 #19
0
def test_directory_iterator_class_mode_input(all_test_images, tmpdir):
    """Check ``class_mode='input'`` in ``flow_from_directory``.

    All fixture images are saved into a single ``class-1`` folder. The
    targets of a batch must have the same shape as the inputs while being a
    separate array.
    """
    tmpdir.join('class-1').mkdir()

    # save the images in the class folder under sequential names
    flat_images = (im for group in all_test_images for im in group)
    for index, im in enumerate(flat_images):
        name = 'image-{}.png'.format(index)
        im.save(str(tmpdir / 'class-1' / name))

    # create iterator
    generator = image_data_generator.ImageDataGenerator()
    dir_iterator = generator.flow_from_directory(str(tmpdir),
                                                 class_mode='input')
    batch = next(dir_iterator)
    inputs, targets = batch[0], batch[1]

    # check if input and output have the same shape
    assert inputs.shape == targets.shape
    # check if the input and output images are not the same numpy array:
    # mutating the output must leave the input untouched
    input_img = inputs[0]
    output_img = targets[0]
    output_img[0][0][0] += 1
    assert input_img[0][0][0] != output_img[0][0][0]
コード例 #20
0
        img_name = img_n.format(img_counter)
        imwrite(img_name, frame)
        print("{} written!".format(img_name))
        img_counter += 1
        break
# release `cam` and close any open windows
# (names suggest an OpenCV capture - TODO confirm)
cam.release()
destroyAllWindows()

# load the image
# NOTE(review): `img_n` is used as a format template a few lines above
# (`img_n.format(img_counter)`); loading `img_n` itself rather than the
# formatted `img_name` looks unintended - confirm.
img = load_img(img_n)
# convert to numpy array
data = img_to_array(img)
# expand dimension to one sample
samples = expand_dims(data, 0)
# create image data augmentation generator with random zoom
datagen = image_data_generator.ImageDataGenerator(zoom_range=[0.5, 1.0])
# prepare iterator
# NOTE(review): `dir` is not defined in the visible code; unless it is
# assigned earlier, this passes the builtin `dir` function - confirm.
it = datagen.flow(samples, batch_size=1, save_to_dir=dir)
# draw 10 batches from the zoom iterator
for i in range(10):
    # generate batch of images
    batch = it.next()
    # convert to unsigned integers for viewing
    # (`image` is not used afterwards in the visible code)
    image = batch[0].astype('uint8')
# create a second augmentation generator, this time with random rotation
datagen = image_data_generator.ImageDataGenerator(rotation_range=90)
# prepare iterator
it = datagen.flow(samples, batch_size=1, save_to_dir=dir)
# draw 10 batches from the rotation iterator
for i in range(10):
    # generate batch of images
    batch = it.next()
コード例 #21
0
def test_dataframe_iterator_absolute_path(all_test_images, tmpdir):
    """Exercise ``flow_from_dataframe`` with absolute paths and no directory.

    Four dataframes are built from the same saved images: filenames only,
    filenames plus a binary class, filenames plus one non-existent entry, and
    filenames plus one entry with a forbidden extension. A training and a
    validation iterator are also created from a generator with
    ``validation_split=0.2``. Every iterator must report the expected number
    of samples and yield the same image data as the plain iterator.
    """
    # Save the fixture images in tmpdir, remembering each absolute path.
    file_paths = []
    flat_images = (im for test_images in all_test_images for im in test_images)
    for count, im in enumerate(flat_images):
        file_path = str(tmpdir / "image-{:0>5}.png".format(count))
        file_paths.append(file_path)
        im.save(file_path)
    n_paths = len(file_paths)

    # Copy the last saved image under a forbidden extension.
    file_path_fbd = str(tmpdir / 'image-forbid.fbd')
    shutil.copy(file_path, file_path_fbd)

    # One random binary label (as a string) per saved image.
    classes = list(map(str, np.random.randint(2, size=n_paths)))

    df = pd.DataFrame({"filename": file_paths})
    df2 = pd.DataFrame({"filename": file_paths, "class": classes})
    df3 = pd.DataFrame({"filename": ['image-not-exist.png'] + file_paths})
    df4 = pd.DataFrame({"filename": file_paths + [file_path_fbd]})

    def flow(gen, frame, class_mode, **extra):
        # Unshuffled, one-image-per-batch iterator with no base directory.
        return gen.flow_from_dataframe(frame,
                                       None,
                                       class_mode=class_mode,
                                       shuffle=False,
                                       batch_size=1,
                                       **extra)

    generator = image_data_generator.ImageDataGenerator()
    df_iterator = flow(generator, df, None)
    df2_iterator = flow(generator, df2, 'binary')
    df3_iterator = flow(generator, df3, None)
    df4_iterator = flow(generator, df4, None)

    validation_split = 0.2
    generator_split = image_data_generator.ImageDataGenerator(
        validation_split=validation_split)
    df_train_iterator = flow(generator_split, df, None, subset='training')
    df_val_iterator = flow(generator_split, df, None, subset='validation')

    # The missing file and the forbidden-extension file must not be counted.
    for iterator in (df_iterator, df2_iterator, df3_iterator, df4_iterator):
        assert iterator.n == n_paths
    assert df_val_iterator.n == int(validation_split * n_paths)
    assert df_train_iterator.n == n_paths - df_val_iterator.n

    # Walk all iterators in lockstep; the validation iterator is consumed
    # first, then the training one, and each must match the plain iterator.
    for step in range(n_paths):
        plain = next(df_iterator)
        labelled, _ = next(df2_iterator)
        with_missing = next(df3_iterator)
        with_forbidden = next(df4_iterator)

        if step < df_val_iterator.n:
            from_split = next(df_val_iterator)
        else:
            from_split = next(df_train_iterator)

        for other in (labelled, with_missing, with_forbidden, from_split):
            assert np.array_equal(plain, other)
コード例 #22
0
def test_image_data_generator_fit():
    """Smoke-test ``ImageDataGenerator.fit`` for both data formats.

    For each data format a fully configured generator is fitted once with
    ``augment=True`` and then, without augmentation, on a grayscale batch, an
    RGB batch and a batch with more samples than dimensions. The test passes
    if none of the calls raises.
    """
    shared_options = dict(
        featurewise_center=True,
        samplewise_center=True,
        featurewise_std_normalization=True,
        samplewise_std_normalization=True,
        zca_whitening=True,
        rotation_range=90.,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.5,
        zoom_range=(0.2, 0.2),
        channel_shift_range=0.,
        brightness_range=(1, 5),
        fill_mode='nearest',
        cval=0.5,
        horizontal_flip=True,
        vertical_flip=True,
        interpolation_order=1)

    # (data_format, shape for the augmented fit,
    #  shapes for the plain fits: grayscale, RGB, more samples than dims)
    # NOTE(review): the augmented fit under 'channels_first' is fed a
    # channels-last shaped array, as before -- confirm this is intentional.
    scenarios = (
        ('channels_last', (32, 10, 10, 3),
         ((32, 10, 10, 1), (32, 10, 10, 3), (32, 4, 4, 1))),
        ('channels_first', (32, 10, 10, 3),
         ((32, 1, 10, 10), (32, 3, 10, 10), (32, 1, 4, 4))),
    )

    for data_format, augmented_shape, plain_shapes in scenarios:
        generator = image_data_generator.ImageDataGenerator(
            data_format=data_format, **shared_options)
        generator.fit(np.random.random(augmented_shape), augment=True)
        for shape in plain_shapes:
            generator.fit(np.random.random(shape))
コード例 #23
0
def test_image_data_generator_with_split_value_error():
    """Constructing a generator with ``validation_split=5`` must raise ValueError."""
    invalid_split = 5
    with pytest.raises(ValueError):
        image_data_generator.ImageDataGenerator(
            validation_split=invalid_split)
コード例 #24
0
def test_dataframe_iterator(all_test_images, tmpdir):
    """End-to-end checks for ``DataFrameIterator`` and ``flow_from_dataframe``.

    The fixture images are saved into ``tmpdir`` and described by a dataframe
    holding relative filenames, random binary classes and absolute paths.
    The test then checks:

    * a bare ``DataFrameIterator`` yields a 2-element batch of numpy arrays;
    * iterators built from absolute paths and from ``directory`` + relative
      filenames report the expected classes and filenames;
    * ``class_mode='sparse'`` labels contain no NaN and, unshuffled, follow
      the dataframe order;
    * invalid ``color_mode`` / ``class_mode`` raise ``ValueError`` and
      ``has_ext`` emits a ``DeprecationWarning``;
    * the iterator can be indexed like a sequence, passes numpy arrays to the
      preprocessing function and raises ``ValueError`` for an out-of-range
      batch index.
    """
    num_classes = 2

    # save the images in the tmpdir
    count = 0
    filenames = []
    filepaths = []
    for test_images in all_test_images:
        for im in test_images:
            filename = "image-{}.png".format(count)
            filenames.append(filename)
            filepaths.append(os.path.join(str(tmpdir), filename))
            im.save(str(tmpdir / filename))
            count += 1

    df = pd.DataFrame({
        "filename": filenames,
        "class": [str(random.randint(0, 1)) for _ in filenames],
        "filepaths": filepaths
    })

    # create iterator
    iterator = dataframe_iterator.DataFrameIterator(df, str(tmpdir))
    batch = next(iterator)
    assert len(batch) == 2
    assert isinstance(batch[0], np.ndarray)
    assert isinstance(batch[1], np.ndarray)
    generator = image_data_generator.ImageDataGenerator()
    # absolute paths via x_col, and relative filenames resolved against tmpdir
    df_iterator = generator.flow_from_dataframe(df, x_col='filepaths')
    df_iterator_dir = generator.flow_from_dataframe(df, str(tmpdir))
    df_sparse_iterator = generator.flow_from_dataframe(df,
                                                       str(tmpdir),
                                                       class_mode="sparse")
    assert not np.isnan(df_sparse_iterator.classes).any()
    # check number of classes and images
    assert len(df_iterator.class_indices) == num_classes
    assert len(df_iterator.classes) == count
    assert set(df_iterator.filenames) == set(filepaths)
    assert len(df_iterator_dir.class_indices) == num_classes
    assert len(df_iterator_dir.classes) == count
    assert set(df_iterator_dir.filenames) == set(filenames)
    # test without shuffle: labels must come back in dataframe order
    _, batch_y = next(
        generator.flow_from_dataframe(df,
                                      str(tmpdir),
                                      shuffle=False,
                                      class_mode="sparse"))
    assert (batch_y == df['class'].astype('float')[:len(batch_y)]).all()
    # Test invalid use cases
    with pytest.raises(ValueError):
        generator.flow_from_dataframe(df, str(tmpdir), color_mode='cmyk')
    with pytest.raises(ValueError):
        generator.flow_from_dataframe(df, str(tmpdir), class_mode='output')
    with pytest.warns(DeprecationWarning):
        generator.flow_from_dataframe(df, str(tmpdir), has_ext=True)
    with pytest.warns(DeprecationWarning):
        generator.flow_from_dataframe(df, str(tmpdir), has_ext=False)

    def preprocessing_function(x):
        """This will fail if not provided by a Numpy array.
        Note: This is made to enforce backward compatibility.
        """

        assert x.shape == (26, 26, 3)
        assert type(x) is np.ndarray

        return np.zeros_like(x)

    # Test usage as Sequence
    generator = image_data_generator.ImageDataGenerator(
        preprocessing_function=preprocessing_function)
    dir_seq = generator.flow_from_dataframe(df,
                                            str(tmpdir),
                                            target_size=(26, 26),
                                            color_mode='rgb',
                                            batch_size=3,
                                            class_mode='categorical')
    # Float divisor keeps the expected batch count correct even where `/`
    # performs integer division.
    assert len(dir_seq) == np.ceil(count / 3.)
    x1, y1 = dir_seq[1]
    assert x1.shape == (3, 26, 26, 3)
    assert y1.shape == (3, num_classes)
    # The preprocessing function zeroes every image, so the batch is all 0.
    x1, y1 = dir_seq[5]
    assert (x1 == 0).all()

    # Indexing past the last batch must fail.
    # NOTE(review): the hard-coded indices 5 and 9 depend on the number of
    # fixture images -- confirm against the `all_test_images` fixture.
    with pytest.raises(ValueError):
        x1, y1 = dir_seq[9]
コード例 #25
0
def test_image_data_generator_flow(all_test_images, tmpdir):
    """Check that ``ImageDataGenerator.flow`` accepts the supported inputs.

    For each group of fixture images a fully configured generator builds
    flows with labels, sample weights, shuffling, ``save_to_dir``, no labels,
    and one or two miscellaneous input arrays. Mismatched array lengths must
    raise ``ValueError`` with the expected message. Finally, a flow is built
    on an integer label map with ``interpolation_order=0``.
    """
    augmentation_options = dict(
        featurewise_center=True,
        samplewise_center=True,
        featurewise_std_normalization=True,
        samplewise_std_normalization=True,
        zca_whitening=True,
        rotation_range=90.,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.5,
        zoom_range=0.2,
        channel_shift_range=0.,
        brightness_range=(1, 5),
        fill_mode='nearest',
        cval=0.5,
        horizontal_flip=True,
        vertical_flip=True,
        interpolation_order=1)
    save_dir = str(tmpdir)

    for test_images in all_test_images:
        # Stack the group into one (samples, ...) array.
        images = np.vstack(
            [utils.img_to_array(im)[None, ...] for im in test_images])
        dsize = images.shape[0]
        generator = image_data_generator.ImageDataGenerator(
            **augmentation_options)

        # Labelled, ordered flow saved to disk.
        generator.flow(images,
                       np.arange(dsize),
                       shuffle=False,
                       save_to_dir=save_dir,
                       batch_size=3)

        # Same, with per-sample weights.
        generator.flow(images,
                       np.arange(dsize),
                       shuffle=False,
                       sample_weight=np.arange(dsize) + 1,
                       save_to_dir=save_dir,
                       batch_size=3)

        # Shuffled with a fixed seed.
        generator.flow(images,
                       np.arange(dsize),
                       shuffle=True,
                       save_to_dir=save_dir,
                       batch_size=3,
                       seed=42)

        # No labels.
        generator.flow(images,
                       None,
                       shuffle=True,
                       save_to_dir=save_dir,
                       batch_size=3)

        # One miscellaneous input array alongside the images.
        x_misc1 = np.random.random(dsize)
        generator.flow((images, x_misc1),
                       np.arange(dsize),
                       shuffle=False,
                       batch_size=2)

        # Two miscellaneous input arrays.
        x_misc2 = np.random.random((dsize, 3, 3))
        generator.flow((images, [x_misc1, x_misc2]),
                       np.arange(dsize),
                       shuffle=False,
                       batch_size=2)

        # The same input layouts with `y = None`.
        generator.flow(images, None, batch_size=3)
        generator.flow((images, x_misc1), None, batch_size=3, shuffle=False)
        generator.flow((images, [x_misc1, x_misc2]),
                       None,
                       batch_size=3,
                       shuffle=False)

        # From here on a generator with a validation split is used.
        generator = image_data_generator.ImageDataGenerator(
            validation_split=0.2)
        generator.flow(images, batch_size=3)

        # Failure case: a miscellaneous array of the wrong length.
        x_misc_err = np.random.random((dsize + 1, 3, 3))
        with pytest.raises(ValueError) as e_info:
            generator.flow((images, x_misc_err),
                           np.arange(dsize),
                           batch_size=3)
        assert 'All of the arrays in' in str(e_info.value)

        # Failure case: labels of the wrong length.
        with pytest.raises(ValueError) as e_info:
            generator.flow((images, x_misc1),
                           np.arange(dsize + 1),
                           batch_size=3)
        assert '`x` (images tensor) and `y` (labels) ' in str(e_info.value)

        # `flow` with the split generator, ordered ...
        generator.flow(images,
                       np.arange(dsize),
                       shuffle=False,
                       save_to_dir=save_dir,
                       batch_size=3)

        # ... and shuffled with a fixed seed.
        generator.flow(images,
                       np.arange(dsize),
                       shuffle=True,
                       save_to_dir=save_dir,
                       batch_size=3,
                       seed=123)

    # Integer label map with interpolation_order=0 and a 90 degree
    # rotation range.
    label_map = np.array([[2, 2, 0, 2, 2], [1, 3, 2, 3, 1], [2, 1, 0, 1, 2],
                          [3, 1, 0, 2, 0], [3, 1, 3, 2, 1]])

    label_generator = image_data_generator.ImageDataGenerator(
        rotation_range=90., interpolation_order=0)
    label_generator.flow(x=label_map[np.newaxis, ..., np.newaxis], seed=123)
コード例 #26
0
def test_dataframe_iterator_class_mode_multi_output(all_test_images, tmpdir):
    """Check ``flow_from_dataframe`` with ``class_mode='multi_output'``.

    Two output columns are requested. The first batch must be a 4-D image
    array plus a list of two target arrays equal to the first three rows of
    each column. This is checked with ``output_1`` holding scalars, flattened
    arrays and ``(2, 2, 1)``-shaped arrays. Passing a single column name as
    ``y_col`` must raise ``TypeError``.
    """
    output_columns = ('output_0', 'output_1')

    # Save the fixture images as image-<index>.png in tmpdir.
    filenames = []
    for group in all_test_images:
        for im in group:
            filename = 'image-{}.png'.format(len(filenames))
            im.save(str(tmpdir / filename))
            filenames.append(filename)
    n_rows = len(filenames)

    def check_first_batch(frame):
        # Build an unshuffled batch-of-3 iterator and validate its first batch.
        iterator = image_data_generator.ImageDataGenerator(
        ).flow_from_dataframe(frame,
                              y_col=list(output_columns),
                              directory=str(tmpdir),
                              batch_size=3,
                              shuffle=False,
                              class_mode='multi_output')
        batch_x, batch_y = next(iterator)
        assert isinstance(batch_x, np.ndarray)
        assert batch_x.ndim == 4
        assert isinstance(batch_y, list)
        assert len(batch_y) == 2
        for position, column in enumerate(output_columns):
            expected = np.array(frame[column].tolist()[:3])
            assert np.array_equal(batch_y[position], expected)

    # Case 1: both outputs are single numbers.
    df = pd.DataFrame({
        "filename": filenames
    }).assign(output_0=np.random.uniform(size=n_rows),
              output_1=np.random.uniform(size=n_rows))
    check_first_batch(df)

    # Case 2: `output_1` holds a flattened (1-D) array per row.
    df['output_1'] = [
        np.random.uniform(size=(2, 2, 1)).flatten() for _ in range(len(df))
    ]
    check_first_batch(df)

    # Case 3: `output_1` holds a (2, 2, 1)-shaped array per row.
    df['output_1'] = [
        np.random.uniform(size=(2, 2, 1)) for _ in range(len(df))
    ]
    check_first_batch(df)

    # A single column name (not a list) is rejected.
    with pytest.raises(TypeError):
        image_data_generator.ImageDataGenerator().flow_from_dataframe(
            df,
            y_col='output_0',
            directory=str(tmpdir),
            class_mode='multi_output')
コード例 #27
0
def test_valid_args():
    """Passing ``brightness_range=0.1`` to the constructor must raise ValueError."""
    scalar_brightness = 0.1
    with pytest.raises(ValueError):
        image_data_generator.ImageDataGenerator(
            brightness_range=scalar_brightness)
コード例 #28
0
def test_directory_iterator(all_test_images, tmpdir):
    """End-to-end checks for ``flow_from_directory`` on a nested folder tree.

    Two class folders are created, each with nested subfolders, and the
    fixture images are spread over them. The iterator must find every image
    and both classes, reject an invalid ``color_mode`` / ``class_mode``, and
    behave as an indexable sequence that passes numpy arrays to the
    preprocessing function.
    """
    num_classes = 2

    # Build, for each class, the class folder plus its nested subfolders.
    nested_parts = (('subfolder-1',), ('subfolder-2',),
                    ('subfolder-1', 'sub-subfolder'))
    paths = []
    for cl in range(num_classes):
        class_directory = 'class-{}'.format(cl)
        classpaths = [class_directory] + [
            os.path.join(class_directory, *parts) for parts in nested_parts
        ]
        for path in classpaths:
            tmpdir.join(path).mkdir()
        paths.append(classpaths)

    # Save the images, rotating through the classes and through each class's
    # folders by running index.
    filenames = []
    flat_images = (im for test_images in all_test_images for im in test_images)
    for index, im in enumerate(flat_images):
        classpaths = paths[index % num_classes]
        folder = classpaths[index % len(classpaths)]
        filename = os.path.join(folder, 'image-{}.png'.format(index))
        filenames.append(filename)
        im.save(str(tmpdir / filename))
    count = len(filenames)

    # Default iterator over the whole tree.
    generator = image_data_generator.ImageDataGenerator()
    dir_iterator = generator.flow_from_directory(str(tmpdir))

    # Every class and every image must have been discovered.
    assert len(dir_iterator.class_indices) == num_classes
    assert len(dir_iterator.classes) == count
    assert set(dir_iterator.filenames) == set(filenames)

    # Unsupported modes are rejected.
    with pytest.raises(ValueError):
        generator.flow_from_directory(str(tmpdir), color_mode='cmyk')
    with pytest.raises(ValueError):
        generator.flow_from_directory(str(tmpdir), class_mode='output')

    def preprocessing_function(x):
        """Return zeros after checking that `x` is a (26, 26, 3) numpy array.

        The strict type check enforces backward compatibility: the function
        must be handed a plain numpy array.
        """
        assert x.shape == (26, 26, 3)
        assert type(x) is np.ndarray

        return np.zeros_like(x)

    # Use the iterator as a Sequence with the zeroing preprocessing function.
    generator = image_data_generator.ImageDataGenerator(
        preprocessing_function=preprocessing_function)
    dir_seq = generator.flow_from_directory(str(tmpdir),
                                            target_size=(26, 26),
                                            color_mode='rgb',
                                            batch_size=3,
                                            class_mode='categorical')
    assert len(dir_seq) == np.ceil(count / 3.)
    x1, y1 = dir_seq[1]
    assert x1.shape == (3, 26, 26, 3)
    assert y1.shape == (3, num_classes)
    x1, y1 = dir_seq[5]
    assert (x1 == 0).all()

    # Index 14 is one past the last batch (40 images, batch size 3).
    with pytest.raises(ValueError):
        x1, y1 = dir_seq[14]