def test_dataframe_iterator_n(self, tmpdir):
    """Check that `n` of a dataframe iterator matches the listed files.

    Every test image is written to `tmpdir`, but the dataframes only list
    the files from the third one onwards, so both iterators are expected
    to report two items fewer than were saved.
    """
    # write every test image into tmpdir under a sequential name
    filenames = []
    flat_images = (im for group in self.all_test_images for im in group)
    for idx, im in enumerate(flat_images):
        name = "image-{}.png".format(idx)
        filenames.append(name)
        im.save(str(tmpdir / name))

    # the dataframes deliberately leave out the first two files
    listed = filenames[2:]
    expected_n = len(filenames) - 2

    labels = np.random.randint(2, size=len(listed))
    unlabeled_df = pd.DataFrame({"filename": listed})
    labeled_df = pd.DataFrame({"filename": listed, "class": labels})

    generator = image.ImageDataGenerator()
    unlabeled_iterator = generator.flow_from_dataframe(
        unlabeled_df, str(tmpdir), has_ext=True, class_mode=None)
    labeled_iterator = generator.flow_from_dataframe(
        labeled_df, str(tmpdir), has_ext=True, class_mode='binary')

    # both iterators must expose exactly the listed files
    assert unlabeled_iterator.n == expected_n
    assert labeled_iterator.n == expected_n
Example #2
0
    def test_batch_standardize(self):
        """Run `ImageDataGenerator.standardize` on a whole batch.

        Each group of test images is stacked into one array, fitted,
        randomly transformed image by image and finally standardized as a
        batch. There are no assertions: the test passes if nothing raises.
        """
        # every normalisation and augmentation option is switched on
        generator_options = dict(
            featurewise_center=True,
            samplewise_center=True,
            featurewise_std_normalization=True,
            samplewise_std_normalization=True,
            zca_whitening=True,
            rotation_range=90.,
            width_shift_range=0.1,
            height_shift_range=0.1,
            shear_range=0.5,
            zoom_range=0.2,
            channel_shift_range=0.,
            brightness_range=(1, 5),
            fill_mode='nearest',
            cval=0.5,
            horizontal_flip=True,
            vertical_flip=True)

        for test_images in self.all_test_images:
            # add a leading batch axis to each image, then stack them
            images = np.vstack(
                [image.img_to_array(im)[None, ...] for im in test_images])

            generator = image.ImageDataGenerator(**generator_options)
            generator.fit(images, augment=True)

            transformed = np.copy(images)
            for idx in range(len(transformed)):
                transformed[idx] = generator.random_transform(
                    transformed[idx])
            transformed = generator.standardize(transformed)
    def test_directory_iterator_class_mode_input(self, tmpdir):
        """Check `class_mode='input'` on a directory iterator.

        All test images are saved into a single class folder. The first
        batch must contain two arrays of equal shape, and writing to the
        second one must not change the first.
        """
        class_dir = tmpdir.join('class-1')
        class_dir.mkdir()

        # dump every test image into the single class folder
        flat_images = (im for group in self.all_test_images for im in group)
        for idx, im in enumerate(flat_images):
            im.save(str(class_dir / 'image-{}.jpg'.format(idx)))

        generator = image.ImageDataGenerator()
        dir_iterator = generator.flow_from_directory(str(tmpdir),
                                                     class_mode='input')
        batch = next(dir_iterator)
        inputs, targets = batch[0], batch[1]

        # input and output batches must have the same shape
        assert inputs.shape == targets.shape

        # changing the output image must leave the input image untouched,
        # i.e. the two batches may not share memory
        first_input = inputs[0]
        first_target = targets[0]
        first_target[0][0][0] += 1
        assert first_input[0][0][0] != first_target[0][0][0]
    def test_directory_iterator_with_validation_split(self,
                                                      validation_split,
                                                      num_training):
        """Check the training/validation subsets of `flow_from_directory`.

        Two class folders with nested subfolders are created in a temporary
        directory and every test image is saved into one of them. The
        sample counts of both subsets are then compared with the expected
        split, and an unknown subset name must raise `ValueError`.

        Arguments:
            validation_split: value forwarded to
                `ImageDataGenerator(validation_split=...)`.
            num_training: number of samples the training subset must hold;
                the validation subset must hold all remaining images.

        NOTE(review): both arguments are presumably supplied by a
        `pytest.mark.parametrize` decorator that is not visible here.
        """
        num_classes = 2
        tmp_folder = tempfile.mkdtemp(prefix='test_images')

        # try/finally so the temporary tree is removed even when an
        # assertion below fails
        try:
            # create folders and subfolders
            paths = []
            for cl in range(num_classes):
                class_directory = 'class-{}'.format(cl)
                classpaths = [
                    class_directory,
                    os.path.join(class_directory, 'subfolder-1'),
                    os.path.join(class_directory, 'subfolder-2'),
                    os.path.join(class_directory, 'subfolder-1',
                                 'sub-subfolder')
                ]
                # parents are listed before their children, so a plain
                # mkdir is enough
                for path in classpaths:
                    os.mkdir(os.path.join(tmp_folder, path))
                paths.append(classpaths)

            # save the images in the paths
            count = 0
            filenames = []
            for test_images in self.all_test_images:
                for im in test_images:
                    # rotate image class
                    im_class = count % num_classes
                    # rotate subfolders
                    classpaths = paths[im_class]
                    filename = os.path.join(
                        classpaths[count % len(classpaths)],
                        'image-{}.png'.format(count))
                    filenames.append(filename)
                    im.save(os.path.join(tmp_folder, filename))
                    count += 1

            # create iterator
            generator = image.ImageDataGenerator(
                validation_split=validation_split)

            # an unknown subset name must be rejected
            with pytest.raises(ValueError):
                generator.flow_from_directory(tmp_folder, subset='foo')

            train_iterator = generator.flow_from_directory(tmp_folder,
                                                           subset='training')
            assert train_iterator.samples == num_training

            valid_iterator = generator.flow_from_directory(
                tmp_folder, subset='validation')
            assert valid_iterator.samples == count - num_training

            # check number of classes and images
            assert len(train_iterator.class_indices) == num_classes
            assert len(train_iterator.classes) == num_training
            assert len(set(train_iterator.filenames) &
                       set(filenames)) == num_training
        finally:
            shutil.rmtree(tmp_folder)
Example #5
0
    def test_image_data_generator_with_validation_split(self):
        """Check `flow(..., subset=...)` with a 50% validation split.

        Labels sorted as "all zeros, then all ones" must be rejected with
        the "different number of classes" error. Labels arranged in four
        alternating blocks must give two classes in the first batch of
        both subsets. An unknown subset name must raise `ValueError`.
        """
        expected_message = ('Training and validation subsets '
                            'have different number of classes after '
                            'the split.*')

        for test_images in self.all_test_images:
            images = np.vstack(
                [image.img_to_array(im)[None, ...] for im in test_images])
            generator = image.ImageDataGenerator(validation_split=0.5)

            def flow_subset(labels, subset, batch_size=10):
                # every call in this test is unshuffled
                return generator.flow(images,
                                      labels,
                                      shuffle=False,
                                      batch_size=batch_size,
                                      subset=subset)

            # training and validation sets would have different
            # number of classes, because labels are sorted
            half = len(images) // 2
            sorted_labels = np.concatenate(
                [np.zeros((half, )), np.ones((half, ))])
            with pytest.raises(ValueError, match=expected_message):
                flow_subset(sorted_labels, 'validation')

            # zeros/ones/zeros/ones: each half now holds both classes
            quarter = len(images) // 4
            mixed_labels = np.concatenate(
                [np.zeros((quarter, )), np.ones((quarter, ))] * 2)

            _, val_labels = flow_subset(mixed_labels, 'validation')[0]
            assert 2 == len(np.unique(val_labels))

            _, train_labels = flow_subset(mixed_labels, 'training')[0]
            assert 2 == len(np.unique(train_labels))

            with pytest.raises(ValueError):
                flow_subset(np.arange(images.shape[0]), 'foo', batch_size=3)
    def test_dataframe_iterator_with_custom_indexed_dataframe(self, tmpdir):
        """Check that the dataframe index does not affect the batches.

        Three dataframes with the same columns but different indexes
        (default, 1-based integers, file names) are fed to iterators that
        share a seed; all three must yield identical batches.
        """
        num_classes = 2

        # save the images in the tmpdir
        filenames = []
        flat_images = (im for group in self.all_test_images for im in group)
        for idx, im in enumerate(flat_images):
            name = "image-{}.png".format(idx)
            filenames.append(name)
            im.save(str(tmpdir / name))

        # same columns, three different indexes
        classes = np.random.randint(num_classes, size=len(filenames))
        columns = {"filename": filenames, "class": classes}
        default_index_df = pd.DataFrame(columns)
        one_based_df = pd.DataFrame(
            columns, index=np.arange(1, len(filenames) + 1))
        name_index_df = pd.DataFrame(columns, index=filenames)

        # one iterator per dataframe, all created with the same seed
        seed = 1
        generator = image.ImageDataGenerator()
        reference_iterator, one_based_iterator, name_index_iterator = [
            generator.flow_from_dataframe(frame,
                                          str(tmpdir),
                                          has_ext=True,
                                          seed=seed)
            for frame in (default_index_df, one_based_df, name_index_df)
        ]

        # Test all iterators return same pairs of arrays
        for _ in range(len(filenames)):
            ref_x, ref_y = next(reference_iterator)
            one_x, one_y = next(one_based_iterator)
            name_x, name_y = next(name_index_iterator)
            assert np.array_equal(ref_x, one_x)
            assert np.array_equal(ref_x, name_x)
            assert np.array_equal(ref_y, one_y)
            assert np.array_equal(ref_y, name_y)
Example #7
0
    def test_dataframe_iterator_with_sort_and_drop_duplicates(self, tmpdir):
        """Check the `sort` and `drop_duplicates` options.

        A dataframe listing the files in reverse order is used to compare
        `sort=True` with `sort=False`. A dataframe built from two
        overlapping prefixes of the file list is used to compare
        `drop_duplicates=True` with `drop_duplicates=False`.
        """
        # save the images in the tmpdir, zero-padded so that name order
        # equals creation order
        filenames = []
        flat_images = (im for group in self.all_test_images for im in group)
        for idx, im in enumerate(flat_images):
            name = "image-{:0>5}.png".format(idx)
            filenames.append(name)
            im.save(str(tmpdir / name))

        # two random prefix lengths; the shorter prefix is fully duplicated
        n_files = len(filenames)
        cut_a, cut_b = np.random.randint(1, n_files, size=2)
        reversed_names = filenames[::-1]
        duplicated_names = filenames[:cut_a] + filenames[:cut_b]

        reversed_df = pd.DataFrame({"filename": reversed_names})
        duplicated_df = pd.DataFrame({"filename": duplicated_names})

        # create iterators
        generator = image.ImageDataGenerator()
        directory = str(tmpdir)
        df_sort_iterator = generator.flow_from_dataframe(
            reversed_df, directory,
            class_mode=None, sort=True, shuffle=False)
        df_no_sort_iterator = generator.flow_from_dataframe(
            reversed_df, directory,
            class_mode=None, sort=False, shuffle=False)
        df_drop_iterator = generator.flow_from_dataframe(
            duplicated_df, directory,
            class_mode=None, drop_duplicates=True)
        df_no_drop_iterator = generator.flow_from_dataframe(
            duplicated_df, directory,
            class_mode=None, drop_duplicates=False)

        # Test sort: sorting undoes the reversal, not sorting keeps it
        unsorted_names = df_no_sort_iterator.filenames
        assert df_sort_iterator.filenames == unsorted_names[::-1]
        assert df_sort_iterator.filenames[0] == filenames[0]
        assert unsorted_names[0] == filenames[-1]

        # Test drop_duplicates
        assert df_drop_iterator.n == len(set(duplicated_names))
        assert df_no_drop_iterator.n == len(duplicated_names)
Example #8
0
    def test_dataframe_iterator_class_mode_input(self, tmpdir):
        """Check `class_mode="input"` on a dataframe iterator.

        The first batch must consist of two arrays with the same shape and
        the same content, and the two arrays may not share memory. The
        check is run twice, once with `y_col=None` and once with
        `y_col="class"`.
        """
        # save the images in the paths
        count = 0
        filenames = []
        for test_images in self.all_test_images:
            for im in test_images:
                filename = str(tmpdir / 'image-{}.png'.format(count))
                im.save(filename)
                filenames.append(filename)
                count += 1
        df = pd.DataFrame({"filename": filenames})
        generator = image.ImageDataGenerator()

        # NOTE(review): `df` has no "class" column; presumably `y_col` is
        # ignored when class_mode="input" - confirm.
        for y_col in (None, "class"):
            df_autoencoder_iterator = generator.flow_from_dataframe(
                df,
                str(tmpdir),
                x_col="filename",
                y_col=y_col,
                has_ext=True,
                class_mode="input")

            batch = next(df_autoencoder_iterator)

            # check if input and output have the same shape and they're the
            # same; comparing `.all()` of each array would only compare two
            # booleans
            assert batch[0].shape == batch[1].shape
            assert np.array_equal(batch[0], batch[1])

            # check if the input and output images are not the same numpy
            # array: a write to the output must not show up in the input
            input_img = batch[0][0]
            output_img = batch[1][0]
            output_img[0][0][0] += 1
            assert input_img[0][0][0] != output_img[0][0][0]
Example #9
0
    def test_image_data_generator_invalid_data(self):
        """Check that `fit` and `flow` raise `ValueError` on bad input."""
        generator = image.ImageDataGenerator(
            data_format='channels_last',
            featurewise_center=True,
            featurewise_std_normalization=True,
            samplewise_center=True,
            samplewise_std_normalization=True,
            zca_whitening=True)

        # Test fit with invalid data: a 3-D array is passed
        x = np.random.random((3, 10, 10))
        with pytest.raises(ValueError):
            generator.fit(x)

        # Test flow with invalid data
        # NOTE(review): only a 1-D index vector is handed to `flow`; `x` is
        # used for its length alone. Passing `x` itself may have been the
        # intent - confirm.
        x = np.random.random((32, 10, 10))
        with pytest.raises(ValueError):
            generator.flow(np.arange(x.shape[0]))
Example #10
0
def predict(avg_year, img_filename):
    """Classify one dataset image and plot it with its predicted label.

    The image is loaded from `<dataset_dir_path>/<avg_year>/<img_filename>`,
    resized to 96x96, scaled by 1/255 and passed to the model loaded from
    `model_path`. The plot label shows the predicted class, its score as a
    percentage and the true class; it is blue when they agree, red
    otherwise.

    Relies on the names `dataset_dir_path`, `batch_size` and `model_path`
    being defined outside this function.

    Arguments:
        avg_year: name of the class sub-directory holding the image; its
            string form is also used as the true label.
        img_filename: file name of the image inside that sub-directory.
    """
    datagen = image.ImageDataGenerator(rescale=1. / 255)

    # the generator is only built to learn the class name -> index mapping
    testgen = datagen.flow_from_directory(directory=dataset_dir_path,
                                          target_size=(96, 96),
                                          batch_size=batch_size,
                                          class_mode="categorical")

    # class indices dictionary with the mapping: class_name -> class_index
    class_indices = testgen.class_indices
    # order the names by their index explicitly, so that position i always
    # holds the class with index i regardless of the dict's iteration order
    class_names = sorted(class_indices, key=class_indices.get)

    # load the test image and turn it into a batch of one
    img_path = os.path.join(dataset_dir_path, str(avg_year), img_filename)
    img = image.load_img(img_path, target_size=(96, 96))
    img_array = image.img_to_array(img)
    img_array = img_array.reshape((1, ) + img_array.shape)
    img_array = img_array / 255.

    # load the model and predict the label
    model = models.load_model(model_path)
    pred = model.predict(img_array)

    predicted_label = class_names[np.argmax(pred)]
    predicted_prob = np.max(pred)

    true_label = str(avg_year)
    color = 'blue' if predicted_label == true_label else 'red'

    # plot the image
    plt.imshow(img, cmap=plt.cm.binary)
    plt.xlabel("{} {:2.0f}% ({})".format(predicted_label, 100 * predicted_prob,
                                         true_label),
               color=color)
    plt.show()
Example #11
0
def generatePredictionsRGB(model):
    """Predict on the test CSV and save predictions together with labels.

    The CSV at `TEST` (a name defined outside this function) is read into
    a dataframe. Its `Path` column supplies the image files and the 14
    finding columns supply the targets. Images are loaded as 256x256 RGB,
    scaled by 1/255 and fed to `model` in unshuffled batches. The
    predictions and `testgenerator.labels` are stacked into one array and
    written with `np.save` to "DenseNet_RGB_Predictions".

    Arguments:
        model: object providing `predict_generator(generator, steps,
            verbose=...)`.
    """
    labels = ['No Finding', 'Enlarged Cardiomediastinum',
              'Cardiomegaly', 'Lung Opacity', 'Lung Lesion',
              'Edema', 'Consolidation', 'Pneumonia', 'Atelectasis',
              'Pneumothorax', 'Pleural Effusion', 'Pleural Other',
              'Fracture', 'Support Devices']
    # single source of truth for the batch size and the step count below
    batch_size = 64

    datagen = image.ImageDataGenerator(rescale=1. / 255)
    testdf = pd.read_csv(TEST)
    testgenerator = datagen.flow_from_dataframe(testdf,
                                                directory=None,
                                                color_mode='rgb',
                                                target_size=(256, 256),
                                                x_col='Path',
                                                y_col=labels,
                                                class_mode="other",
                                                shuffle=False,
                                                batch_size=batch_size,
                                                drop_duplicates=False)

    # integer ceil division: enough whole steps to cover every sample,
    # instead of handing a fractional step count to predict_generator
    steps = (testgenerator.n + batch_size - 1) // batch_size
    predictions = model.predict_generator(testgenerator, steps, verbose=1)

    array = np.array([predictions, testgenerator.labels])
    np.save("DenseNet_RGB_Predictions", array)
Example #12
0
def preprocess():
    """Build the training and validation directory generators.

    Both generators come from one `ImageDataGenerator` that rescales by
    1/255 and reserves 20% of the data for validation. They read from
    `dataset_dir_path` with `batch_size` (both defined outside this
    function), resize to 96x96 and use categorical labels.

    Returns:
        A `(training_generator, validation_generator)` tuple.
    """
    train_data_generator = image.ImageDataGenerator(validation_split=0.2,
                                                    rescale=1. / 255)

    # options common to both subsets; only `subset` differs
    shared_options = dict(directory=dataset_dir_path,
                          target_size=(96, 96),
                          batch_size=batch_size,
                          class_mode="categorical")

    training_generator = train_data_generator.flow_from_directory(
        subset="training", **shared_options)
    validation_generator = train_data_generator.flow_from_directory(
        subset="validation", **shared_options)

    return training_generator, validation_generator
Example #13
0
 def test_deterministic_transform(self):
     """Check `apply_transform` against hand-computed results.

     Vertical and horizontal flips of a random image must equal the
     corresponding reversed slices. A 45 degree rotation of a 3x3 image
     of ones with constant fill must equal a fixed expected array, both
     through the generator and through `image.apply_affine_transform`.
     """
     generator = image.ImageDataGenerator(rotation_range=90,
                                          fill_mode='constant')

     # flips: compare with plain reversed slicing along each image axis
     x = np.random.random((32, 32, 3))
     assert np.allclose(
         generator.apply_transform(x, {'flip_vertical': True}),
         x[::-1, :, :])
     assert np.allclose(
         generator.apply_transform(x, {'flip_horizontal': True}),
         x[:, ::-1, :])

     # rotation: zeros in the expected output are the constant fill value
     x = np.ones((3, 3, 3))
     x_rotated = np.array([[[0., 0., 0.], [0., 0., 0.], [1., 1., 1.]],
                           [[0., 0., 0.], [1., 1., 1.], [1., 1., 1.]],
                           [[0., 0., 0.], [0., 0., 0.], [1., 1., 1.]]])
     assert np.allclose(generator.apply_transform(x, {'theta': 45}),
                        x_rotated)
     assert np.allclose(
         image.apply_affine_transform(x,
                                      theta=45,
                                      channel_axis=2,
                                      fill_mode='constant'), x_rotated)
    def test_image_data_generator_with_validation_split(self):
        """Check which samples each subset of a 50% split yields.

        With the sample index used as label and shuffling off, the first
        validation batch must carry labels 0, 1, 2 and the first training
        batch labels 4, 5, 6. An unknown subset name must raise
        `ValueError`.
        """
        for test_images in self.all_test_images:
            images = np.vstack(
                [image.img_to_array(im)[None, ...] for im in test_images])
            generator = image.ImageDataGenerator(validation_split=0.5)

            def flow_subset(subset):
                # labels are the sample indices, so a batch shows directly
                # which samples it was drawn from
                return generator.flow(images,
                                      np.arange(images.shape[0]),
                                      shuffle=False,
                                      batch_size=3,
                                      subset=subset)

            _, val_labels = flow_subset('validation')[0]
            assert list(val_labels) == [0, 1, 2]

            _, train_labels = flow_subset('training')[0]
            assert list(train_labels) == [4, 5, 6]

            with pytest.raises(ValueError):
                flow_subset('foo')
    def test_dataframe_iterator_absolute_path(self, tmpdir):
        """Check `flow_from_dataframe` with absolute paths and no directory.

        Four dataframes are built from the absolute paths of the saved
        images: plain, with a class column, with an extra non-existent
        file, and with an extra file carrying a `.fbd` extension. All four
        iterators must report the same number of items and yield the same
        images. Training/validation subsets of a 0.2 split must together
        yield the same images as well.
        """
        # save the images in the tmpdir and remember their absolute paths
        file_paths = []
        flat_images = (im for group in self.all_test_images for im in group)
        for idx, im in enumerate(flat_images):
            file_path = str(tmpdir / "image-{:0>5}.png".format(idx))
            file_paths.append(file_path)
            im.save(file_path)

        # prepare an image with a forbidden extension (a copy of the image
        # saved last)
        file_path_fbd = str(tmpdir / 'image-forbid.fbd')
        shutil.copy(file_path, file_path_fbd)

        # create dataframes
        n_paths = len(file_paths)
        classes = np.random.randint(2, size=n_paths)
        df = pd.DataFrame({"filename": file_paths})
        df2 = pd.DataFrame({"filename": file_paths, "class": classes})
        df3 = pd.DataFrame({"filename": ['image-not-exist.png'] + file_paths})
        df4 = pd.DataFrame({"filename": file_paths + [file_path_fbd]})

        # create iterators: unshuffled and one image per batch, so batches
        # can be compared position by position
        one_by_one = dict(has_ext=True, shuffle=False, batch_size=1)
        generator = image.ImageDataGenerator()
        df_iterator = generator.flow_from_dataframe(
            df, None, class_mode=None, **one_by_one)
        df2_iterator = generator.flow_from_dataframe(
            df2, None, class_mode='binary', **one_by_one)
        df3_iterator = generator.flow_from_dataframe(
            df3, None, class_mode=None, **one_by_one)
        df4_iterator = generator.flow_from_dataframe(
            df4, None, class_mode=None, **one_by_one)

        validation_split = 0.2
        generator_split = image.ImageDataGenerator(
            validation_split=validation_split)
        df_train_iterator = generator_split.flow_from_dataframe(
            df, None, class_mode=None, subset='training', **one_by_one)
        df_val_iterator = generator_split.flow_from_dataframe(
            df, None, class_mode=None, subset='validation', **one_by_one)

        # Test invalid use cases
        with pytest.raises(ValueError):
            generator.flow_from_dataframe(df, None,
                                          has_ext=False, class_mode=None)
        with pytest.raises(ValueError):
            generator.flow_from_dataframe(df2, None,
                                          has_ext=False, class_mode='binary')

        # Test the number of items in iterators
        for iterator in (df_iterator, df2_iterator,
                         df3_iterator, df4_iterator):
            assert iterator.n == n_paths
        assert df_val_iterator.n == int(validation_split * n_paths)
        assert df_train_iterator.n == n_paths - df_val_iterator.n

        # Test flow_from_dataframe
        for i in range(n_paths):
            reference = next(df_iterator)
            with_class, _ = next(df2_iterator)
            with_missing = next(df3_iterator)
            with_forbidden = next(df4_iterator)

            # the first `df_val_iterator.n` positions are compared with the
            # validation subset, the rest with the training subset
            if i < df_val_iterator.n:
                from_split = next(df_val_iterator)
            else:
                from_split = next(df_train_iterator)

            for other in (with_class, with_missing,
                          with_forbidden, from_split):
                assert np.array_equal(reference, other)
Example #16
0
    def test_dataframe_iterator(self, tmpdir):
        """Exercise `flow_from_dataframe` in its main configurations.

        Covers the default and sparse class modes, file names with and
        without extension, multi-column targets with `class_mode="other"`
        (numeric columns accepted, string columns rejected with
        `TypeError`), invalid `color_mode` / `class_mode` / extension
        combinations, and indexed access with a `preprocessing_function`.
        """
        num_classes = 2

        # save the images in the tmpdir
        count = 0
        filenames = []
        filenames_without = []
        for test_images in self.all_test_images:
            for im in test_images:
                filename = "image-{}.png".format(count)
                filename_without = "image-{}".format(count)
                filenames.append(filename)
                filenames_without.append(filename_without)
                im.save(str(tmpdir / filename))
                count += 1

        # alternate the labels so every class is guaranteed to occur;
        # purely random labels could all land in one class and make the
        # `num_classes` assertions below fail spuriously
        class_labels = [idx % num_classes for idx in range(count)]

        df = pd.DataFrame({"filename": filenames, "class": class_labels})

        # create iterator
        generator = image.ImageDataGenerator()
        df_iterator = generator.flow_from_dataframe(df,
                                                    str(tmpdir),
                                                    has_ext=True)
        df_sparse_iterator = generator.flow_from_dataframe(df,
                                                           str(tmpdir),
                                                           has_ext=True,
                                                           class_mode="sparse")
        assert not np.isnan(df_sparse_iterator.classes).any()

        df_without_ext = pd.DataFrame({
            "filename": filenames_without,
            "class": class_labels
        })
        df_without_ext_iterator = generator.flow_from_dataframe(df_without_ext,
                                                                str(tmpdir),
                                                                has_ext=False)

        # numeric targets in two columns; `random.random()` gives varying
        # floats, whereas `random.randrange(0, 1)` can only ever return 0
        df_regression = pd.DataFrame({
            "filename": filenames,
            "col1": [random.random() for _ in filenames],
            "col2": [random.random() for _ in filenames]
        })
        df_multiple_y_iterator = generator.flow_from_dataframe(
            df_regression,
            str(tmpdir),
            y_col=["col1", "col2"],
            has_ext=True,
            class_mode="other")
        batch_x, batch_y = next(df_multiple_y_iterator)

        # the same columns stored as strings must be rejected
        df_regression_str = pd.DataFrame(
            {
                "filename": filenames,
                "col1": [random.random() for _ in filenames],
                "col2": [random.random() for _ in filenames]
            },
            dtype=str)
        with pytest.raises(TypeError):
            generator.flow_from_dataframe(
                df_regression_str,
                str(tmpdir),
                y_col=["col1", "col2"],
                has_ext=True,
                class_mode="other")

        # check number of classes and images
        assert len(df_iterator.class_indices) == num_classes
        assert len(df_iterator.classes) == count
        assert set(df_iterator.filenames) == set(filenames)
        assert len(df_without_ext_iterator.class_indices) == num_classes
        assert len(df_without_ext_iterator.classes) == count
        assert set(df_without_ext_iterator.filenames) == set(filenames)
        assert batch_y.shape[1] == 2

        # Test invalid use cases
        with pytest.raises(ValueError):
            generator.flow_from_dataframe(df,
                                          str(tmpdir),
                                          color_mode='cmyk',
                                          has_ext=True)
        with pytest.raises(ValueError):
            generator.flow_from_dataframe(df,
                                          str(tmpdir),
                                          class_mode='output',
                                          has_ext=True)
        with pytest.raises(ValueError):
            generator.flow_from_dataframe(df_without_ext,
                                          str(tmpdir),
                                          has_ext=True)

        def preprocessing_function(x):
            """This will fail if not provided by a Numpy array.
            Note: This is made to enforce backward compatibility.
            """

            assert x.shape == (26, 26, 3)
            # exact type check on purpose: only a plain ndarray is accepted
            assert type(x) is np.ndarray

            return np.zeros_like(x)

        # Test usage as Sequence
        generator = image.ImageDataGenerator(
            preprocessing_function=preprocessing_function)
        dir_seq = generator.flow_from_dataframe(df,
                                                str(tmpdir),
                                                target_size=(26, 26),
                                                color_mode='rgb',
                                                batch_size=3,
                                                class_mode='categorical',
                                                has_ext=True)
        assert len(dir_seq) == np.ceil(count / 3)
        x1, y1 = dir_seq[1]
        assert x1.shape == (3, 26, 26, 3)
        assert y1.shape == (3, num_classes)
        # the preprocessing function zeroes every image
        x1, y1 = dir_seq[5]
        assert (x1 == 0).all()

        # NOTE(review): index 9 is assumed to be past the last batch for
        # the fixture's image count - confirm.
        with pytest.raises(ValueError):
            x1, y1 = dir_seq[9]
Example #17
0
    def test_image_data_generator(self, tmpdir):
        for test_images in self.all_test_images:
            img_list = []
            for im in test_images:
                img_list.append(image.img_to_array(im)[None, ...])

            images = np.vstack(img_list)
            generator = image.ImageDataGenerator(
                featurewise_center=True,
                samplewise_center=True,
                featurewise_std_normalization=True,
                samplewise_std_normalization=True,
                zca_whitening=True,
                rotation_range=90.,
                width_shift_range=0.1,
                height_shift_range=0.1,
                shear_range=0.5,
                zoom_range=0.2,
                channel_shift_range=0.,
                brightness_range=(1, 5),
                fill_mode='nearest',
                cval=0.5,
                horizontal_flip=True,
                vertical_flip=True)
            generator.fit(images, augment=True)

            for x, y in generator.flow(images,
                                       np.arange(images.shape[0]),
                                       shuffle=False,
                                       save_to_dir=str(tmpdir),
                                       batch_size=3):
                assert x.shape == images[:3].shape
                assert list(y) == [0, 1, 2]
                break

            # Test with sample weights
            for x, y, w in generator.flow(
                    images,
                    np.arange(images.shape[0]),
                    shuffle=False,
                    sample_weight=np.arange(images.shape[0]) + 1,
                    save_to_dir=str(tmpdir),
                    batch_size=3):
                assert x.shape == images[:3].shape
                assert list(y) == [0, 1, 2]
                assert list(w) == [1, 2, 3]
                break

            # Test with `shuffle=True`
            for x, y in generator.flow(images,
                                       np.arange(images.shape[0]),
                                       shuffle=True,
                                       save_to_dir=str(tmpdir),
                                       batch_size=3):
                assert x.shape == images[:3].shape
                # Check that the sequence is shuffled.
                assert list(y) != [0, 1, 2]
                break

            # Test without y
            for x in generator.flow(images,
                                    None,
                                    shuffle=True,
                                    save_to_dir=str(tmpdir),
                                    batch_size=3):
                assert type(x) is np.ndarray
                assert x.shape == images[:3].shape
                # Check that the sequence is shuffled.
                break

            # Test with a single miscellaneous input data array
            dsize = images.shape[0]
            x_misc1 = np.random.random(dsize)

            for i, (x, y) in enumerate(
                    generator.flow((images, x_misc1),
                                   np.arange(dsize),
                                   shuffle=False,
                                   batch_size=2)):
                assert x[0].shape == images[:2].shape
                assert (x[1] == x_misc1[(i * 2):((i + 1) * 2)]).all()
                if i == 2:
                    break

            # Test with two miscellaneous inputs
            x_misc2 = np.random.random((dsize, 3, 3))

            for i, (x, y) in enumerate(
                    generator.flow((images, [x_misc1, x_misc2]),
                                   np.arange(dsize),
                                   shuffle=False,
                                   batch_size=2)):
                assert x[0].shape == images[:2].shape
                assert (x[1] == x_misc1[(i * 2):((i + 1) * 2)]).all()
                assert (x[2] == x_misc2[(i * 2):((i + 1) * 2)]).all()
                if i == 2:
                    break

            # Test cases with `y = None`
            x = generator.flow(images, None, batch_size=3).next()
            assert type(x) is np.ndarray
            assert x.shape == images[:3].shape
            x = generator.flow((images, x_misc1),
                               None,
                               batch_size=3,
                               shuffle=False).next()
            assert type(x) is list
            assert x[0].shape == images[:3].shape
            assert (x[1] == x_misc1[:3]).all()
            x = generator.flow((images, [x_misc1, x_misc2]),
                               None,
                               batch_size=3,
                               shuffle=False).next()
            assert type(x) is list
            assert x[0].shape == images[:3].shape
            assert (x[1] == x_misc1[:3]).all()
            assert (x[2] == x_misc2[:3]).all()

            # Test some failure cases:
            x_misc_err = np.random.random((dsize + 1, 3, 3))

            with pytest.raises(ValueError) as e_info:
                generator.flow((images, x_misc_err),
                               np.arange(dsize),
                               batch_size=3)
            assert str(e_info.value).find('All of the arrays in') != -1

            with pytest.raises(ValueError) as e_info:
                generator.flow((images, x_misc1),
                               np.arange(dsize + 1),
                               batch_size=3)
            assert str(e_info.value).find(
                '`x` (images tensor) and `y` (labels) ') != -1

            # Test `flow` behavior as Sequence
            seq = generator.flow(images,
                                 np.arange(images.shape[0]),
                                 shuffle=False,
                                 save_to_dir=str(tmpdir),
                                 batch_size=3)
            assert len(seq) == images.shape[0] // 3 + 1
            x, y = seq[0]
            assert x.shape == images[:3].shape
            assert list(y) == [0, 1, 2]

            # Test with `shuffle=True`
            seq = generator.flow(images,
                                 np.arange(images.shape[0]),
                                 shuffle=True,
                                 save_to_dir=str(tmpdir),
                                 batch_size=3,
                                 seed=123)
            x, y = seq[0]
            # Check that the sequence is shuffled.
            assert list(y) != [0, 1, 2]

            # `on_epoch_end` should reshuffle the sequence.
            seq.on_epoch_end()
            x2, y2 = seq[0]
            assert list(y) != list(y2)
Example #18
0
    def test_directory_iterator(self, tmpdir):
        """Exercise ``flow_from_directory`` on a nested two-class tree.

        Images from ``self.all_test_images`` are spread over class folders
        and their (sub-)subfolders, then the iterator is checked for class
        count, sample count, discovered filenames, rejection of invalid
        arguments, and index-based (Sequence-style) access.
        """
        num_classes = 2

        # Lay out the folder tree. Within each class the parent folders are
        # listed before their children so every mkdir() has a parent.
        paths = []
        for class_idx in range(num_classes):
            class_root = 'class-{}'.format(class_idx)
            class_dirs = [
                class_root,
                os.path.join(class_root, 'subfolder-1'),
                os.path.join(class_root, 'subfolder-2'),
                os.path.join(class_root, 'subfolder-1', 'sub-subfolder'),
            ]
            for relative_dir in class_dirs:
                tmpdir.join(relative_dir).mkdir()
            paths.append(class_dirs)

        # Write every test image to disk, cycling through the classes and,
        # within a class, through its folders.
        filenames = []
        count = 0
        for test_images in self.all_test_images:
            for im in test_images:
                candidate_dirs = paths[count % num_classes]
                target_dir = candidate_dirs[count % len(candidate_dirs)]
                filename = os.path.join(target_dir,
                                        'image-{}.png'.format(count))
                filenames.append(filename)
                im.save(str(tmpdir / filename))
                count += 1

        root = str(tmpdir)

        # Default iterator: everything written above must be discovered.
        generator = image.ImageDataGenerator()
        dir_iterator = generator.flow_from_directory(root)
        assert len(dir_iterator.class_indices) == num_classes
        assert len(dir_iterator.classes) == count
        assert set(dir_iterator.filenames) == set(filenames)

        # Unsupported color / class modes are expected to be rejected.
        with pytest.raises(ValueError):
            generator.flow_from_directory(root, color_mode='cmyk')
        with pytest.raises(ValueError):
            generator.flow_from_directory(root, class_mode='output')

        def preprocessing_function(x):
            """Check the input is a (26, 26, 3) ``np.ndarray`` and zero it.

            The strict type check is there to enforce backward
            compatibility: anything other than a plain Numpy array fails.
            """
            assert x.shape == (26, 26, 3)
            assert type(x) is np.ndarray
            return np.zeros_like(x)

        # Index-based access with the preprocessing hook installed.
        generator = image.ImageDataGenerator(
            preprocessing_function=preprocessing_function)
        dir_seq = generator.flow_from_directory(root,
                                                target_size=(26, 26),
                                                color_mode='rgb',
                                                batch_size=3,
                                                class_mode='categorical')
        assert len(dir_seq) == np.ceil(count / 3)

        x1, y1 = dir_seq[1]
        assert x1.shape == (3, 26, 26, 3)
        assert y1.shape == (3, num_classes)

        # The hook returns zeros, so the batch must be all zeros.
        x1, y1 = dir_seq[5]
        assert (x1 == 0).all()

        # This index is expected to be rejected with ValueError.
        with pytest.raises(ValueError):
            x1, y1 = dir_seq[9]
Example #19
0
from keras_preprocessing import image
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Activation, Dense, Flatten, Dropout
from keras.models import load_model
from keras.optimizers import SGD

# Training generator: rescales pixel values by 1/255, applies small random
# geometric augmentations, and reserves 20% of the rows via validation_split.
datagen_train = image.ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=0.20,
    fill_mode="nearest",
    rotation_range=10,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
)

# Test generator: rescaling only, no augmentation.
datagen_test = image.ImageDataGenerator(rescale=1.0 / 255)

# Label tables for the two splits.
traindf = pd.read_csv('trainLabels.csv')
testdf = pd.read_csv('testLabels.csv')


def append_text(fn):
    """Return ``fn`` converted to a string with ``.BMP`` appended."""
    return '{0}{1}'.format(str(fn), '.BMP')


# Turn every value of the 'ID' column into a string ending in '.BMP'
# (presumably so it matches the image file names on disk -- TODO confirm).
traindf['ID'] = traindf['ID'].apply(append_text)

training = datagen_train.flow_from_dataframe(dataframe=traindf,
                                             directory="./train/",
Example #20
0
 def test_image_data_generator_with_split_value_error(self):
     """Constructing a generator with ``validation_split=5`` must raise."""
     with pytest.raises(ValueError):
         image.ImageDataGenerator(validation_split=5)
Example #21
0
def main(args):
    """Train and evaluate the two-input stereo model on right/left image pairs.

    The model is built with ``cnn_hybrid_color_single`` and compiled with SGD
    and a sparse categorical cross-entropy loss. Right/left listings are read
    with ``read_pairs``; the live class (label 0) is oversampled by whole
    replication so it roughly matches the attack classes (labels 1, 2, 3);
    samples with any other label are dropped from the training set. Every
    positive label is then collapsed to 1. Training alternates one pass of
    per-batch ``fit`` with one pass of per-batch ``evaluate`` until
    ``args.max_epochs`` passes are done; weights are saved after every pass.

    Args:
        args: Object exposing ``img_rows``, ``img_cols``, ``img_channels``,
            ``learning_rate``, ``tr_img_lab_r``, ``tr_img_lab_l``,
            ``tst_img_lab_r``, ``tst_img_lab_l``, ``batch_size`` and
            ``max_epochs``.

    Raises:
        ValueError: If there are more live samples than attack samples
            (the balancing step only knows how to oversample the live class).
    """
    # set the image parameters
    img_rows = args.img_rows
    img_cols = args.img_cols
    img_dim_color = args.img_channels

    ##########################################################################
    # DEFINING MODEL
    ##########################################################################
    model_final = cnn_hybrid_color_single(img_rows, img_cols, img_dim_color)
    print(model_final.summary())  # print the model summary

    # save the model diagram to a file
    plot_model(model_final,
               to_file='./NIN_hybrid_bin_resnet_1x1-class',
               show_shapes=True)

    lr = args.learning_rate
    optimizer = SGD(lr=lr, decay=1e-6, momentum=0.9)

    model_final.compile(optimizer=optimizer,
                        loss='sparse_categorical_crossentropy',
                        metrics=['accuracy'])

    # print the value of the learning rate
    print(K.get_value(optimizer.lr))

    ##########################################################################
    # GETTING TRAINING DATA AND TESTING DATA
    ##########################################################################
    _, training_data_r, training_label_r = read_pairs(args.tr_img_lab_r)
    _, training_data_l, training_label_l = read_pairs(args.tr_img_lab_l)

    assert len(training_data_r) == len(training_data_l)

    # pair every right entry with the left entry at the same position
    training_data = [[right, left]
                     for right, left in zip(training_data_r, training_data_l)]

    # separate the live samples and attack samples so the classes can be
    # balanced; the right-side labels are used as the reference
    assert len(training_label_r) == len(training_label_l)

    live_samples = []
    live_labels = []
    attack_samples = []
    attack_labels = []

    for i, stereo_pair in enumerate(training_data):
        label = training_label_r[i]
        if label == 0:
            live_samples.append(stereo_pair)
            live_labels.append(label)
        elif label in (1, 2, 3):  # protocol_1
            attack_samples.append(stereo_pair)
            attack_labels.append(label)

    live_samples_ub = len(live_samples)
    attack_samples_ub = len(attack_samples)

    print("Live samples are %g ,\t attack samples are %g" %
          (live_samples_ub, attack_samples_ub))

    # Whole-number oversampling factor for the live class. Equal class sizes
    # need no replication, so diff simply stays 0 in that case.
    diff = 0
    if live_samples_ub < attack_samples_ub:
        # builtin int: the np.int alias no longer exists in current NumPy
        diff = int(attack_samples_ub / live_samples_ub)
        print("The difference is :%g " % (diff))
    elif live_samples_ub > attack_samples_ub:
        # this exception used to be constructed but never raised
        raise ValueError("The fake samples are less than then live samples")

    # replicate the live set `diff` times (at least once)
    repeats = max(diff, 1)
    live_samples_b = live_samples * repeats
    live_labels_b = live_labels * repeats

    # balanced data
    training_data_balanced = live_samples_b + attack_samples
    training_label_balanced = live_labels_b + attack_labels

    print("Balanced data samples: %g" % len(training_data_balanced))

    # visit the balanced training set in a random order
    indices_tr = np.arange(len(training_data_balanced))
    np.random.shuffle(indices_tr)

    images_read = 0
    train_img_data_r = []
    train_img_data_l = []

    for i in indices_tr:
        # collapse every attack label to a single positive class
        if training_label_balanced[i] > 0:
            training_label_balanced[i] = 1

        train_img_data_r.append(
            [training_data_balanced[i][0], training_label_balanced[i]])
        train_img_data_l.append(
            [training_data_balanced[i][1], training_label_balanced[i]])

        images_read += 1
        sys.stdout.write('train images read = {0}\r'.format(images_read))
        sys.stdout.flush()

    ##########################################################################
    # read the test data
    _, test_data_r, test_labels_r = read_pairs(args.tst_img_lab_r)
    _, test_data_l, test_labels_l = read_pairs(args.tst_img_lab_l)

    assert len(test_data_r) == len(test_data_l)

    test_data = [[right, left]
                 for right, left in zip(test_data_r, test_data_l)]

    # NOTE: the binarisation below writes into the labels object returned
    # by read_pairs for the right side.
    test_labels = test_labels_r

    images_read = 0
    test_img_data_r = []
    test_img_data_l = []

    for i in np.arange(len(test_data)):
        if test_labels[i] > 0:
            test_labels[i] = 1

        test_img_data_r.append([test_data[i][0], test_labels[i]])
        test_img_data_l.append([test_data[i][1], test_labels[i]])
        images_read += 1
        sys.stdout.write('test images read = {0}\r'.format(images_read))
        sys.stdout.flush()

    ##########################################################################
    # make all the data in pandas data frame format
    train_df_r = pd.DataFrame(train_img_data_r, columns=['id_r', 'label'])
    train_df_l = pd.DataFrame(train_img_data_l, columns=['id_l', 'label'])
    test_df_r = pd.DataFrame(test_img_data_r, columns=['id_r', 'label'])
    test_df_l = pd.DataFrame(test_img_data_l, columns=['id_l', 'label'])

    ##########################################################################
    # Arguments shared by all four iterators. Right and left iterators use
    # the same seed (presumably so their batches stay paired -- the training
    # loop takes the labels from the right side only).
    flow_kwargs = dict(directory=None,
                       y_col='label',
                       has_ext=True,
                       batch_size=args.batch_size,
                       seed=42,
                       class_mode="sparse",
                       target_size=(args.img_rows, args.img_cols),
                       color_mode='grayscale',
                       interpolation='nearest')

    datagen = image.ImageDataGenerator()

    train_generator_r = datagen.flow_from_dataframe(dataframe=train_df_r,
                                                    x_col='id_r',
                                                    shuffle=True,
                                                    drop_duplicates=False,
                                                    **flow_kwargs)
    train_generator_l = datagen.flow_from_dataframe(dataframe=train_df_l,
                                                    x_col='id_l',
                                                    shuffle=True,
                                                    drop_duplicates=False,
                                                    **flow_kwargs)

    test_datagen = image.ImageDataGenerator()

    test_generator_r = test_datagen.flow_from_dataframe(dataframe=test_df_r,
                                                        x_col='id_r',
                                                        shuffle=False,
                                                        **flow_kwargs)
    test_generator_l = test_datagen.flow_from_dataframe(dataframe=test_df_l,
                                                        x_col='id_l',
                                                        shuffle=False,
                                                        **flow_kwargs)

    ##########################################################################
    batch_num = 0  # number of completed passes over the training data
    while batch_num < args.max_epochs:

        start_time = time.time()  # initialize the clock
        acc = []
        loss = []

        # only whole batches are consumed in each pass
        total_batch = train_generator_r.n // train_generator_r.batch_size

        for sub_count in range(1, total_batch + 1):
            x1, y1 = next(train_generator_r)
            x2, _ = next(train_generator_l)

            x1 = x1.astype('float32') / 255
            x2 = x2.astype('float32') / 255

            tr_acc1 = model_final.fit([x1, x2], y1, epochs=1, verbose=0)
            acc.append(tr_acc1.history['acc'][0])
            loss.append(tr_acc1.history['loss'][0])

            sys.stdout.write('batch_count = {0} of {1} \r'.format(
                sub_count, total_batch))
            sys.stdout.flush()

        train_acc = np.sum(np.asarray(acc)) * 100 / total_batch
        train_loss = np.sum(np.asarray(loss)) * 100 / total_batch

        print('training_acc: {0} \t training_loss: {1}'.format(
            train_acc, train_loss))

        print(
            '______________________________________________________________________'
        )
        print('Running the evaluations')

        test_acc = []
        test_loss = []
        test_batches = test_generator_r.n // test_generator_r.batch_size

        for sub_count in range(1, test_batches + 1):
            x1, y1 = next(test_generator_r)
            x2, _ = next(test_generator_l)

            x1 = x1.astype('float32') / 255
            x2 = x2.astype('float32') / 255

            tst_loss, tst_acc1 = model_final.evaluate([x1, x2], y1, verbose=0)
            test_acc.append(tst_acc1)
            test_loss.append(tst_loss)
            sys.stdout.write('epoch_count = {0}\r'.format(sub_count))
            sys.stdout.flush()

        test_acc = np.sum(np.asarray(test_acc)) * 100 / test_batches
        test_loss = np.sum(np.asarray(test_loss)) * 100 / test_batches

        print('test_acc: {0} \t test_loss: {1}'.format(test_acc, test_loss))

        batch_num += 1

        # learning rate schedule: divide the rate by 10 after the 3rd, 5th
        # and 7th pass
        if batch_num in (3, 5, 7):
            lr = 0.1 * lr
            K.set_value(optimizer.lr, lr)
            print(K.get_value(optimizer.lr))

        end_time = time.time() - start_time

        print("Total time taken %f :" % end_time)

        # The file name depends only on max_epochs, so each pass overwrites
        # the weights saved by the previous one.
        model_final.save_weights(
            '/home/yaurehman2/Documents/stereo_face_liveness/stereo_ckpt/Conventional/'
            + 'dual_grayscale_input_revised_protocol_1_' +
            str(args.max_epochs) + '.h5')
Example #22
0
def trainGrayscale(model):
    """Train ``model`` on grayscale images listed in the TRAIN/VAL/TEST CSVs.

    The three CSV files named by the module-level ``TRAIN``, ``VAL`` and
    ``TEST`` are read with pandas and turned into 256x256 grayscale
    iterators whose targets are the 14 finding columns in ``labels``. The
    model is fitted with checkpointing, a cyclic learning rate, early
    stopping and a ROC callback on the test iterator, then saved to
    ``DenseNet.h5``.

    Args:
        model: The model to train; it must provide ``fit_generator``,
            ``save_weights`` and ``save``.
    """
    labels = ['No Finding', 'Enlarged Cardiomediastinum',
              'Cardiomegaly', 'Lung Opacity', 'Lung Lesion',
              'Edema', 'Consolidation', 'Pneumonia', 'Atelectasis',
              'Pneumothorax', 'Pleural Effusion', 'Pleural Other',
              'Fracture', 'Support Devices']
    datagen = image.ImageDataGenerator(rescale=1. / 255)

    traindf = pd.read_csv(TRAIN)
    validatedf = pd.read_csv(VAL)
    testdf = pd.read_csv(TEST)

    # Settings shared by the three iterators; only `shuffle` differs.
    flow_kwargs = dict(directory=None,
                       color_mode='grayscale',
                       target_size=(256, 256),
                       x_col='Path',
                       y_col=labels,
                       class_mode="other",
                       batch_size=BATCH,
                       drop_duplicates=False)

    traingenerator = datagen.flow_from_dataframe(traindf,
                                                 shuffle=True,
                                                 **flow_kwargs)
    validategenerator = datagen.flow_from_dataframe(validatedf,
                                                    shuffle=False,
                                                    **flow_kwargs)
    testgenerator = datagen.flow_from_dataframe(testdf,
                                                shuffle=False,
                                                **flow_kwargs)

    print(traingenerator.n)
    print(validategenerator.n)
    print(testgenerator.n)

    filepath = "DenseNet-grayscale-{epoch:02d}-{val_loss:.2f}.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                                 save_best_only=False, mode='min')
    clr = CyclicLR(base_lr=0.0001, max_lr=0.0006, step_size=2000.)
    # `mode` must be the string 'min'; the builtin function `min` was being
    # passed here before.
    es = EarlyStopping(monitor="val_loss", mode='min', verbose=1)
    callbacks_list = [checkpoint, clr, es,
                      roc_callback(testgenerator,
                                   np.array(testgenerator.labels))]

    # Step counts are whole numbers: round up so a trailing partial batch
    # is still visited.
    train_steps = int(np.ceil(traingenerator.n / BATCH))
    validation_steps = int(np.ceil(validategenerator.n / BATCH))

    model.fit_generator(generator=traingenerator,
                        validation_data=validategenerator,
                        epochs=EPOCHS,
                        steps_per_epoch=train_steps,
                        validation_steps=validation_steps,
                        callbacks=callbacks_list,
                        workers=THREAD,
                        verbose=1)

    # NOTE(review): both calls target the same path, so the second call
    # overwrites the file written by the first -- confirm whether the
    # weights-only file was meant to have its own name.
    model.save_weights("DenseNet.h5")
    model.save('DenseNet.h5')
Example #23
0
# and a logistic layer -- let's say we have 200 classes
model.add(Dense(num_classes, activation='softmax'))

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in base_model.layers:
    layer.trainable = False

# compile the model (should be done *after* setting layers to non-trainable)
# initiate RMSprop optimizer
opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
# Pass the configured optimizer object: the string 'rmsprop' would build a
# fresh optimizer and leave `opt` (and its lr/decay settings) unused.
model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

datagen = processing.ImageDataGenerator(rescale=1. / 255,
                                        shear_range=0.2,
                                        zoom_range=0.2,
                                        horizontal_flip=True)
train_generator = datagen.flow_from_directory(file_path,
                                              target_size=(img_height,
                                                           img_width),
                                              batch_size=batch_size,
                                              class_mode='categorical')

# Balanced per-class weights computed from the class index of every sample.
y_train = train_generator.classes
class_labels = np.unique(y_train)
# keyword arguments: recent scikit-learn releases reject the positional form
class_weights = class_weight.compute_class_weight('balanced',
                                                  classes=class_labels,
                                                  y=y_train)

# Keras expects `class_weight` as a {class index: weight} mapping.
model.fit_generator(train_generator,
                    class_weight=dict(zip(class_labels, class_weights)))
Example #24
0
    fromarray = Image.fromarray


# Install NumpyPIL as the `pil_image` attribute of KPImage.
KPImage.pil_image = NumpyPIL

# Prepare datasets
print('Preparing Data..')

# Mild augmentation: vertical flips plus small shifts, rotation and zoom.
img_gen_params = {
    'vertical_flip': True,
    'height_shift_range': 0.05,
    'width_shift_range': 0.02,
    'rotation_range': 3.0,
    'zoom_range': 0.05,
    # 'preprocessing_function': preprocess_input  (currently disabled)
}
img_gen = KPImage.ImageDataGenerator(**img_gen_params)


def flow_from_dataframe(img_data_gen,
                        in_df,
                        path_col,
                        y_col,
                        seed=None,
                        **dflow_args):
    """Create a directory iterator and re-point it at the rows of a DataFrame.

    The iterator is first built with ``flow_from_directory`` on the directory
    of the first path in ``in_df[path_col]``; its ``filenames`` and
    ``classes`` attributes are then overwritten with the DataFrame's values.

    Args:
        img_data_gen: object exposing ``flow_from_directory`` (e.g. an
            ``ImageDataGenerator``).
        in_df: DataFrame with at least one row; ``in_df[path_col].values[0]``
            is read to derive the base directory.
        path_col: name of the column holding the image paths.
        y_col: name of the column holding the targets; the values are
            stacked along axis 0 with ``np.stack``.
        seed: forwarded to ``flow_from_directory``.
        **dflow_args: extra keyword arguments forwarded unchanged to
            ``flow_from_directory``.

    Returns:
        The iterator returned by ``flow_from_directory`` with ``filenames``
        and ``classes`` replaced.
    """
    base_dir = os.path.dirname(in_df[path_col].values[0])
    df_gen = img_data_gen.flow_from_directory(base_dir,
                                              class_mode='sparse',
                                              seed=seed,
                                              **dflow_args)
    df_gen.filenames = in_df[path_col].values
    df_gen.classes = np.stack(in_df[y_col].values, 0)
    # NOTE(review): only `filenames` and `classes` are replaced here; any
    # size bookkeeping the iterator did while scanning `base_dir` is left
    # untouched -- confirm that is sufficient for the callers.
    # Hand the configured iterator back; without this the caller gets None.
    return df_gen
img_width = 56

# Image generator configuration. Keyword arguments are grouped by purpose.
datagen = processing.ImageDataGenerator(
    # --- normalisation ---------------------------------------------------
    rescale=1. / 255,
    featurewise_center=False,  # do not zero the mean over the dataset
    featurewise_std_normalization=False,  # do not divide by dataset std
    samplewise_center=False,  # do not zero the mean of each sample
    samplewise_std_normalization=False,  # do not divide each input by its std
    zca_whitening=False,  # ZCA whitening disabled
    zca_epsilon=1e-06,  # epsilon for ZCA whitening
    # --- geometric augmentation --------------------------------------------
    rotation_range=0,  # random rotation range in degrees (0 to 180)
    shear_range=0.2,
    zoom_range=0.,  # range for random zoom
    width_shift_range=0.1,  # horizontal shift, fraction of total width
    height_shift_range=0.1,  # vertical shift, fraction of total height
    horizontal_flip=True,  # randomly flip images horizontally
    vertical_flip=False,  # no vertical flips
    # --- pixel handling ----------------------------------------------------
    channel_shift_range=0.,  # range for random channel shifts
    fill_mode='nearest',  # how points outside the input boundaries are filled
    cval=0.,  # value used when fill_mode = "constant"
    # --- misc --------------------------------------------------------------
    preprocessing_function=None,  # function applied on each input
    data_format=None,  # either "channels_first" or "channels_last"
    validation_split=0.0)  # fraction of images reserved for validation
train_generator = datagen.flow_from_directory(file_path,
                                              target_size=(img_height,
Example #26
0
    def test_dataframe_iterator_with_validation_split(self, validation_split,
                                                      num_training, tmpdir):
        """Check the training/validation sample counts of dataframe iterators.

        All test images are written to ``tmpdir`` and listed in two
        dataframes (file names with and without the ".png" extension), each
        with random 0/1 labels. For a generator built with
        ``validation_split`` the 'training' subset must hold ``num_training``
        samples and the 'validation' subset the remainder; an unknown subset
        name must raise ``ValueError``.
        """
        num_classes = 2

        # Save every image under tmpdir and remember both spellings of its name.
        count = 0
        filenames = []
        filenames_without = []
        for test_images in self.all_test_images:
            for im in test_images:
                filenames.append("image-{}.png".format(count))
                filenames_without.append("image-{}".format(count))
                im.save(str(tmpdir / filenames[-1]))
                count += 1

        labels = [random.randint(0, 1) for _ in filenames]
        df = pd.DataFrame({"filename": filenames, "class": labels})
        labels_without = [random.randint(0, 1) for _ in filenames_without]
        df_without_ext = pd.DataFrame({
            "filename": filenames_without,
            "class": labels_without
        })

        directory = str(tmpdir)
        generator = image.ImageDataGenerator(validation_split=validation_split)

        # The second element of the first sparse-mode batch must be NaN-free.
        df_sparse_iterator = generator.flow_from_dataframe(df,
                                                           directory,
                                                           has_ext=True,
                                                           class_mode="sparse")
        first_batch = next(df_sparse_iterator)
        if np.isnan(first_batch[1]).any():
            raise ValueError('Invalid values.')

        # An unknown subset name is rejected.
        with pytest.raises(ValueError):
            generator.flow_from_dataframe(df,
                                          tmpdir,
                                          has_ext=True,
                                          subset='foo')

        def flow(frame, has_ext, subset):
            # Shared call shape for the four subset iterators below.
            return generator.flow_from_dataframe(frame,
                                                 directory,
                                                 has_ext=has_ext,
                                                 subset=subset)

        num_validation = count - num_training

        # File names that include the extension.
        train_iterator = flow(df, True, 'training')
        assert train_iterator.samples == num_training
        valid_iterator = flow(df, True, 'validation')
        assert valid_iterator.samples == num_validation

        # File names given without the extension.
        train_iterator_without = flow(df_without_ext, False, 'training')
        assert train_iterator_without.samples == num_training
        valid_iterator_without = flow(df_without_ext, False, 'validation')
        assert valid_iterator_without.samples == num_validation

        # check number of classes and images
        assert len(train_iterator.class_indices) == num_classes
        assert len(train_iterator.classes) == num_training
        saved_names = set(filenames)
        assert len(set(train_iterator.filenames) & saved_names) == num_training
        intersection = set(train_iterator_without.filenames) & saved_names
        assert len(intersection) == num_training
    base_model = InceptionResNetV2(weights='imagenet',
                                   include_top=False,
                                   input_shape=(224, 224, 3))
    base_model.summary()
    model.add(base_model)
    model.add(GlobalAveragePooling2D())
    # # # let's add a fully-connected layer
    model.add(Dense(num_classes, activation='softmax'))
    model.summary()

    return model


if __name__ == '__main__':
    # The generator built here is not bound to a name; its result is discarded.
    processing.ImageDataGenerator(rescale=1. / 255)

    # Input geometry and dataset constants.
    img_height = img_width = 224
    input_shape = (img_height, img_width, 3)
    num_classes = 103
    batch_size = 32

    test_path = 'E:\\Data\\image_debug\\'
    # NOTE(review): this literal ends with a space after ".h5" -- confirm
    # that is intended.
    model_weight_path = 'E:\\PythonSource\\inception_landmark\\keras_landmark_inception_resnet_v2_model.18-0.92.h5 '

    # init class map index: the class ids as strings, sorted as strings
    # (so '10' comes before '2').
    class_str = sorted(str(index) for index in range(num_classes))

    # init
 def test_valid_args(self):
     """A scalar ``brightness_range`` must be rejected with ``ValueError``."""
     with pytest.raises(ValueError):
         image.ImageDataGenerator(brightness_range=0.1)
# top2_acc = functools.partial(top_k_categorical_accuracy, k=2)
#
# top2_acc.__name__ = 'top2_acc'


def top2_acc(y_true, y_pred):
    """Top-2 categorical accuracy metric.

    Evaluates ``top_k_categorical_accuracy`` with ``k=2`` on the given
    targets and predictions. The previous body returned an un-called
    ``functools.partial`` object and ignored both arguments, so no metric
    value was ever computed.

    Args:
        y_true: ground-truth targets, forwarded unchanged.
        y_pred: model predictions, forwarded unchanged.

    Returns:
        Whatever ``top_k_categorical_accuracy(y_true, y_pred, k=2)`` returns.
    """
    return top_k_categorical_accuracy(y_true, y_pred, k=2)


# Load the saved model from its .hdf5 file.
model = load_model(
    '/media/raghu/6A3A-B7CD/glare_resnet_models/models-0.7209-0.7751.hdf5')

# Validation data: batches of 10 read from the 'val' folder under
# `inputpath`, with pixel values rescaled by 1/255.
val_batchsize = 10
validation_dir = inputpath + '/val'
validation_datagen = image.ImageDataGenerator(rescale=1. / 255)

# Create a generator for prediction (shuffling is disabled).
validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    shuffle=False,
    class_mode='categorical',
    batch_size=val_batchsize,
    target_size=(image_size, image_size))

# File names and ground-truth class indices, as reported by the generator.
fnames = validation_generator.filenames
ground_truth = validation_generator.classes
Example #30
0
import tensorflow as tf
from tensorflow import keras
from keras_preprocessing import image
"""
#%%
import glob
from PIL import Image
directory='Images/'
name_list = glob.glob(directory + '*/*')
print(name_list)
"""
#%%
# Preprocessing: a single generator rescales pixel values by 1/255 and
# reserves 10% of the images for validation. Both iterators below read from
# `directory` with identical settings and differ only in `subset`.
directory = 'Images/'
data_gen = image.ImageDataGenerator(validation_split=0.1, rescale=1. / 255)
train_gen = data_gen.flow_from_directory(
    directory,
    subset='training',
    class_mode='sparse',
    target_size=(150, 150),
    batch_size=20)
val_gen = data_gen.flow_from_directory(
    directory,
    subset='validation',
    class_mode='sparse',
    target_size=(150, 150),
    batch_size=20)
"""
for i in range(1):
    print(train_gen[16506])
    train_gen.next()
#print(val_gen.class_indices)