Example #1
def test_infinite_balanced_dataset(tmpdir):

    tmpdir1 = tmpdir / 'rec1'
    tmpdir1.mkdir()

    tmpdir2 = tmpdir / 'rec2'
    tmpdir2.mkdir()

    setup1 = OrderedDatasetSetup(n_files=1,
                                 n_samples_per_file=7,
                                 output_folder=tmpdir1)

    setup2 = OrderedDatasetSetup(n_files=1,
                                 n_samples_per_file=3,
                                 output_folder=tmpdir2)

    patterns = [setup1.fname_pattern, setup2.fname_pattern]

    # check with balanced option
    dataset = create_dataset(patterns,
                             batch_size=1,
                             parser_fn=setup1.parser.parse,
                             shuffle=False,
                             balance_records=True)

    seq1 = [i % 7 for i in range(10)]
    seq2 = [i % 3 for i in range(10)]
    # interleaved (the order in which records are loaded is random)
    expected_seq_alternativeA = np.asarray(
        [i for j in zip(seq1, seq2) for i in j])
    expected_seq_alternativeB = np.asarray(
        [i for j in zip(seq2, seq1) for i in j])

    loaded_seq = np.asarray(
        [int(d['image'].numpy().squeeze()) for d in dataset.take(20)])

    compA = (expected_seq_alternativeA == loaded_seq).sum()
    compB = (expected_seq_alternativeB == loaded_seq).sum()
    assert (compA == 20) or (compB == 20)

    # check without balanced option
    dataset = create_dataset(patterns,
                             batch_size=1,
                             parser_fn=setup1.parser.parse,
                             shuffle=False,
                             balance_records=False)

    expected_seq = [0, 0, 1, 1, 2, 2, 3, 4, 5, 6]
    loaded_seq = [int(d['image'].numpy().squeeze()) for d in dataset]
    np.testing.assert_array_equal(loaded_seq, expected_seq)
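
The balanced half of this test expects the pipeline to alternate sample-by-sample between the two record sets, with the shorter one wrapping around. Below is a minimal sketch of that behaviour built from plain tf.data ops; it is an assumption about what balance_records=True effectively produces, not create_dataset's actual implementation (tf.data.Dataset.choose_from_datasets needs TF >= 2.7; older versions provide tf.data.experimental.choose_from_datasets).

import tensorflow as tf

def balanced_interleave(datasets):
    # Round-robin choice of source dataset; each source is repeated so the
    # shorter one wraps around, mirroring the `i % 3` pattern in the test.
    choices = tf.data.Dataset.range(len(datasets)).repeat()
    return tf.data.Dataset.choose_from_datasets(
        [ds.repeat() for ds in datasets], choices)

# two toy "record" streams of length 7 and 3, as in the test setup
ds1 = tf.data.Dataset.range(7)
ds2 = tf.data.Dataset.range(3)
print([int(x) for x in balanced_interleave([ds1, ds2]).take(10)])
# -> [0, 0, 1, 1, 2, 2, 3, 0, 4, 1]
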
Example #2
def test_training_with_multioutput(tmpdir):
    '''train a dummy network with multiple outputs from a tf.data.Dataset.
    The tf.data pipeline yields (inputs, targets) tuples.

    '''
    batch_size = 2
    drop_remainder = True

    tf.random.set_seed(13)

    setup = ClassificationDatasetSetup(2, 14, tmpdir)

    def _dict_to_multi_tuple(sample):
        '''In TF 2.0, Keras' model.fit() doesn't accept tf.data.Datasets
        of dictionaries, so we have to convert them into tuples.

        This also creates a fake multi-target output.

        '''
        return sample['image'], (sample['label'],
                                 2 * tf.reduce_mean(sample['image']) * tf.ones(3))

    # load dataset.
    dataset = create_dataset(setup.fname_pattern,
                             batch_size,
                             setup.parser.parse,
                             patch_size=None,
                             shuffle_buffer=5,
                             drop_remainder=drop_remainder,
                             transforms=[_dict_to_multi_tuple],
                             cache_after_parse=False)

    # create model and train
    def _construct_model():
        '''create model with two outputs.

        '''

        first_input = tf.keras.layers.Input(shape=(None, None, 1))
        x = tf.keras.layers.Conv2D(4, kernel_size=3,
                                   padding='same')(first_input)
        x = tf.keras.layers.GlobalAveragePooling2D()(x)
        first_output = tf.keras.layers.Dense(1, name='first')(x)
        second_output = tf.keras.layers.Dense(3, name='second')(x)

        return tf.keras.Model(inputs=[first_input],
                              outputs=[first_output, second_output])

    model = _construct_model()
    model.compile(loss={'first': 'mse', 'second': 'mae'})

    loss_before = model.evaluate(dataset)[0]
    model.fit(dataset, epochs=5)
    loss_after = model.evaluate(dataset)[0]

    assert loss_before * 0.95 >= loss_after
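
The _dict_to_multi_tuple transform above is what lets Keras match the two targets to the two named outputs. Below is a small, self-contained sketch of the same (inputs, (target1, target2)) convention with a toy in-memory dataset; the tensors and layer names here are illustrative assumptions, not part of the project.

import tensorflow as tf

# toy dataset yielding (image, (target_first, target_second)) batches
images = tf.random.uniform((8, 4, 4, 1))
targets = (tf.random.uniform((8, 1)), tf.random.uniform((8, 3)))
dataset = tf.data.Dataset.from_tensor_slices((images, targets)).batch(2)

inp = tf.keras.layers.Input(shape=(4, 4, 1))
x = tf.keras.layers.GlobalAveragePooling2D()(inp)
model = tf.keras.Model(inp, [tf.keras.layers.Dense(1, name='first')(x),
                             tf.keras.layers.Dense(3, name='second')(x)])
# tuple targets are matched to outputs by position, losses by output name
model.compile(optimizer='adam', loss={'first': 'mse', 'second': 'mae'})
model.fit(dataset, epochs=1, verbose=0)
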
Example #3
def test_create_dataset_with_patches(tmpdir, batch_size, patch_size):
    '''test image-to-segmentation with patch sampling.
    '''
    drop_remainder = True

    # test setup
    setup = SegmentationDatasetSetup(2, 7, tmpdir)

    # load dataset.
    dataset = create_dataset(setup.fname_pattern,
                             batch_size,
                             setup.parser.parse,
                             patch_size=patch_size,
                             shuffle_buffer=5,
                             drop_remainder=drop_remainder,
                             cache_after_parse=False)

    counter = 0
    for batch in dataset:
        image_batch = batch['image']
        segm_batch = batch['segm']
        assert np.all(image_batch.shape[1:] == patch_size)
        assert np.all(segm_batch.shape[1:] == patch_size)

        if drop_remainder:
            assert segm_batch.shape[0] == batch_size
            assert image_batch.shape[0] == batch_size
        else:
            assert segm_batch.shape[0] <= batch_size
            assert image_batch.shape[0] <= batch_size

        # check value range
        for image, label in zip(image_batch, segm_batch):
            assert np.all(0 <= label) and np.all(label <= setup.n_classes)
            assert np.all(0 <= image) and np.all(image <= 255)

            counter += 1

    # check correct number of samples
    # Depending on the batch size, some samples might be dropped.
    expected_samples = setup.expected_samples - (
        setup.expected_samples % batch_size if drop_remainder else 0)
    assert counter == expected_samples
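
For the patch sampling checked above, the image and the segmentation must be cropped at the same random location. One common way to do this (a sketch assuming HWC tensors with static channel dimensions, not necessarily how create_dataset implements it) is to concatenate both tensors along the channel axis, crop once, and split again:

import tensorflow as tf

def sample_patch(sample, patch_size):
    # patch_size here is the spatial size (height, width); channels are kept.
    image = sample['image']
    segm = tf.cast(sample['segm'], image.dtype)  # concat needs a single dtype
    stacked = tf.concat([image, segm], axis=-1)
    crop = tf.image.random_crop(
        stacked, size=(patch_size[0], patch_size[1],
                       image.shape[-1] + segm.shape[-1]))
    return {'image': crop[..., :image.shape[-1]],
            'segm': tf.cast(crop[..., image.shape[-1]:], sample['segm'].dtype)}

# applied per sample, before batching:
# dataset = dataset.map(lambda sample: sample_patch(sample, patch_size=(4, 4)))
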
Example #4
def test_create_dataset_clf(tmpdir, n_files, n_samples_per_file, batch_size,
                            shuffle, drop_remainder, cache_after_parse):
    '''image-to-class dataset.
    '''
    # test setup
    setup = ClassificationDatasetSetup(n_files, n_samples_per_file, tmpdir)

    # load dataset.
    dataset = create_dataset(setup.fname_pattern,
                             batch_size,
                             setup.parser.parse,
                             shuffle_buffer=5,
                             shuffle=shuffle,
                             drop_remainder=drop_remainder,
                             cache_after_parse=cache_after_parse)

    counter = 0
    for batch in dataset:
        image_batch = batch['image']
        label_batch = batch['label']
        assert np.all(image_batch.shape[1:] == setup.img_shape)
        assert np.all(label_batch.shape[1:] == (
            setup.parser.n_classes, ))  # one-hot encoding.

        if drop_remainder:
            assert label_batch.shape[0] == batch_size
            assert image_batch.shape[0] == batch_size
        else:
            assert label_batch.shape[0] <= batch_size
            assert image_batch.shape[0] <= batch_size

        # check value range
        for image, label in zip(image_batch, label_batch):
            assert np.all(0 <= label) and np.all(label <= 1)
            assert np.all(0 <= image) and np.all(image <= 255)

            counter += 1

    # check correct number of samples
    # Depending on the batch size, some samples might be dropped.
    expected_samples = setup.expected_samples - (
        setup.expected_samples % batch_size if drop_remainder else 0)
    assert counter == expected_samples
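
The assertions above rely on the parser returning a decoded image and a one-hot encoded label. A hypothetical sketch of such a parse function is given below; the feature keys, image shape and number of classes are assumptions for illustration, not the project's actual serialization format.

import tensorflow as tf

N_CLASSES = 4
IMG_SHAPE = (8, 8, 1)

def parse(serialized_example):
    # decode a serialized tf.train.Example into the {'image', 'label'} dict
    # that the test iterates over
    features = tf.io.parse_single_example(
        serialized_example,
        {'image': tf.io.FixedLenFeature([], tf.string),
         'label': tf.io.FixedLenFeature([], tf.int64)})
    image = tf.reshape(tf.io.decode_raw(features['image'], tf.uint8), IMG_SHAPE)
    label = tf.one_hot(features['label'], N_CLASSES)  # one-hot, as asserted above
    return {'image': tf.cast(image, tf.float32), 'label': label}
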
Example #5
def test_training_from_dataset(tmpdir):
    '''train a dummy segmentation network from the tf.data.Dataset pipeline,
    including random flip augmentations.
    '''
    batch_size = 2
    patch_size = (4, 4, 1)
    drop_remainder = True

    setup = SegmentationDatasetSetup(2, 14, tmpdir)

    def _dict_to_tuple(sample):
        '''In TF 2.0, Keras' model.fit() doesn't accept tf.data.Datasets
        of dictionaries, so we have to convert them into tuples.

        '''
        return sample['image'], sample['segm']

    # load dataset.
    dataset = create_dataset(setup.fname_pattern,
                             batch_size,
                             setup.parser.parse,
                             patch_size=patch_size,
                             shuffle_buffer=5,
                             drop_remainder=drop_remainder,
                             transforms=[
                                 random_axis_flip(axis=1, flip_prob=0.5),
                                 random_axis_flip(axis=0, flip_prob=0.5),
                                 _dict_to_tuple
                             ],
                             cache_after_parse=False)

    # create model
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(4, kernel_size=3, padding='same'),
        tf.keras.layers.Conv2D(1, kernel_size=3, padding='same'),
    ])

    # train.
    model.compile(loss='mse')
    loss_before = model.evaluate(dataset)
    model.fit(dataset, epochs=5)
    loss_after = model.evaluate(dataset)
    assert loss_before * 0.95 >= loss_after
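
The random_axis_flip transforms used above have to flip image and segmentation together so the pair stays aligned. A sketch of how such a dict-aware augmentation can be written is shown below; this is an assumption about the technique, not the project's actual implementation of random_axis_flip.

import tensorflow as tf

def random_axis_flip(axis, flip_prob):
    '''returns a transform that flips every tensor in the sample dict along
    the given axis with probability flip_prob, so image and segmentation
    stay aligned.'''

    def _flip(sample):
        do_flip = tf.random.uniform(()) < flip_prob

        def _maybe_flip(tensor):
            # both branches are traced; only one is executed per sample
            return tf.cond(do_flip,
                           lambda: tf.reverse(tensor, axis=[axis]),
                           lambda: tensor)

        return {key: _maybe_flip(value) for key, value in sample.items()}

    return _flip
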