def test_shuffle(dataset: ILSVRCDataset) -> None: """Tests that the shuffling flag works as expected. Also tests that filenames and labels are still properly mapped. :param dataset: the dataset. """ dataset.trim_dataset(DATASET_FRACTION) x_train_filenames = dataset.partition[TRAIN_KEY] y_train = dataset.get_labels(x_train_filenames, True, NUM_CLASSES) train_sequence = ImageDatasetSequence(x_train_filenames, y=y_train, batch_size=BATCH_SIZE, image_target_size=IMAGE_TARGET_SIZE, batch_augment_fn=None, batch_format_fn=None, overfit_single_batch=False, shuffle_on_epoch_end=True) img_to_label_before = {} for batch in train_sequence: x_batch, y_batch = batch for i in range(x_batch.shape[0]): img_data = tuple(x_batch[i].flatten()) label = tuple(y_batch[i]) img_to_label_before[img_data] = label # Test shuffle. first_batch_before = train_sequence.__getitem__(0) train_sequence.on_epoch_end() first_batch_after = train_sequence.__getitem__(0) assert (first_batch_before[0] != first_batch_after[0]).any() # Test filename/label mappings. for batch in train_sequence: x_batch, y_batch = batch for i in range(x_batch.shape[0]): img_data = tuple(x_batch[i].flatten()) label = tuple(y_batch[i]) assert img_to_label_before[img_data] == label
def test_images(dataset: ILSVRCDataset) -> None: """Tests that the sequence output images meet expected standards. :param dataset: the dataset. """ dataset.trim_dataset(DATASET_FRACTION) x_train_filenames = dataset.partition[TRAIN_KEY] y_train = dataset.get_labels(x_train_filenames, True, NUM_CLASSES) train_sequence = ImageDatasetSequence(x_train_filenames, y=y_train, batch_size=BATCH_SIZE, image_target_size=IMAGE_TARGET_SIZE, batch_augment_fn=None, batch_format_fn=None, overfit_single_batch=False, shuffle_on_epoch_end=True) # Test that only the last batch is not of length BATCH_SIZE. # Also test that there are the correct number of batches. on_last_batch = False num_batches_seen = 0 for batch in train_sequence: assert not on_last_batch x_batch, y_batch = batch # Take the first image/label pair and check that it meets standards. # Check that the image is of the right size. assert x_batch[0].shape == IMAGE_TARGET_SIZE + (3, ) # Check that the image is of the right datatype. assert x_batch.dtype == np.float32 # Check that the image is normalized. assert (0.0 <= x_batch.flatten()).all() assert (x_batch.flatten() <= 1.0).all() # Check that the label is categorical and of the right dimension. assert y_batch.shape[1] == NUM_CLASSES # Check that the label is of the right datatype. assert y_batch.dtype == np.float32 # Check that the label is one-hot. for label in y_batch: assert sum(label) == 1 on_last_batch = not (x_batch.shape[0] == BATCH_SIZE and y_batch.shape[0] == BATCH_SIZE) num_batches_seen += 1 assert num_batches_seen == len(train_sequence)
def test_overfit_single_batch(dataset: ILSVRCDataset) -> None: """Tests that the same batch of images is always presented to the model if overfitting on a single batch. :param dataset: the dataset. """ dataset.trim_dataset(DATASET_FRACTION) x_train_filenames = dataset.partition[TRAIN_KEY] y_train = dataset.get_labels(x_train_filenames, True, NUM_CLASSES) # Test that you can't set overfit and shuffle flags together. train_sequence = ImageDatasetSequence(x_train_filenames, y=y_train, batch_size=BATCH_SIZE, image_target_size=IMAGE_TARGET_SIZE, batch_augment_fn=None, batch_format_fn=None, overfit_single_batch=True, shuffle_on_epoch_end=True) with pytest.raises(ValueError): for _ in train_sequence: pass # Test that you always get the same batch, even after multiple epochs. train_sequence = ImageDatasetSequence(x_train_filenames, y=y_train, batch_size=BATCH_SIZE, image_target_size=IMAGE_TARGET_SIZE, batch_augment_fn=None, batch_format_fn=None, overfit_single_batch=True, shuffle_on_epoch_end=False) num_batches_epoch_1 = 0 for batch in train_sequence: assert (batch[0] == train_sequence.__getitem__(0)[0]).all() num_batches_epoch_1 += 1 train_sequence.on_epoch_end() num_batches_epoch_2 = 0 for batch in train_sequence: assert (batch[0] == train_sequence.__getitem__(0)[0]).all() num_batches_epoch_2 += 1 assert num_batches_epoch_1 == num_batches_epoch_2