# Assumed third-party imports for this excerpt (inferred from usage below);
# project-local helpers (gen, utils, preparing_data, reading_tfrecords,
# testing_helpers, testing_consts, generate_sprites, DictsDataset,
# ImageDimensions, DatasetStorageMethod) are expected to be imported
# elsewhere in the module.
import numpy as np
import tensorflow as tf
from hamcrest import assert_that, has_item, not_


def test_should_save_and_read_pairs_correctly(batch_size):
    images_dataset: DictsDataset
    paths_dataset: DictsDataset
    images_dataset, paths_dataset = gen.dicts_dataset(batch_size=batch_size,
                                                      paired=True,
                                                      save_on_disc=True)
    raw_dataset_fragment = testing_helpers.dicts_dataset_to_raw_dataset_fragment(
        images_dataset)
    dataset_desc = gen.dataset_desc(
        storage_method=DatasetStorageMethod.ON_DISC,
        image_dimensions=ImageDimensions(testing_consts.TEST_IMAGE_SIZE))
    dataset_spec = gen.dataset_spec(description=dataset_desc,
                                    raw_dataset_fragment=raw_dataset_fragment)
    tfrecord_full_path = preparing_data.save_to_tfrecord(
        paths_dataset.features, paths_dataset.labels, 'data', dataset_spec)

    assert utils.check_filepath(tfrecord_full_path,
                                is_directory=False,
                                is_empty=False)

    dataset = reading_tfrecords.assemble_dataset(tfrecord_full_path.parent,
                                                 dataset_spec)
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size)
    first_batch = dataset.make_one_shot_iterator().get_next()

    _check_paired_result(
        first_batch,
        (images_dataset.features.left, images_dataset.features.right),
        images_dataset.labels)

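# _check_paired_result is called above but is not part of this excerpt.
# A minimal sketch of such a helper under the TF 1.x API used here; the
# session-based evaluation, the 'left'/'right' feature keys, and the
# np.testing comparisons are assumptions, not the project's actual
# implementation.
def _check_paired_result(first_batch, expected_images, expected_labels):
    expected_left, expected_right = expected_images
    with tf.Session() as sess:
        # Evaluate the (features, labels) tensor tuple into numpy values.
        features, labels = sess.run(first_batch)
    np.testing.assert_allclose(features['left'], expected_left)
    np.testing.assert_allclose(features['right'], expected_right)
    np.testing.assert_array_equal(labels, expected_labels)
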
def fake_dataset(request):
    image_dims = extract_dimensions_or_default(request)
    print("Creating fake dicts dataset with dims {}".format(image_dims))
    return gen.dicts_dataset(
        paired=True,
        image_dims=image_dims,
        batch_size=testing_consts.FAKE_IMAGES_IN_DATASET_COUNT)

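# extract_dimensions_or_default is referenced above but not shown in this
# excerpt. Assuming fake_dataset is a pytest fixture that may be
# parametrized indirectly, a plausible sketch is:
def extract_dimensions_or_default(request):
    # Indirectly parametrized tests carry the dimensions in request.param;
    # fall back to the default test image size otherwise.
    if hasattr(request, 'param'):
        return request.param
    return ImageDimensions(testing_consts.TEST_IMAGE_SIZE)
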
def test_should_create_correctly_sized_sprite(sprite_expected_side_length,
                                              is_rgb, with_border):
    image_dims = ImageDimensions(20, 20, 3 if is_rgb else 1)
    features = gen.dicts_dataset(batch_size=150,
                                 image_dims=image_dims,
                                 paired=True,
                                 normalize=True).features
    expected_dims = ImageDimensions(sprite_expected_side_length)
    sprite = generate_sprites.create_sprite_image(features=features,
                                                  expected_dims=expected_dims,
                                                  with_border=with_border)
    assert sprite.height == sprite_expected_side_length
    # Guard against an all-black image caused by PIL's lossy
    # float-to-uint8 conversion.
    assert np.array(sprite).max() > 0

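# For orientation: sprite sheets are conventionally laid out on a
# ceil(sqrt(n)) grid, so 150 thumbnails of 20 px would occupy a 13x13
# grid, i.e. a 260 px square without borders. Whether create_sprite_image
# follows exactly this rule (and how with_border pads it) is an
# assumption; the parametrized sprite_expected_side_length is the source
# of truth here.
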
def test_should_save_and_read_unpaired_correctly(batch_size):
    images_dataset: DictsDataset = gen.dicts_dataset(batch_size=batch_size,
                                                     paired=False)
    tfrecord_full_path = preparing_data.save_to_tfrecord(
        images_dataset.features, images_dataset.labels, 'data',
        gen.dataset_spec(paired=False))

    assert utils.check_filepath(tfrecord_full_path,
                                is_directory=False,
                                is_empty=False)

    dataset = reading_tfrecords.assemble_dataset(
        tfrecord_full_path.parent, gen.dataset_spec(paired=False))
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_one_shot_iterator()
    first_batch = iterator.get_next()

    _check_result(first_batch, images_dataset.features.all,
                  images_dataset.labels)

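# _check_result is the unpaired counterpart of _check_paired_result and
# is likewise absent from this excerpt; a sketch under the same
# assumptions:
def _check_result(first_batch, expected_images, expected_labels):
    with tf.Session() as sess:
        features, labels = sess.run(first_batch)
    np.testing.assert_allclose(features, expected_images)
    np.testing.assert_array_equal(labels, expected_labels)
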
def test_should_include_reduced_size_in_path(expected_size,
                                             should_image_size_be_reduced):
    images_dataset: DictsDataset
    paths_dataset: DictsDataset
    images_dataset, paths_dataset = gen.dicts_dataset(save_on_disc=True)
    dataset_desc = gen.dataset_desc(
        storage_method=DatasetStorageMethod.ON_DISC,
        image_dimensions=ImageDimensions(expected_size))
    raw_dataset_fragment = testing_helpers.dicts_dataset_to_raw_dataset_fragment(
        images_dataset)
    dataset_spec = gen.dataset_spec(description=dataset_desc,
                                    raw_dataset_fragment=raw_dataset_fragment,
                                    paired=False)
    tfrecord_full_path = preparing_data.save_to_tfrecord(
        paths_dataset.features, paths_dataset.labels, 'data', dataset_spec)

    parts = tfrecord_full_path.parts
    if should_image_size_be_reduced:
        assert ("size_" + str(expected_size[0])) in parts
    else:
        # has_item checks membership in the tuple of path parts; hamcrest's
        # contains matches the entire sequence, so not_(contains(...)) would
        # pass trivially for any multi-part path.
        assert_that(parts, not_(has_item("size_" + str(expected_size[0]))))

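# The assertions above rely on save_to_tfrecord embedding the reduced size
# as its own directory component, e.g. (illustrative path only):
#   .../datasets/size_28/data.tfrecord  ->  "size_28" in path.parts
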
def preparing_dataset(*args, **kwargs):
    # `image_dims` is expected to come from the enclosing scope
    # (e.g. a surrounding fixture or test).
    dicts_dataset = gen.dicts_dataset(
        paired=True,
        image_dims=image_dims,
        batch_size=testing_consts.FAKE_IMAGES_IN_DATASET_COUNT)
    return tf.data.Dataset.from_tensor_slices(dicts_dataset.as_tuple())

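# preparing_dataset swallows *args/**kwargs, which suggests it stands in
# for a real dataset-preparation entry point during tests. A sketch of
# such a use with pytest-mock; the patched attribute name
# 'prepare_dataset' is hypothetical:
def test_runs_on_fake_data(mocker):
    mocker.patch.object(preparing_data, 'prepare_dataset', preparing_dataset)
    # ... exercise the code under test, which now reads the fake dataset.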