def test_auto_contrast_invalid_cutoff_param_c():
    """
    Test AutoContrast C Op with invalid cutoff parameter
    """
    logger.info("Test AutoContrast C Op with invalid cutoff parameter")
    try:
        ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        ds = ds.map(input_columns=["image"],
                    operations=[C.Decode(),
                                C.Resize((224, 224)),
                                lambda img: np.array(img[:, :, 0])])
        # invalid cutoff
        ds = ds.map(input_columns="image",
                    operations=C.AutoContrast(cutoff=-10.0))
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
    try:
        ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        ds = ds.map(input_columns=["image"],
                    operations=[C.Decode(),
                                C.Resize((224, 224)),
                                lambda img: np.array(img[:, :, 0])])
        # invalid cutoff
        ds = ds.map(input_columns="image",
                    operations=C.AutoContrast(cutoff=120.0))
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)

def test_random_sharpness_invalid_params():
    """
    Test RandomSharpness with invalid input parameters.
    """
    logger.info("Test RandomSharpness with invalid input parameters.")
    try:
        data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        data = data.map(input_columns=["image"],
                        operations=[C.Decode(),
                                    C.Resize((224, 224)),
                                    C.RandomSharpness(10)])
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "tuple" in str(error)
    try:
        data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        data = data.map(input_columns=["image"],
                        operations=[C.Decode(),
                                    C.Resize((224, 224)),
                                    C.RandomSharpness((-10, 10))])
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "interval" in str(error)
    try:
        data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        data = data.map(input_columns=["image"],
                        operations=[C.Decode(),
                                    C.Resize((224, 224)),
                                    C.RandomSharpness((10, 5))])
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "(min,max)" in str(error)

def test_random_affine_op_c(plot=False):
    """
    Test RandomAffine in C transformations
    """
    logger.info("test_random_affine_op_c")
    # define map operations
    transforms1 = [
        c_vision.Decode(),
        c_vision.RandomAffine(degrees=0, translate=(0.5, 0.5, 0, 0))
    ]
    transforms2 = [
        c_vision.Decode()
    ]

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data1 = data1.map(input_columns=["image"], operations=transforms1)
    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=transforms2)

    image_affine = []
    image_original = []
    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        image1 = item1["image"]
        image2 = item2["image"]
        image_affine.append(image1)
        image_original.append(image2)
    if plot:
        visualize_list(image_original, image_affine)

def skip_test_random_posterize_op_fixed_point_c(plot=False, run_golden=True):
    """
    Test RandomPosterize in C transformations with fixed point
    """
    logger.info("test_random_posterize_op_c")
    # define map operations
    transforms1 = [
        c_vision.Decode(),
        c_vision.RandomPosterize(1)
    ]

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data1 = data1.map(input_columns=["image"], operations=transforms1)
    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=[c_vision.Decode()])

    image_posterize = []
    image_original = []
    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        image1 = item1["image"]
        image2 = item2["image"]
        image_posterize.append(image1)
        image_original.append(image2)

    if run_golden:
        # check results with md5 comparison
        filename = "random_posterize_fixed_point_01_result_c.npz"
        save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(image_original, image_posterize)

def test_auto_contrast_invalid_ignore_param_c():
    """
    Test AutoContrast C Op with invalid ignore parameter
    """
    logger.info("Test AutoContrast C Op with invalid ignore parameter")
    try:
        ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        ds = ds.map(input_columns=["image"],
                    operations=[C.Decode(),
                                C.Resize((224, 224)),
                                lambda img: np.array(img[:, :, 0])])
        # invalid ignore
        ds = ds.map(input_columns="image",
                    operations=C.AutoContrast(ignore=255.5))
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value 255.5 is not of type" in str(error)
    try:
        ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        ds = ds.map(input_columns=["image"],
                    operations=[C.Decode(),
                                C.Resize((224, 224)),
                                lambda img: np.array(img[:, :, 0])])
        # invalid ignore
        ds = ds.map(input_columns="image",
                    operations=C.AutoContrast(ignore=(10, 100)))
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value (10,100) is not of type" in str(error)

def test_pipeline():
    """
    Test that our configuration pipeline works when we set parameters at the dataset level
    """
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    ds.config.set_num_parallel_workers(2)
    data1 = data1.map(input_columns=["image"], operations=[vision.Decode(True)])
    ds.serialize(data1, "testpipeline.json")

    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    ds.config.set_num_parallel_workers(4)
    data2 = data2.map(input_columns=["image"], operations=[vision.Decode(True)])
    ds.serialize(data2, "testpipeline2.json")

    # The two serialized pipelines are currently identical because num_parallel_workers
    # does not get updated in the serialized output, so this comparison passes.
    assert filecmp.cmp('testpipeline.json', 'testpipeline2.json')

    # remove generated json files
    file_list = glob.glob('*.json')
    for f in file_list:
        try:
            os.remove(f)
        except IOError:
            logger.info("Error while deleting: {}".format(f))

def test_random_sharpness_c(degrees=(1.6, 1.6), plot=False):
    """
    Test RandomSharpness cpp op
    """
    print(degrees)
    logger.info("Test RandomSharpness cpp op")

    # Original Images
    data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = [C.Decode(), C.Resize((224, 224))]

    ds_original = data.map(input_columns="image", operations=transforms_original)
    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = image
        else:
            images_original = np.append(images_original, image, axis=0)

    # Random Sharpness Adjusted Images
    data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)

    c_op = C.RandomSharpness()
    if degrees is not None:
        c_op = C.RandomSharpness(degrees)

    transforms_random_sharpness = [C.Decode(), C.Resize((224, 224)), c_op]

    ds_random_sharpness = data.map(input_columns="image", operations=transforms_random_sharpness)
    ds_random_sharpness = ds_random_sharpness.batch(512)

    for idx, (image, _) in enumerate(ds_random_sharpness):
        if idx == 0:
            images_random_sharpness = image
        else:
            images_random_sharpness = np.append(images_random_sharpness, image, axis=0)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_random_sharpness[i], images_original[i])

    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_original, images_random_sharpness)

def test_cpp_uniform_augment(plot=False, num_ops=2):
    """
    Test UniformAugment
    """
    logger.info("Test CPP UniformAugment")

    # Original Images
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = [C.Decode(), C.Resize(size=[224, 224]),
                           F.ToTensor()]

    ds_original = ds.map(input_columns="image", operations=transforms_original)
    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = np.transpose(image, (0, 2, 3, 1))
        else:
            images_original = np.append(images_original,
                                        np.transpose(image, (0, 2, 3, 1)),
                                        axis=0)

    # UniformAugment Images
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)

    transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
                     C.RandomHorizontalFlip(),
                     C.RandomVerticalFlip(),
                     C.RandomColorAdjust(),
                     C.RandomRotation(degrees=45)]

    uni_aug = C.UniformAugment(operations=transforms_ua, num_ops=num_ops)

    transforms_all = [C.Decode(), C.Resize(size=[224, 224]),
                      uni_aug,
                      F.ToTensor()]

    ds_ua = ds.map(input_columns="image", operations=transforms_all, num_parallel_workers=1)
    ds_ua = ds_ua.batch(512)

    for idx, (image, _) in enumerate(ds_ua):
        if idx == 0:
            images_ua = np.transpose(image, (0, 2, 3, 1))
        else:
            images_ua = np.append(images_ua,
                                  np.transpose(image, (0, 2, 3, 1)),
                                  axis=0)
    if plot:
        visualize_list(images_original, images_ua)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_ua[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

def test_random_sharpness_c_py(degrees=(1.0, 1.0), plot=False):
    """
    Test Random Sharpness C and python Op
    """
    logger.info("Test RandomSharpness C and python Op")

    # RandomSharpness Images
    data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    data = data.map(input_columns=["image"],
                    operations=[C.Decode(), C.Resize((200, 300))])

    python_op = F.RandomSharpness(degrees)
    c_op = C.RandomSharpness(degrees)

    transforms_op = F.ComposeOp([lambda img: F.ToPIL()(img.astype(np.uint8)),
                                 python_op,
                                 np.array])()

    ds_random_sharpness_py = data.map(input_columns="image", operations=transforms_op)
    ds_random_sharpness_py = ds_random_sharpness_py.batch(512)

    for idx, (image, _) in enumerate(ds_random_sharpness_py):
        if idx == 0:
            images_random_sharpness_py = image
        else:
            images_random_sharpness_py = np.append(images_random_sharpness_py, image, axis=0)

    data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    data = data.map(input_columns=["image"],
                    operations=[C.Decode(), C.Resize((200, 300))])

    ds_images_random_sharpness_c = data.map(input_columns="image", operations=c_op)
    ds_images_random_sharpness_c = ds_images_random_sharpness_c.batch(512)

    for idx, (image, _) in enumerate(ds_images_random_sharpness_c):
        if idx == 0:
            images_random_sharpness_c = image
        else:
            images_random_sharpness_c = np.append(images_random_sharpness_c, image, axis=0)

    num_samples = images_random_sharpness_c.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_random_sharpness_c[i], images_random_sharpness_py[i])

    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_random_sharpness_c, images_random_sharpness_py, visualize_mode=2)

def test_compare_random_color_op(degrees=None, plot=False):
    """
    Compare Random Color op in Python and Cpp
    """
    logger.info("test_random_color_op")

    original_seed = config_get_set_seed(5)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Decode with rgb format set to True
    data1 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)

    if degrees is None:
        c_op = vision.RandomColor()
        p_op = F.RandomColor()
    else:
        c_op = vision.RandomColor(degrees)
        p_op = F.RandomColor(degrees)

    transforms_random_color_py = F.ComposeOp([lambda img: img.astype(np.uint8), F.ToPIL(),
                                              p_op, np.array])

    data1 = data1.map(input_columns=["image"], operations=[vision.Decode(), c_op])
    data2 = data2.map(input_columns=["image"], operations=[vision.Decode()])
    data2 = data2.map(input_columns=["image"], operations=transforms_random_color_py())

    image_random_color_op = []
    image = []

    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        actual = item1["image"]
        expected = item2["image"]
        image_random_color_op.append(actual)
        image.append(expected)
        assert actual.shape == expected.shape
        mse = diff_mse(actual, expected)
        logger.info("MSE= {}".format(str(np.mean(mse))))

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)

    if plot:
        visualize_list(image, image_random_color_op)

def test_auto_contrast_c(plot=False):
    """
    Test AutoContrast C Op
    """
    logger.info("Test AutoContrast C Op")

    # AutoContrast Images
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    ds = ds.map(input_columns=["image"],
                operations=[C.Decode(), C.Resize((224, 224))])
    python_op = F.AutoContrast()
    c_op = C.AutoContrast()
    transforms_op = F.ComposeOp([lambda img: F.ToPIL()(img.astype(np.uint8)),
                                 python_op,
                                 np.array])()

    ds_auto_contrast_py = ds.map(input_columns="image", operations=transforms_op)
    ds_auto_contrast_py = ds_auto_contrast_py.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_py):
        if idx == 0:
            images_auto_contrast_py = image
        else:
            images_auto_contrast_py = np.append(images_auto_contrast_py, image, axis=0)

    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    ds = ds.map(input_columns=["image"],
                operations=[C.Decode(), C.Resize((224, 224))])

    ds_auto_contrast_c = ds.map(input_columns="image", operations=c_op)
    ds_auto_contrast_c = ds_auto_contrast_c.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_c):
        if idx == 0:
            images_auto_contrast_c = image
        else:
            images_auto_contrast_c = np.append(images_auto_contrast_c, image, axis=0)

    num_samples = images_auto_contrast_c.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_auto_contrast_c[i], images_auto_contrast_py[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    np.testing.assert_equal(np.mean(mse), 0.0)

    if plot:
        visualize_list(images_auto_contrast_c, images_auto_contrast_py, visualize_mode=2)

def test_invert_py_c(plot=False):
    """
    Test Invert Cpp op and python op
    """
    logger.info("Test Invert cpp and python op")

    # Invert Images in cpp
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    ds = ds.map(input_columns=["image"],
                operations=[C.Decode(), C.Resize((224, 224))])

    ds_c_invert = ds.map(input_columns="image", operations=C.Invert())
    ds_c_invert = ds_c_invert.batch(512)

    for idx, (image, _) in enumerate(ds_c_invert):
        if idx == 0:
            images_c_invert = image
        else:
            images_c_invert = np.append(images_c_invert, image, axis=0)

    # Invert images in python
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    ds = ds.map(input_columns=["image"],
                operations=[C.Decode(), C.Resize((224, 224))])

    transforms_p_invert = F.ComposeOp([lambda img: img.astype(np.uint8),
                                       F.ToPIL(),
                                       F.Invert(),
                                       np.array])

    ds_p_invert = ds.map(input_columns="image", operations=transforms_p_invert())
    ds_p_invert = ds_p_invert.batch(512)

    for idx, (image, _) in enumerate(ds_p_invert):
        if idx == 0:
            images_p_invert = image
        else:
            images_p_invert = np.append(images_p_invert, image, axis=0)

    num_samples = images_c_invert.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_p_invert[i], images_c_invert[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_c_invert, images_p_invert, visualize_mode=2)

def test_random_color_c(degrees=(0.1, 1.9), plot=False, run_golden=True):
    """
    Test Cpp RandomColor
    """
    logger.info("test_random_color_op")

    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Decode with rgb format set to True
    data1 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)

    # Serialize and Load dataset requires using vision.Decode instead of vision.Decode().
    if degrees is None:
        c_op = vision.RandomColor()
    else:
        c_op = vision.RandomColor(degrees)

    data1 = data1.map(input_columns=["image"], operations=[vision.Decode()])
    data2 = data2.map(input_columns=["image"], operations=[vision.Decode(), c_op])

    image_random_color_op = []
    image = []

    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        actual = item1["image"]
        expected = item2["image"]
        image.append(actual)
        image_random_color_op.append(expected)

    if run_golden:
        # Compare with expected md5 from images
        filename = "random_color_op_02_result.npz"
        save_and_check_md5(data2, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(image, image_random_color_op)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)

def test_random_posterize_op_c(plot=False, run_golden=False):
    """
    Test RandomPosterize in C transformations (uses assertion on mse as using md5
    could have jpeg decoding inconsistencies)
    """
    logger.info("test_random_posterize_op_c")
    original_seed = config_get_set_seed(55)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # define map operations
    transforms1 = [
        c_vision.Decode(),
        c_vision.RandomPosterize((1, 8))
    ]

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data1 = data1.map(input_columns=["image"], operations=transforms1)
    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=[c_vision.Decode()])

    image_posterize = []
    image_original = []
    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        image1 = item1["image"]
        image2 = item2["image"]
        image_posterize.append(image1)
        image_original.append(image2)

    # Check mse, as md5 can be inconsistent.
    # mse = 2.9668956 is calculated from a thousand runs of
    # diff_mse(np.array(image_original), np.array(image_posterize)) that all produced the same mse.
    # Allow for an error of 0.0000005.
    assert abs(2.9668956 - diff_mse(np.array(image_original), np.array(image_posterize))) <= 0.0000005

    if run_golden:
        # check results with md5 comparison
        filename = "random_posterize_01_result_c.npz"
        save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(image_original, image_posterize)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)

def test_cache_nomap_basic5():
    """
    A TF reader dataset (a non mappable dataset) with a cache over it just after the leaf.
    Same as test 3, but this one does not have the shuffle arg, causing tf to default to global
    shuffle, which attempts to inject a shuffle operator. However, since there is a cache we do
    not need global shuffle, so the shuffle will not be built. It ends up being identical to test
    basic 3, however we arrive at the same tree in different codepaths (if there was no cache,
    then the shuffle IS built).

       Repeat
         |
     Map(decode)
         |
       Cache
         |
      TFReader
    """
    logger.info("Test cache nomap basic 5")

    # This dataset has 3 records in it only
    some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
    ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], cache=some_cache)
    decode_op = c_vision.Decode()
    ds1 = ds1.map(input_columns=["image"], operations=decode_op)
    ds1 = ds1.repeat(4)

    num_iter = 0
    for _ in ds1.create_dict_iterator():
        num_iter += 1

    logger.info("Number of data in ds1: {} ".format(num_iter))
    assert num_iter == 12
    logger.info("test_cache_nomap_basic5 Ended.\n")

def test_random_affine_c_md5():
    """
    Test RandomAffine C Op with md5 comparison
    """
    logger.info("test_random_affine_c_md5")
    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)
    # define map operations
    transforms = [
        c_vision.Decode(),
        c_vision.RandomAffine(degrees=(-5, 15), translate=(0.1, 0.3),
                              scale=(0.9, 1.1), shear=(-10, 10, -5, 5))
    ]

    #  Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data = data.map(input_columns=["image"], operations=transforms)

    # check results with md5 comparison
    filename = "random_affine_01_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)

def test_random_crop_08_c():
    """
    Test RandomCrop op with c_transforms: padding_mode is Border.EDGE, expected to pass
    """
    logger.info("test_random_crop_08_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: The padding_mode is Border.EDGE.
    random_crop_op = c_vision.RandomCrop(512, [200, 200, 200, 200], padding_mode=mode.Border.EDGE)
    decode_op = c_vision.Decode()
    data = data.map(input_columns=["image"], operations=decode_op)
    data = data.map(input_columns=["image"], operations=random_crop_op)

    filename = "random_crop_08_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)

def test_random_crop_05_c():
    """
    Test RandomCrop op with c_transforms:
    input image size < crop size but pad_if_needed is enabled,
    expected to pass
    """
    logger.info("test_random_crop_05_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: The size of the image is 4032*2268
    random_crop_op = c_vision.RandomCrop([2268, 4033], [200, 200, 200, 200], pad_if_needed=True)
    decode_op = c_vision.Decode()
    data = data.map(input_columns=["image"], operations=decode_op)
    data = data.map(input_columns=["image"], operations=random_crop_op)

    filename = "random_crop_05_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)

def test_random_crop_op_c(plot=False):
    """
    Test RandomCrop Op in c transforms
    """
    logger.info("test_random_crop_op_c")

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
    decode_op = c_vision.Decode()

    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"], operations=random_crop_op)

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=decode_op)

    image_cropped = []
    image = []
    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        image1 = item1["image"]
        image2 = item2["image"]
        image_cropped.append(image1)
        image.append(image2)
    if plot:
        visualize(image, image_cropped)

def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num=1, rank=0,
                       is_training=True, num_parallel_workers=4):
    """Create SSD dataset with MindDataset."""
    ds = de.MindDataset(mindrecord_file, columns_list=["img_id", "image", "annotation"],
                        num_shards=device_num, shard_id=rank,
                        num_parallel_workers=num_parallel_workers, shuffle=is_training)
    decode = C.Decode()
    ds = ds.map(input_columns=["image"], operations=decode)
    change_swap_op = C.HWC2CHW()
    normalize_op = C.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                               std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    color_adjust_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
    compose_map_func = (lambda img_id, image, annotation:
                        preprocess_fn(img_id, image, annotation, is_training))
    if is_training:
        output_columns = ["image", "box", "label", "num_match"]
        trans = [color_adjust_op, normalize_op, change_swap_op]
    else:
        output_columns = ["img_id", "image", "image_shape"]
        trans = [normalize_op, change_swap_op]
    ds = ds.map(input_columns=["img_id", "image", "annotation"],
                output_columns=output_columns, columns_order=output_columns,
                operations=compose_map_func, python_multiprocessing=is_training,
                num_parallel_workers=num_parallel_workers)
    ds = ds.map(input_columns=["image"], operations=trans, python_multiprocessing=is_training,
                num_parallel_workers=num_parallel_workers)
    ds = ds.batch(batch_size, drop_remainder=True)
    ds = ds.repeat(repeat_num)
    return ds

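# Usage sketch (illustrative only): one way the create_ssd_dataset helper above might be
# called from a training script. The mindrecord path and example function name below are
# assumptions for demonstration, not values taken from this repository.
def example_create_ssd_dataset_usage():
    # Hypothetical path to a MindRecord file produced by an SSD data conversion step.
    mindrecord_file = "./MindRecord_COCO/ssd.mindrecord0"
    train_ds = create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=1,
                                  device_num=1, rank=0, is_training=True)
    # Iterate once; in training mode each row carries the columns produced above:
    # "image", "box", "label", "num_match".
    for row in train_ds.create_dict_iterator():
        print("image batch shape: {}".format(row["image"].shape))
        break
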
def test_random_crop_op():
    """
    Test RandomCrop op
    """
    logger.info("test_random_crop_op")

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200])
    decode_op = vision.Decode()
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"], operations=random_crop_op)

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=decode_op)

    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        image1 = item1["image"]
        image2 = item2["image"]

def test_exception_02():
    """
    Test multiple exceptions with invalid input
    """
    logger.info("test_exception_02")
    num_samples = 0
    with pytest.raises(ValueError) as info:
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
    assert "num_samples must be greater than 0" in str(info.value)

    num_samples = -1
    with pytest.raises(ValueError) as info:
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
    assert "num_samples must be greater than 0" in str(info.value)

    num_samples = 1
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
    data = data.map(input_columns=["image"], operations=vision.Decode())
    data = data.map(input_columns=["image"], operations=vision.Resize((100, 100)))
    # Confirm 1 sample in dataset
    assert sum([1 for _ in data]) == 1
    num_iters = 0
    for _ in data.create_dict_iterator():
        num_iters += 1
    assert num_iters == 1

def test_imagefolder_padded_with_decode_and_get_dataset_size():
    num_shards = 5
    count = 0
    for shard_id in range(num_shards):
        DATA_DIR = "../data/dataset/testPK/data"
        data = ds.ImageFolderDatasetV2(DATA_DIR)

        white_io = BytesIO()
        Image.new('RGB', (224, 224), (255, 255, 255)).save(white_io, 'JPEG')
        padded_sample = {}
        padded_sample['image'] = np.array(bytearray(white_io.getvalue()), dtype='uint8')
        padded_sample['label'] = np.array(-1, np.int32)

        white_samples = [padded_sample, padded_sample, padded_sample, padded_sample]
        data2 = ds.PaddedDataset(white_samples)
        data3 = data + data2

        testsampler = ds.DistributedSampler(num_shards=num_shards, shard_id=shard_id,
                                            shuffle=False, num_samples=None)
        data3.use_sampler(testsampler)
        shard_dataset_size = data3.get_dataset_size()
        data3 = data3.map(input_columns="image", operations=V_C.Decode())
        shard_sample_count = 0
        for ele in data3.create_dict_iterator():
            print("label: {}".format(ele['label']))
            count += 1
            shard_sample_count += 1
        assert shard_sample_count in (9, 10)
        assert shard_dataset_size == shard_sample_count
    assert count == 48

def test_cache_nomap_basic7():
    """
    A TF reader dataset (a non mappable dataset) that uses global shuffle, and is cached
    followed by map. In this one, the tf dataset with global shuffle might want to inject a
    shuffle op over top of the tf reader, but since a cache is given, it will choose not to.

       Repeat
         |
     Map(decode)
         |
       Cache
         |
      TFReader
    """
    logger.info("Test cache nomap basic 7")

    # This dataset has 3 records in it only
    some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
    ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                             shuffle=ds.Shuffle.GLOBAL, cache=some_cache)
    decode_op = c_vision.Decode()
    ds1 = ds1.map(input_columns=["image"], operations=decode_op)
    ds1 = ds1.repeat(4)

    num_iter = 0
    for _ in ds1.create_dict_iterator():
        num_iter += 1

    logger.info("Number of data in ds1: {} ".format(num_iter))
    assert num_iter == 12
    logger.info("test_cache_nomap_basic7 Ended.\n")

def test_cache_nomap_basic3():
    """
    A TF reader dataset (a non mappable dataset) with a cache over it just after the leaf.

       Repeat
         |
     Map(decode)
         |
       Cache
         |
      TFReader
    """
    logger.info("Test cache nomap basic 3")
    some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)
    ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                             shuffle=False, cache=some_cache)
    decode_op = c_vision.Decode()
    ds1 = ds1.map(input_columns=["image"], operations=decode_op)
    ds1 = ds1.repeat(4)

    num_iter = 0
    for _ in ds1.create_dict_iterator():
        num_iter += 1

    logger.info("Number of data in ds1: {} ".format(num_iter))
    assert num_iter == 12
    logger.info("test_cache_nomap_basic3 Ended.\n")

def test_random_crop_01_c():
    """
    Test RandomCrop op with c_transforms: size is a single integer, expected to pass
    """
    logger.info("test_random_crop_01_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: If size is an int, a square crop of size (size, size) is returned.
    random_crop_op = c_vision.RandomCrop(512)
    decode_op = c_vision.Decode()
    data = data.map(input_columns=["image"], operations=decode_op)
    data = data.map(input_columns=["image"], operations=random_crop_op)

    filename = "random_crop_01_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)

def test_random_resize_op(plot=False):
    """
    Test random_resize_op
    """
    logger.info("Test resize")
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

    # define map operations
    decode_op = vision.Decode()
    resize_op = vision.RandomResize(10)

    # apply map operations on images
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data2 = data1.map(input_columns=["image"], operations=resize_op)
    image_original = []
    image_resized = []
    num_iter = 0
    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        image_1 = item1["image"]
        image_2 = item2["image"]
        image_original.append(image_1)
        image_resized.append(image_2)
        num_iter += 1
    if plot:
        visualize_list(image_original, image_resized)

def test_random_crop_02_c():
    """
    Test RandomCrop op with c_transforms: size is a list/tuple with length 2, expected to pass
    """
    logger.info("test_random_crop_02_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: If size is a sequence of length 2, it should be (height, width).
    random_crop_op = c_vision.RandomCrop([512, 375])
    decode_op = c_vision.Decode()
    data = data.map(input_columns=["image"], operations=decode_op)
    data = data.map(input_columns=["image"], operations=random_crop_op)

    filename = "random_crop_02_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)

def test_me_de_train_dataset():
    data_list = ["{0}/train-00001-of-01024.data".format(data_path)]
    data_set_new = ds.TFRecordDataset(data_list, schema=SCHEMA_DIR,
                                      columns_list=["image/encoded", "image/class/label"])

    resize_height = 224
    resize_width = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    decode_op = vision.Decode()
    resize_op = vision.Resize((resize_height, resize_width), Inter.LINEAR)  # Bilinear as default
    rescale_op = vision.Rescale(rescale, shift)

    # apply map operations on images
    data_set_new = data_set_new.map(input_columns="image/encoded", operations=decode_op)
    data_set_new = data_set_new.map(input_columns="image/encoded", operations=resize_op)
    data_set_new = data_set_new.map(input_columns="image/encoded", operations=rescale_op)
    hwc2chw_op = vision.HWC2CHW()
    data_set_new = data_set_new.map(input_columns="image/encoded", operations=hwc2chw_op)
    data_set_new = data_set_new.repeat(1)
    # apply batch operations
    batch_size_new = 32
    data_set_new = data_set_new.batch(batch_size_new, drop_remainder=True)
    return data_set_new

def test_random_crop_and_resize_05_c():
    """
    Test RandomCropAndResize with c_transforms: invalid range of ratio (max<min),
    expected to raise ValueError
    """
    logger.info("test_random_crop_and_resize_05_c")
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    try:
        # Generate dataset
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        decode_op = c_vision.Decode()
        # If the input range of ratio is not in the order of (min, max), ValueError will be raised.
        random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (1, 1), (1, 0.5))
        data = data.map(input_columns=["image"], operations=decode_op)
        data = data.map(input_columns=["image"], operations=random_crop_and_resize_op)
        image_list = []
        for item in data.create_dict_iterator():
            image = item["image"]
            image_list.append(image.shape)
    except ValueError as e:
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Input range is not valid" in str(e)