def test_random_affine_op_c(plot=False):
    """
    Test RandomAffine in C transformations
    """
    logger.info("test_random_affine_op_c")

    # Pipeline 1: decode followed by a pure-translation RandomAffine
    affine_ops = [
        c_vision.Decode(),
        c_vision.RandomAffine(degrees=0, translate=(0.5, 0.5, 0, 0))
    ]
    # Pipeline 2: decode only, used as the reference
    decode_only = [
        c_vision.Decode()
    ]

    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data1 = data1.map(operations=affine_ops, input_columns=["image"])

    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(operations=decode_only, input_columns=["image"])

    image_affine = []
    image_original = []
    iter1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    iter2 = data2.create_dict_iterator(num_epochs=1, output_numpy=True)
    for row1, row2 in zip(iter1, iter2):
        image_affine.append(row1["image"])
        image_original.append(row2["image"])

    if plot:
        visualize_list(image_original, image_affine)
def test_random_posterize_op_fixed_point_c(plot=False, run_golden=True):
    """
    Test RandomPosterize in C transformations with fixed point

    Args:
        plot (bool): visualize original vs posterized images.
        run_golden (bool): compare pipeline output against the stored md5 golden file.
    """
    # Fix: previously logged "test_random_posterize_op_c", which is the name of
    # a different test; log this test's own name instead.
    logger.info("test_random_posterize_op_fixed_point_c")

    # define map operations: decode, then posterize with a fixed bit value of 1
    transforms1 = [
        c_vision.Decode(),
        c_vision.RandomPosterize(1)
    ]

    # First dataset: decode + posterize
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data1 = data1.map(operations=transforms1, input_columns=["image"])
    # Second dataset: decode only (reference images)
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(operations=[c_vision.Decode()], input_columns=["image"])

    image_posterize = []
    image_original = []
    for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
                            data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        image_posterize.append(item1["image"])
        image_original.append(item2["image"])

    if run_golden:
        # check results with md5 comparison
        filename = "random_posterize_fixed_point_01_result_c.npz"
        save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(image_original, image_posterize)
def test_auto_contrast_invalid_cutoff_param_c():
    """
    Test AutoContrast C Op with invalid cutoff parameter

    Both a negative cutoff and a cutoff above 100 must raise ValueError.
    """
    logger.info("Test AutoContrast C Op with invalid cutoff parameter")
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(operations=[C.Decode(),
                                            C.Resize((224, 224)),
                                            lambda img: np.array(img[:, :, 0])],
                                input_columns=["image"])
        # invalid cutoff (below the allowed interval)
        data_set = data_set.map(operations=C.AutoContrast(cutoff=-10.0), input_columns="image")
        # Fix: fail explicitly when no exception is raised; previously the test
        # passed vacuously if the invalid parameter was silently accepted.
        assert False, "Expected ValueError for cutoff=-10.0"
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(operations=[C.Decode(),
                                            C.Resize((224, 224)),
                                            lambda img: np.array(img[:, :, 0])],
                                input_columns=["image"])
        # invalid cutoff (above the allowed interval)
        data_set = data_set.map(operations=C.AutoContrast(cutoff=120.0), input_columns="image")
        assert False, "Expected ValueError for cutoff=120.0"
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
def test_auto_contrast_invalid_ignore_param_c():
    """
    Test AutoContrast C Op with invalid ignore parameter

    Non-integer scalars and out-of-range tuple members must raise TypeError.
    """
    logger.info("Test AutoContrast C Op with invalid ignore parameter")
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(operations=[
            C.Decode(),
            C.Resize((224, 224)),
            lambda img: np.array(img[:, :, 0])
        ], input_columns=["image"])
        # invalid ignore: float instead of int
        data_set = data_set.map(operations=C.AutoContrast(ignore=255.5), input_columns="image")
        # Fix: fail explicitly when no exception is raised; previously the test
        # passed vacuously if the invalid parameter was silently accepted.
        assert False, "Expected TypeError for ignore=255.5"
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value 255.5 is not of type" in str(error)
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(operations=[
            C.Decode(),
            C.Resize((224, 224)),
            lambda img: np.array(img[:, :, 0])
        ], input_columns=["image"])
        # invalid ignore: tuple is not an accepted type here
        data_set = data_set.map(operations=C.AutoContrast(ignore=(10, 100)), input_columns="image")
        assert False, "Expected TypeError for ignore=(10, 100)"
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value (10,100) is not of type" in str(
            error)
def test_eager_exceptions():
    """Check that eager-mode vision ops reject invalid inputs with clear errors."""
    # Decode called on a plain string path instead of raw bytes
    try:
        img = "../data/dataset/apple.jpg"
        img = C.Decode()(img)
        assert False
    except TypeError as e:
        assert "Input should be a 1-D NumPy with integer type" in str(e)

    # Decode called on a non-integer NumPy array
    try:
        img = np.array(["a", "b", "c"])
        img = C.Decode()(img)
        assert False
    except TypeError as e:
        assert "Input should be a 1-D NumPy with integer type" in str(e)

    # Resize with a negative target size
    try:
        img = cv2.imread("../data/dataset/apple.jpg")
        img = C.Resize(size=(-32, 32))(img)
        assert False
    except ValueError as e:
        assert "not within the required interval" in str(e)

    # Pad called on a plain string path
    try:
        img = "../data/dataset/apple.jpg"
        img = C.Pad(padding=4)(img)
        assert False
    except TypeError as e:
        assert "Input should be NumPy or PIL image" in str(e)
def test_pipeline():
    """
    Test that our configuration pipeline works when we set parameters at
    different locations in dataset code.

    Fixes vs. previous version: the generated json files are now removed and
    the num_parallel_workers configuration restored even when the assertion
    fails (try/finally), and the misleading "output is different" comment is
    corrected — the assert actually checks that both serializations are equal.
    """
    # Save original configuration values
    num_parallel_workers_original = ds.config.get_num_parallel_workers()
    try:
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
        data1 = data1.map(operations=[c_vision.Decode(True)], input_columns=["image"])
        ds.serialize(data1, "testpipeline.json")

        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR,
                                   num_parallel_workers=num_parallel_workers_original,
                                   shuffle=False)
        data2 = data2.map(operations=[c_vision.Decode(True)], input_columns=["image"])
        ds.serialize(data2, "testpipeline2.json")

        # Both serialized pipelines are currently identical because
        # num_parallel_workers does not get updated in the serialized output.
        assert filecmp.cmp('testpipeline.json', 'testpipeline2.json')
    finally:
        # Remove generated json files even if the assertion above failed
        for f in glob.glob('*.json'):
            try:
                os.remove(f)
            except IOError:
                logger.info("Error while deleting: {}".format(f))
        # Restore original configuration values
        ds.config.set_num_parallel_workers(num_parallel_workers_original)
def test_equalize_py_c(plot=False):
    """
    Test Equalize Cpp op and python op
    """
    logger.info("Test Equalize cpp and python op")

    # Equalize via the C++ op
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))],
                            input_columns=["image"])
    ds_c_equalize = data_set.map(operations=C.Equalize(), input_columns="image")
    ds_c_equalize = ds_c_equalize.batch(512)

    for idx, (image, _) in enumerate(ds_c_equalize):
        batch = image.asnumpy()
        images_c_equalize = batch if idx == 0 else np.append(images_c_equalize, batch, axis=0)

    # Equalize via the Python op (uint8 -> PIL -> Equalize -> ndarray)
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))],
                            input_columns=["image"])
    transforms_p_equalize = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: img.astype(np.uint8),
         F.ToPIL(),
         F.Equalize(),
         np.array])
    ds_p_equalize = data_set.map(operations=transforms_p_equalize, input_columns="image")
    ds_p_equalize = ds_p_equalize.batch(512)

    for idx, (image, _) in enumerate(ds_p_equalize):
        batch = image.asnumpy()
        images_p_equalize = batch if idx == 0 else np.append(images_p_equalize, batch, axis=0)

    # Per-sample MSE between the two implementations
    num_samples = images_c_equalize.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_p_equalize[i], images_c_equalize[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_c_equalize, images_p_equalize, visualize_mode=2)
def test_compare_random_color_op(degrees=None, plot=False):
    """
    Compare Random Color op in Python and Cpp
    """
    logger.info("test_random_color_op")

    original_seed = config_get_set_seed(5)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Two identical source pipelines; one gets the C op, the other the Python op
    data1 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)

    if degrees is None:
        c_op = vision.RandomColor()
        p_op = F.RandomColor()
    else:
        c_op = vision.RandomColor(degrees)
        p_op = F.RandomColor(degrees)

    transforms_random_color_py = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: img.astype(np.uint8), F.ToPIL(),
         p_op, np.array])

    data1 = data1.map(operations=[vision.Decode(), c_op], input_columns=["image"])
    data2 = data2.map(operations=[vision.Decode()], input_columns=["image"])
    data2 = data2.map(operations=transforms_random_color_py, input_columns=["image"])

    image_random_color_op = []
    image = []
    rows1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    rows2 = data2.create_dict_iterator(num_epochs=1, output_numpy=True)
    for item1, item2 in zip(rows1, rows2):
        actual = item1["image"]
        expected = item2["image"]
        image_random_color_op.append(actual)
        image.append(expected)
        # Both implementations must at least produce the same output shape
        assert actual.shape == expected.shape

    mse = diff_mse(actual, expected)
    logger.info("MSE= {}".format(str(np.mean(mse))))

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)

    if plot:
        visualize_list(image, image_random_color_op)
def test_cpp_uniform_augment(plot=False, num_ops=2):
    """
    Test UniformAugment
    """
    logger.info("Test CPP UniformAugment")

    # Original Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_original = [C.Decode(), C.Resize(size=[224, 224]),
                           F.ToTensor()]
    ds_original = data_set.map(operations=transforms_original, input_columns="image")
    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        # ToTensor yields NCHW; convert back to NHWC for comparison/plotting
        batch = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        images_original = batch if idx == 0 else np.append(images_original, batch, axis=0)

    # UniformAugment Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
                     C.RandomHorizontalFlip(),
                     C.RandomVerticalFlip(),
                     C.RandomColorAdjust(),
                     C.RandomRotation(degrees=45)]
    uni_aug = C.UniformAugment(transforms=transforms_ua, num_ops=num_ops)
    transforms_all = [C.Decode(), C.Resize(size=[224, 224]),
                      uni_aug,
                      F.ToTensor()]
    ds_ua = data_set.map(operations=transforms_all, input_columns="image", num_parallel_workers=1)
    ds_ua = ds_ua.batch(512)

    for idx, (image, _) in enumerate(ds_ua):
        batch = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        images_ua = batch if idx == 0 else np.append(images_ua, batch, axis=0)

    if plot:
        visualize_list(images_original, images_ua)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_ua[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_random_sharpness_c(degrees=(1.6, 1.6), plot=False):
    """
    Test RandomSharpness cpp op

    Args:
        degrees: degree range forwarded to RandomSharpness; None uses the op's default.
        plot (bool): visualize original vs sharpened images.
    """
    # Fix: removed stray debug print(degrees) left over from development.
    logger.info("Test RandomSharpness cpp op")

    # Original Images
    data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_original = [C.Decode(), C.Resize((224, 224))]
    ds_original = data.map(operations=transforms_original, input_columns="image")
    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(
            ds_original.create_tuple_iterator(output_numpy=True)):
        if idx == 0:
            images_original = image
        else:
            images_original = np.append(images_original, image, axis=0)

    # Random Sharpness Adjusted Images
    data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    c_op = C.RandomSharpness()
    if degrees is not None:
        c_op = C.RandomSharpness(degrees)
    transforms_random_sharpness = [C.Decode(), C.Resize((224, 224)), c_op]
    ds_random_sharpness = data.map(operations=transforms_random_sharpness, input_columns="image")
    ds_random_sharpness = ds_random_sharpness.batch(512)

    for idx, (image, _) in enumerate(
            ds_random_sharpness.create_tuple_iterator(output_numpy=True)):
        if idx == 0:
            images_random_sharpness = image
        else:
            images_random_sharpness = np.append(images_random_sharpness, image, axis=0)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_random_sharpness[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_original, images_random_sharpness)
def test_auto_contrast_c(plot=False):
    """
    Test AutoContrast C Op
    """
    logger.info("Test AutoContrast C Op")

    # Python AutoContrast on decoded/resized images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))],
                            input_columns=["image"])
    python_op = F.AutoContrast(cutoff=10.0, ignore=[10, 20])
    c_op = C.AutoContrast(cutoff=10.0, ignore=[10, 20])
    transforms_op = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: F.ToPIL()(img.astype(np.uint8)),
         python_op,
         np.array])

    ds_auto_contrast_py = data_set.map(operations=transforms_op, input_columns="image")
    ds_auto_contrast_py = ds_auto_contrast_py.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_py):
        batch = image.asnumpy()
        images_auto_contrast_py = batch if idx == 0 else np.append(images_auto_contrast_py, batch, axis=0)

    # C++ AutoContrast on an identical pipeline
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))],
                            input_columns=["image"])
    ds_auto_contrast_c = data_set.map(operations=c_op, input_columns="image")
    ds_auto_contrast_c = ds_auto_contrast_c.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_c):
        batch = image.asnumpy()
        images_auto_contrast_c = batch if idx == 0 else np.append(images_auto_contrast_c, batch, axis=0)

    # Both implementations must agree exactly (mean MSE of 0)
    num_samples = images_auto_contrast_c.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_auto_contrast_c[i], images_auto_contrast_py[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    np.testing.assert_equal(np.mean(mse), 0.0)

    # Compare with expected md5 from images
    filename = "autocontrast_01_result_c.npz"
    save_and_check_md5(ds_auto_contrast_c, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(images_auto_contrast_c, images_auto_contrast_py, visualize_mode=2)
def test_auto_contrast_one_channel_c(plot=False):
    """
    Test AutoContrast C op with one channel
    """
    logger.info("Test AutoContrast C Op With One Channel Images")

    # Python AutoContrast: take channel 0 as uint8, go through PIL
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))],
                            input_columns=["image"])
    python_op = F.AutoContrast()
    c_op = C.AutoContrast()
    # not using F.ToTensor() since it converts to floats
    transforms_op = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: (np.array(img)[:, :, 0]).astype(np.uint8),
         F.ToPIL(),
         python_op,
         np.array])

    ds_auto_contrast_py = data_set.map(operations=transforms_op, input_columns="image")
    ds_auto_contrast_py = ds_auto_contrast_py.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_py):
        batch = image.asnumpy()
        images_auto_contrast_py = batch if idx == 0 else np.append(images_auto_contrast_py, batch, axis=0)

    # C++ AutoContrast on the same single-channel input
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(),
                                        C.Resize((224, 224)),
                                        lambda img: np.array(img[:, :, 0])],
                            input_columns=["image"])
    ds_auto_contrast_c = data_set.map(operations=c_op, input_columns="image")
    ds_auto_contrast_c = ds_auto_contrast_c.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_c):
        batch = image.asnumpy()
        images_auto_contrast_c = batch if idx == 0 else np.append(images_auto_contrast_c, batch, axis=0)

    # Both implementations must agree exactly (mean MSE of 0)
    num_samples = images_auto_contrast_c.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_auto_contrast_c[i], images_auto_contrast_py[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    np.testing.assert_equal(np.mean(mse), 0.0)

    if plot:
        visualize_list(images_auto_contrast_c, images_auto_contrast_py, visualize_mode=2)
def test_cutmix_batch_success3(plot=False):
    """
    Test CutMixBatch op with default values for alpha and prob on a batch of
    HWC images on ImageFolderDataset
    """
    logger.info("test_cutmix_batch_success3")

    # Reference pipeline: decode, resize, batch — no CutMix
    ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    decode_op = vision.Decode()
    ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
    resize_op = vision.Resize([224, 224])
    ds_original = ds_original.map(operations=[resize_op], input_columns=["image"])
    ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)

    images_original = None
    for idx, (image, _) in enumerate(ds_original):
        batch = image.asnumpy()
        images_original = batch if idx == 0 else np.append(images_original, batch, axis=0)

    # CutMix pipeline: same preprocessing + OneHot labels + CutMixBatch
    data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    decode_op = vision.Decode()
    data1 = data1.map(operations=[decode_op], input_columns=["image"])
    resize_op = vision.Resize([224, 224])
    data1 = data1.map(operations=[resize_op], input_columns=["image"])
    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
    data1 = data1.batch(4, pad_info={}, drop_remainder=True)
    data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])

    images_cutmix = None
    for idx, (image, _) in enumerate(data1):
        batch = image.asnumpy()
        images_cutmix = batch if idx == 0 else np.append(images_cutmix, batch, axis=0)

    if plot:
        visualize_list(images_original, images_cutmix)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_cutmix[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_cutmix_batch_success4(plot=False):
    """
    Test CutMixBatch on a dataset where OneHot returns a 2D vector
    """
    logger.info("test_cutmix_batch_success4")

    # Reference pipeline on CelebA: decode, resize, batch — no CutMix
    ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False)
    decode_op = vision.Decode()
    ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
    resize_op = vision.Resize([224, 224])
    ds_original = ds_original.map(operations=[resize_op], input_columns=["image"])
    ds_original = ds_original.batch(2, drop_remainder=True)

    images_original = None
    for idx, (image, _) in enumerate(ds_original):
        batch = image.asnumpy()
        images_original = batch if idx == 0 else np.append(images_original, batch, axis=0)

    # CutMix pipeline: OneHot on the "attr" column yields a 2D label vector
    data1 = ds.CelebADataset(dataset_dir=DATA_DIR3, shuffle=False)
    decode_op = vision.Decode()
    data1 = data1.map(operations=[decode_op], input_columns=["image"])
    resize_op = vision.Resize([224, 224])
    data1 = data1.map(operations=[resize_op], input_columns=["image"])
    one_hot_op = data_trans.OneHot(num_classes=100)
    data1 = data1.map(operations=one_hot_op, input_columns=["attr"])
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.5, 0.9)
    data1 = data1.batch(2, drop_remainder=True)
    data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "attr"])

    images_cutmix = None
    for idx, (image, _) in enumerate(data1):
        batch = image.asnumpy()
        images_cutmix = batch if idx == 0 else np.append(images_cutmix, batch, axis=0)

    if plot:
        visualize_list(images_original, images_cutmix)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_cutmix[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_random_posterize_op_c(plot=False, run_golden=False):
    """
    Test RandomPosterize in C transformations (uses assertion on mse as using md5
    could have jpeg decoding inconsistencies)
    """
    logger.info("test_random_posterize_op_c")
    original_seed = config_get_set_seed(55)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Pipeline 1: decode + posterize with a random bit range of (1, 8)
    posterize_ops = [c_vision.Decode(), c_vision.RandomPosterize((1, 8))]
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data1 = data1.map(operations=posterize_ops, input_columns=["image"])

    # Pipeline 2: decode only (reference)
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(operations=[c_vision.Decode()], input_columns=["image"])

    image_posterize = []
    image_original = []
    rows1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    rows2 = data2.create_dict_iterator(num_epochs=1, output_numpy=True)
    for item1, item2 in zip(rows1, rows2):
        image_posterize.append(item1["image"])
        image_original.append(item2["image"])

    # check mse as md5 can be inconsistent.
    # mse = 2.9668956 is calculated from
    # a thousand runs of diff_mse(np.array(image_original), np.array(image_posterize)) that all produced the same mse.
    # allow for an error of 0.0000005
    observed_mse = diff_mse(np.array(image_original), np.array(image_posterize))
    assert abs(2.9668956 - observed_mse) <= 0.0000005

    if run_golden:
        # check results with md5 comparison
        filename = "random_posterize_01_result_c.npz"
        save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(image_original, image_posterize)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_color_c(degrees=(0.1, 1.9), plot=False, run_golden=True):
    """
    Test Cpp RandomColor
    """
    logger.info("test_random_color_op")

    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # data1: decode only (reference); data2: decode + RandomColor
    data1 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)

    c_op = vision.RandomColor() if degrees is None else vision.RandomColor(degrees)

    data1 = data1.map(operations=[vision.Decode()], input_columns=["image"])
    data2 = data2.map(operations=[vision.Decode(), c_op], input_columns=["image"])

    image_random_color_op = []
    image = []
    rows1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    rows2 = data2.create_dict_iterator(num_epochs=1, output_numpy=True)
    for item1, item2 in zip(rows1, rows2):
        image.append(item1["image"])
        image_random_color_op.append(item2["image"])

    if run_golden:
        # Compare with expected md5 from images
        filename = "random_color_op_02_result.npz"
        save_and_check_md5(data2, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(image, image_random_color_op)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_mixup_batch_success4(plot=False):
    """
    Test MixUpBatch op on a dataset where OneHot returns a 2D vector.
    Alpha parameter will be selected by default in this case
    """
    logger.info("test_mixup_batch_success4")

    # Reference pipeline on CelebA: decode + batch, no MixUp
    ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False)
    decode_op = vision.Decode()
    ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
    ds_original = ds_original.batch(2, drop_remainder=True)

    images_original = None
    for idx, (image, _) in enumerate(ds_original):
        batch = image.asnumpy()
        images_original = batch if idx == 0 else np.append(images_original, batch, axis=0)

    # MixUp pipeline: OneHot on "attr" (2D label vector) + default-alpha MixUpBatch
    data1 = ds.CelebADataset(DATA_DIR3, shuffle=False)
    decode_op = vision.Decode()
    data1 = data1.map(operations=[decode_op], input_columns=["image"])
    one_hot_op = data_trans.OneHot(num_classes=100)
    data1 = data1.map(operations=one_hot_op, input_columns=["attr"])
    mixup_batch_op = vision.MixUpBatch()
    data1 = data1.batch(2, drop_remainder=True)
    data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "attr"])

    images_mixup = np.array([])
    for idx, (image, _) in enumerate(data1):
        batch = image.asnumpy()
        images_mixup = batch if idx == 0 else np.append(images_mixup, batch, axis=0)

    if plot:
        visualize_list(images_original, images_mixup)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_mixup[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_mixup_batch_success2(plot=False):
    """
    Test MixUpBatch op with specified alpha parameter on ImageFolderDataset
    """
    logger.info("test_mixup_batch_success2")

    # Reference pipeline: decode + batch, no MixUp
    ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    decode_op = vision.Decode()
    ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
    ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)

    images_original = None
    for idx, (image, _) in enumerate(ds_original):
        batch = image.asnumpy()
        images_original = batch if idx == 0 else np.append(images_original, batch, axis=0)

    # MixUp pipeline: OneHot labels + MixUpBatch with alpha=2.0
    data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    decode_op = vision.Decode()
    data1 = data1.map(operations=[decode_op], input_columns=["image"])
    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
    mixup_batch_op = vision.MixUpBatch(2.0)
    data1 = data1.batch(4, pad_info={}, drop_remainder=True)
    data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"])

    images_mixup = None
    for idx, (image, _) in enumerate(data1):
        batch = image.asnumpy()
        images_mixup = batch if idx == 0 else np.append(images_mixup, batch, axis=0)

    if plot:
        visualize_list(images_original, images_mixup)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_mixup[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_c_py_compose_vision_module(plot=False, run_golden=True):
    """
    Test combining Python and C++ vision transforms
    """
    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    def test_config(plot, file_name, op_list):
        # Run op_list over the dataset and compare against a decode-only pipeline.
        data_dir = "../data/dataset/testImageNetData/train/"
        data1 = ds.ImageFolderDataset(dataset_dir=data_dir, shuffle=False)
        data1 = data1.map(operations=op_list, input_columns=["image"])
        data2 = ds.ImageFolderDataset(dataset_dir=data_dir, shuffle=False)
        data2 = data2.map(operations=c_vision.Decode(), input_columns=["image"])
        original_images = []
        transformed_images = []
        for row in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
            transformed_images.append(row["image"])
        for row in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
            original_images.append(row["image"])
        if run_golden:
            # Compare with expected md5 from images
            save_and_check_md5(data1, file_name, generate_golden=GENERATE_GOLDEN)
        if plot:
            visualize_list(original_images, transformed_images)

    # C decode -> Python resize
    test_config(op_list=[c_vision.Decode(),
                         py_vision.ToPIL(),
                         py_vision.Resize((224, 224)),
                         np.array],
                plot=plot, file_name="compose_c_py_1.npz")
    # C resize -> Python -> back to C resize
    test_config(op_list=[c_vision.Decode(),
                         c_vision.Resize((224, 244)),
                         py_vision.ToPIL(),
                         np.array,
                         c_vision.Resize((24, 24))],
                plot=plot, file_name="compose_c_py_2.npz")
    # Python decode/resize -> C RandomColor
    test_config(op_list=[py_vision.Decode(),
                         py_vision.Resize((224, 224)),
                         np.array,
                         c_vision.RandomColor()],
                plot=plot, file_name="compose_c_py_3.npz")

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_equalize_c(plot=False):
    """
    Test Equalize Cpp op
    """
    logger.info("Test Equalize cpp op")

    # Original Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_original = [C.Decode(), C.Resize(size=[224, 224])]
    ds_original = data_set.map(operations=transforms_original, input_columns="image")
    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        batch = image.asnumpy()
        images_original = batch if idx == 0 else np.append(images_original, batch, axis=0)

    # Equalize Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transform_equalize = [C.Decode(), C.Resize(size=[224, 224]),
                          C.Equalize()]
    ds_equalize = data_set.map(operations=transform_equalize, input_columns="image")
    ds_equalize = ds_equalize.batch(512)

    for idx, (image, _) in enumerate(ds_equalize):
        batch = image.asnumpy()
        images_equalize = batch if idx == 0 else np.append(images_equalize, batch, axis=0)

    if plot:
        visualize_list(images_original, images_equalize)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_equalize[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_center_crop_comp(height=375, width=375, plot=False):
    """
    Test CenterCrop between python and c image augmentation
    """
    logger.info("Test CenterCrop")

    # C pipeline: decode then center-crop
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = vision.Decode()
    center_crop_op = vision.CenterCrop([height, width])
    data1 = data1.map(operations=decode_op, input_columns=["image"])
    data1 = data1.map(operations=center_crop_op, input_columns=["image"])

    # Python pipeline: same crop via py_transforms
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    transforms = [
        py_vision.Decode(),
        py_vision.CenterCrop([height, width]),
        py_vision.ToTensor()
    ]
    transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
    data2 = data2.map(operations=transform, input_columns=["image"])

    image_c_cropped = []
    image_py_cropped = []
    rows1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    rows2 = data2.create_dict_iterator(num_epochs=1, output_numpy=True)
    for item1, item2 in zip(rows1, rows2):
        c_image = item1["image"]
        # ToTensor output is CHW float in [0, 1]; convert back to HWC uint8
        py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
        # Note: The images aren't exactly the same due to rounding error
        assert diff_mse(py_image, c_image) < 0.001
        image_c_cropped.append(c_image.copy())
        image_py_cropped.append(py_image.copy())

    if plot:
        visualize_list(image_c_cropped, image_py_cropped, visualize_mode=2)
def test_center_crop_op(height=375, width=375, plot=False):
    """
    Test CenterCrop
    """
    logger.info("Test CenterCrop")

    # First dataset: decode then crop
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
    decode_op = vision.Decode()
    # 3 images [375, 500] [600, 500] [512, 512]
    center_crop_op = vision.CenterCrop([height, width])
    data1 = data1.map(operations=decode_op, input_columns=["image"])
    data1 = data1.map(operations=center_crop_op, input_columns=["image"])

    # Second dataset: decode only, for visual comparison
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
    data2 = data2.map(operations=decode_op, input_columns=["image"])

    image_cropped = []
    image = []
    rows1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    rows2 = data2.create_dict_iterator(num_epochs=1, output_numpy=True)
    for item1, item2 in zip(rows1, rows2):
        image_cropped.append(item1["image"].copy())
        image.append(item2["image"].copy())

    if plot:
        visualize_list(image, image_cropped)
def test_random_crop_comp(plot=False):
    """
    Test RandomCrop and compare between python and c image augmentation
    """
    logger.info("Test RandomCrop with c_transform and py_transform comparison")
    cropped_size = 512

    # C pipeline: decode then random-crop
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    random_crop_op = c_vision.RandomCrop(cropped_size)
    decode_op = c_vision.Decode()
    data1 = data1.map(operations=decode_op, input_columns=["image"])
    data1 = data1.map(operations=random_crop_op, input_columns=["image"])

    # Python pipeline: same crop via py_transforms
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    transforms = [
        py_vision.Decode(),
        py_vision.RandomCrop(cropped_size),
        py_vision.ToTensor()
    ]
    transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
    data2 = data2.map(operations=transform, input_columns=["image"])

    image_c_cropped = []
    image_py_cropped = []
    rows1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    rows2 = data2.create_dict_iterator(num_epochs=1, output_numpy=True)
    for item1, item2 in zip(rows1, rows2):
        c_image = item1["image"]
        # ToTensor output is CHW float in [0, 1]; convert back to HWC uint8
        py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
        image_c_cropped.append(c_image)
        image_py_cropped.append(py_image)

    if plot:
        visualize_list(image_c_cropped, image_py_cropped, visualize_mode=2)
def util_test_normalize(mean, std, op_type):
    """
    Utility function for testing Normalize. Input arguments are given by other tests
    """
    # Guard clause: reject unknown op types up front.
    if op_type not in ("cpp", "python"):
        raise ValueError("Wrong parameter value")

    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

    if op_type == "cpp":
        # C++ ops: decode and normalize applied as two separate map steps.
        data = data.map(operations=c_vision.Decode(), input_columns=["image"])
        data = data.map(operations=c_vision.Normalize(mean, std), input_columns=["image"])
    else:
        # Python ops: decode -> tensor -> normalize composed into one transform.
        transform = mindspore.dataset.transforms.py_transforms.Compose([
            py_vision.Decode(),
            py_vision.ToTensor(),
            py_vision.Normalize(mean, std)
        ])
        data = data.map(operations=transform, input_columns=["image"])
    return data
def test_decode_normalize_op():
    """
    Test Decode op followed by Normalize op
    """
    logger.info("Test [Decode, Normalize] in one Map")

    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image", "label"],
                               num_parallel_workers=1, shuffle=False)

    # Chain both ops inside a single map call.
    decode_op = c_vision.Decode()
    normalize_op = c_vision.Normalize([121.0, 115.0, 100.0], [70.0, 68.0, 71.0])
    data1 = data1.map(operations=[decode_op, normalize_op], input_columns=["image"])

    # Drain the pipeline; the test passes if iteration completes without error.
    for idx, item in enumerate(data1.create_dict_iterator(num_epochs=1)):
        logger.info("Looping inside iterator {}".format(idx))
        _ = item["image"]
def test_serdes_uniform_augment(remove_json_files=True):
    """
    Test serdes on uniform augment.

    Builds a pipeline that applies UniformAugment and checks that it
    serializes and deserializes correctly via
    util_check_serialize_deserialize_file.
    """
    data_dir = "../data/dataset/testPK/data"
    data = ds.ImageFolderDataset(dataset_dir=data_dir, shuffle=False)

    # Fix the seed so UniformAugment's random op selection is reproducible,
    # but remember the previous value: mutating the global config without
    # restoring it leaks state into subsequent tests. Other tests in this
    # file follow the same save/restore pattern.
    original_seed = ds.config.get_seed()
    ds.config.set_seed(1)

    # Pool of candidate ops for UniformAugment to pick from.
    transforms_ua = [vision.RandomHorizontalFlip(),
                     vision.RandomVerticalFlip(),
                     vision.RandomColor(),
                     vision.RandomSharpness(),
                     vision.Invert(),
                     vision.AutoContrast(),
                     vision.Equalize()]
    transforms_all = [vision.Decode(),
                      vision.Resize(size=[224, 224]),
                      vision.UniformAugment(transforms=transforms_ua, num_ops=5)]
    data = data.map(operations=transforms_all, input_columns="image",
                    num_parallel_workers=1)
    util_check_serialize_deserialize_file(data, "uniform_augment_pipeline",
                                          remove_json_files)

    # Restore configuration
    ds.config.set_seed(original_seed)
def test_resize_md5_parameters(test_name, size, filename, seed, plot):
    """
    Test Resize with md5 check
    """
    logger.info("Test Resize with md5 check: {0}".format(test_name))
    original_seed = config_get_set_seed(seed)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # data2 extends data1 with a Resize step, so the two iterators yield the
    # decoded and resized versions of the same images in lockstep.
    decode_op = vision.Decode()
    resize_op = vision.Resize(size)
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data1 = data1.map(operations=decode_op, input_columns=["image"])
    data2 = data1.map(operations=resize_op, input_columns=["image"])

    image_original = []
    image_resized = []

    # Compare with expected md5 from images
    save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)

    original_iter = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    resized_iter = data2.create_dict_iterator(num_epochs=1, output_numpy=True)
    for item1, item2 in zip(original_iter, resized_iter):
        image_original.append(item1["image"])
        image_resized.append(item2["image"])
    if plot:
        visualize_list(image_original, image_resized)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_resize_op_parameters(test_name, size, plot):
    """
    Test resize_op
    """
    logger.info("Test resize: {0}".format(test_name))

    # data2 extends data1 with a Resize step, so iterating both side by side
    # pairs each decoded image with its resized version.
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = vision.Decode()
    resize_op = vision.Resize(size)
    data1 = data1.map(operations=decode_op, input_columns=["image"])
    data2 = data1.map(operations=resize_op, input_columns=["image"])

    image_original = []
    image_resized = []
    original_iter = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    resized_iter = data2.create_dict_iterator(num_epochs=1, output_numpy=True)
    for item1, item2 in zip(original_iter, resized_iter):
        image_original.append(item1["image"])
        image_resized.append(item2["image"])
    if plot:
        visualize_list(image_original, image_resized)
def test_pipeline_get_dataset_size():
    """
    Test get_dataset_size at each stage of a pipeline: shuffle and map keep
    the row count, batch divides it, repeat multiplies it, and concat adds
    the sizes of both inputs.
    """
    dataset = ds.TFRecordDataset(IMAGENET_TFFILE_DIR, SCHEMA_FILE,
                                 columns_list=["image"], shuffle=False)
    assert dataset.get_dataset_size() == 12

    # shuffle does not change the number of rows
    dataset = dataset.shuffle(buffer_size=3)
    assert dataset.get_dataset_size() == 12

    decode_op = vision.Decode()
    resize_op = vision.RandomResize(10)
    # use the operations= keyword for consistency with the rest of this file
    dataset = dataset.map(operations=[decode_op, resize_op], input_columns=["image"])
    assert dataset.get_dataset_size() == 12

    # 12 rows at batch_size 3 -> 4 batches
    dataset = dataset.batch(batch_size=3)
    assert dataset.get_dataset_size() == 4

    # repeat doubles the batched size: 4 * 2 -> 8
    dataset = dataset.repeat(count=2)
    assert dataset.get_dataset_size() == 8

    # concat reports the sum of both datasets' sizes: 12 + 12 -> 24
    tf1 = ds.TFRecordDataset(IMAGENET_TFFILE_DIR, shuffle=True)
    tf2 = ds.TFRecordDataset(IMAGENET_TFFILE_DIR, shuffle=True)
    assert tf2.concat(tf1).get_dataset_size() == 24
def test_rescale_op(plot=False):
    """
    Test rescale
    """
    logger.info("Test rescale")
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

    # data2 extends data1 with a Rescale step so each decoded image can be
    # compared against its rescaled counterpart.
    decode_op = vision.Decode()
    rescale_op = vision.Rescale(1.0 / 255.0, -1.0)
    data1 = data1.map(operations=decode_op, input_columns=["image"])
    data2 = data1.map(operations=rescale_op, input_columns=["image"])

    original_iter = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    rescaled_iter = data2.create_dict_iterator(num_epochs=1, output_numpy=True)
    for idx, (item1, item2) in enumerate(zip(original_iter, rescaled_iter)):
        image_original = item1["image"]
        image_de_rescaled = item2["image"]
        # Compare the pipeline's output against a NumPy-computed reference.
        image_np_rescaled = get_rescaled(idx)
        mse = diff_mse(image_de_rescaled, image_np_rescaled)
        assert mse < 0.001  # rounding error
        logger.info("image_{}, mse: {}".format(idx + 1, mse))
    if plot:
        visualize_image(image_original, image_de_rescaled, mse, image_np_rescaled)