def test_cutmix_batch_nchw_md5():
    """
    Test CutMixBatch on a batch of CHW images, validating output with MD5.
    """
    logger.info("test_cutmix_batch_nchw_md5")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Build the pipeline: HWC->CHW images, one-hot labels, batch, then CutMixBatch.
    dataset = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    dataset = dataset.map(input_columns=["image"], operations=vision.HWC2CHW())
    dataset = dataset.map(input_columns=["label"],
                          operations=data_trans.OneHot(num_classes=10))
    dataset = dataset.batch(5, drop_remainder=True)
    dataset = dataset.map(input_columns=["image", "label"],
                          operations=vision.CutMixBatch(mode.ImageBatchFormat.NCHW))

    filename = "cutmix_batch_c_nchw_result.npz"
    save_and_check_md5(dataset, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_horizontal_valid_prob_py():
    """
    Test RandomHorizontalFlip op with py_transforms: valid non-default
    probability, expect to pass (verified via MD5).
    """
    logger.info("test_random_horizontal_valid_prob_py")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset and apply decode -> flip(0.8) -> tensor conversion.
    dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                                 shuffle=False)
    transform = py_vision.ComposeOp([
        py_vision.Decode(),
        py_vision.RandomHorizontalFlip(0.8),
        py_vision.ToTensor()
    ])
    dataset = dataset.map(input_columns=["image"], operations=transform())

    filename = "random_horizontal_01_py_result.npz"
    save_and_check_md5(dataset, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_unmappable_randomize_repeatable():
    """
    Verify that a randomized split of an unmappable dataset produces the same
    (non-overlapping) sample ordering across repeated epochs.
    """
    original_num_parallel_workers = config_get_set_num_parallel_workers(4)
    # the labels outputted by ShuffleOp for seed 53 is [0, 2, 1, 4, 3]
    ds.config.set_seed(53)

    dataset = ds.TextFileDataset(text_file_dataset_path, shuffle=False)
    s1, s2 = dataset.split([0.8, 0.2])

    num_epochs = 5
    s1 = s1.repeat(num_epochs)
    s2 = s2.repeat(num_epochs)

    # Collect the decoded text from both splits.
    s1_output = [row["text"].item().decode("utf8")
                 for row in s1.create_dict_iterator()]
    s2_output = [row["text"].item().decode("utf8")
                 for row in s2.create_dict_iterator()]

    # note no overlap
    assert s1_output == [
        text_file_data[0], text_file_data[2], text_file_data[1],
        text_file_data[4]
    ] * num_epochs
    assert s2_output == [text_file_data[3]] * num_epochs

    # Restore configuration num_parallel_workers
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_erasing_md5():
    """
    Test RandomErasing with md5 check.

    Applies Decode -> ToTensor -> RandomErasing(value='random') and compares
    the pipeline output against a stored golden MD5.
    """
    logger.info("Test RandomErasing with md5 check")
    original_seed = config_get_set_seed(5)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                              shuffle=False)
    transforms_1 = [
        vision.Decode(),
        vision.ToTensor(),
        vision.RandomErasing(value='random')
    ]
    transform_1 = vision.ComposeOp(transforms_1)
    data = data.map(input_columns=["image"], operations=transform_1())

    # Compare with expected md5 from images
    filename = "random_erasing_01_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore configuration (fixed: removed redundant double parentheses)
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_cut_out_md5():
    """
    Test CutOut (C) and Cutout (py) transforms with md5 check.
    """
    logger.info("test_cut_out_md5")
    original_seed = config_get_set_seed(2)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # First dataset: C transforms, applied as two separate map steps.
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                               shuffle=False)
    data1 = data1.map(input_columns=["image"], operations=c.Decode())
    data1 = data1.map(input_columns=["image"], operations=c.CutOut(100))

    # Second dataset: python transforms composed into a single op.
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                               shuffle=False)
    compose = f.ComposeOp([f.Decode(), f.ToTensor(), f.Cutout(100)])
    data2 = data2.map(input_columns=["image"], operations=compose())

    # Compare with expected md5 from images
    filename1 = "cut_out_01_c_result.npz"
    save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN)
    filename2 = "cut_out_01_py_result.npz"
    save_and_check_md5(data2, filename2, generate_golden=GENERATE_GOLDEN)

    # Restore config
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_bounding_box_augment_valid_ratio_c(plot_vis=False):
    """
    Test BoundingBoxAugment op with a valid ratio (less than 1).
    Optionally plots images side by side, with and without the augmentation
    applied, together with their bounding boxes.
    """
    logger.info("test_bounding_box_augment_valid_ratio_c")
    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train",
                             decode=True, shuffle=False)
    dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train",
                             decode=True, shuffle=False)

    # Flip with probability 1, applied to 90% of the boxes.
    test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 0.9)

    # map to apply ops
    dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"],
                            output_columns=["image", "annotation"],
                            columns_order=["image", "annotation"],
                            operations=[test_op])  # Add column for "annotation"

    filename = "bounding_box_augment_valid_ratio_c_result.npz"
    save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

    unaugSamp, augSamp = [], []
    for unAug, Aug in zip(dataVoc1.create_dict_iterator(),
                          dataVoc2.create_dict_iterator()):
        unaugSamp.append(unAug)
        augSamp.append(Aug)

    if plot_vis:
        visualize_with_bounding_boxes(unaugSamp, augSamp)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_07_c():
    """
    Test RandomCrop op with c_transforms: padding_mode is Border.CONSTANT and
    fill_value is 255 (white), expected to pass.
    """
    logger.info("test_random_crop_07_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                                 shuffle=False)
    # Note: The padding_mode is default as Border.CONSTANT and set filling color to be white.
    dataset = dataset.map(input_columns=["image"],
                          operations=c_vision.Decode())
    dataset = dataset.map(input_columns=["image"],
                          operations=c_vision.RandomCrop(
                              512, [200, 200, 200, 200],
                              fill_value=(255, 255, 255)))

    filename = "random_crop_07_c_result.npz"
    save_and_check_md5(dataset, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_sharpness_c_md5():
    """
    Test RandomSharpness cpp op with md5 comparison.
    """
    logger.info("Test RandomSharpness cpp op with md5 comparison")
    original_seed = config_get_set_seed(200)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # define map operations: decode then sharpen with a fixed degree range
    ops = [C.Decode(), C.RandomSharpness((10.0, 15.0))]

    # Generate dataset
    dataset = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    dataset = dataset.map(input_columns=["image"], operations=ops)

    # check results with md5 comparison
    filename = "random_sharpness_cpp_01_result.npz"
    save_and_check_md5(dataset, filename, generate_golden=GENERATE_GOLDEN)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_02_py():
    """
    Test RandomCrop op with py_transforms: size is a list/tuple of length 2,
    expected to pass.
    """
    logger.info("test_random_crop_02_py")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                                 shuffle=False)
    # Note: If size is a sequence of length 2, it should be (height, width).
    transform = py_vision.ComposeOp([
        py_vision.Decode(),
        py_vision.RandomCrop([512, 375]),
        py_vision.ToTensor()
    ])
    dataset = dataset.map(input_columns=["image"], operations=transform())

    filename = "random_crop_02_py_result.npz"
    save_and_check_md5(dataset, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_05_py():
    """
    Test RandomCrop op with py_transforms: input image size < crop size but
    pad_if_needed is enabled, expected to pass.
    """
    logger.info("test_random_crop_05_py")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                                 shuffle=False)
    # Note: The size of the image is 4032*2268
    transform = py_vision.ComposeOp([
        py_vision.Decode(),
        py_vision.RandomCrop([2268, 4033], [200, 200, 200, 200],
                             pad_if_needed=True),
        py_vision.ToTensor()
    ])
    dataset = dataset.map(input_columns=["image"], operations=transform())

    filename = "random_crop_05_py_result.npz"
    save_and_check_md5(dataset, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_08_py():
    """
    Test RandomCrop op with py_transforms: padding_mode is Border.EDGE,
    expected to pass.
    """
    logger.info("test_random_crop_08_py")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                                 shuffle=False)
    # Note: The padding_mode is Border.EDGE.
    transform = py_vision.ComposeOp([
        py_vision.Decode(),
        py_vision.RandomCrop(512, [200, 200, 200, 200],
                             padding_mode=mode.Border.EDGE),
        py_vision.ToTensor()
    ])
    dataset = dataset.map(input_columns=["image"], operations=transform())

    filename = "random_crop_08_py_result.npz"
    save_and_check_md5(dataset, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_resize_md5_parameters(test_name, size, filename, seed, plot):
    """
    Test Resize with md5 check, parameterized over size / seed / golden file.
    """
    logger.info("Test Resize with md5 check: {0}".format(test_name))
    original_seed = config_get_set_seed(seed)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset: data2 branches off data1 after decoding, adding resize.
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                               shuffle=False)
    data1 = data1.map(operations=vision.Decode(), input_columns=["image"])
    data2 = data1.map(operations=vision.Resize(size), input_columns=["image"])

    originals = []
    resized = []

    # Compare with expected md5 from images
    save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)

    for sample1, sample2 in zip(
            data1.create_dict_iterator(num_epochs=1, output_numpy=True),
            data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        originals.append(sample1["image"])
        resized.append(sample2["image"])

    if plot:
        visualize_list(originals, resized)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_and_resize_02():
    """
    Test RandomCropAndResize with md5 check: image interpolation mode is
    Inter.NEAREST, expected to pass (C and py variants compared to goldens).
    """
    logger.info("test_random_crop_and_resize_02")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # First dataset: C transforms.
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                               shuffle=False)
    data1 = data1.map(input_columns=["image"], operations=c_vision.Decode())
    data1 = data1.map(input_columns=["image"],
                      operations=c_vision.RandomResizedCrop(
                          (256, 512), interpolation=mode.Inter.NEAREST))

    # Second dataset: python transforms.
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                               shuffle=False)
    transform = py_vision.ComposeOp([
        py_vision.Decode(),
        py_vision.RandomResizedCrop((256, 512),
                                    interpolation=mode.Inter.NEAREST),
        py_vision.ToTensor()
    ])
    data2 = data2.map(input_columns=["image"], operations=transform())

    filename1 = "random_crop_and_resize_02_c_result.npz"
    filename2 = "random_crop_and_resize_02_py_result.npz"
    save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN)
    save_and_check_md5(data2, filename2, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_affine_c_md5():
    """
    Test RandomAffine C Op with md5 comparison.

    Applies Decode -> RandomAffine (rotation, translation, scale and shear all
    randomized within fixed ranges) and compares against a stored golden MD5.
    """
    logger.info("test_random_affine_c_md5")
    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # define map operations
    transforms = [
        c_vision.Decode(),
        c_vision.RandomAffine(degrees=(-5, 15), translate=(0.1, 0.3),
                              scale=(0.9, 1.1), shear=(-10, 10, -5, 5))
    ]

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                              shuffle=False)
    data = data.map(input_columns=["image"], operations=transforms)

    # check results with md5 comparison
    filename = "random_affine_01_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore configuration (fixed: removed redundant double parentheses)
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_grayscale_md5_no_param():
    """
    Test RandomGrayscale with md5 comparison: no parameter given (default
    probability), expect to pass.
    """
    logger.info("test_random_grayscale_md5_no_param")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                                 shuffle=False)
    transform = py_vision.ComposeOp([
        py_vision.Decode(),
        py_vision.RandomGrayscale(),
        py_vision.ToTensor()
    ])
    dataset = dataset.map(input_columns=["image"], operations=transform())

    # Check output images with md5 comparison
    filename = "random_grayscale_02_result.npz"
    save_and_check_md5(dataset, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop():
    """
    Serialize a pipeline containing RandomCrop, deserialize it, and verify the
    reconstructed pipeline produces identical images.
    """
    logger.info("test_random_crop")
    DATA_DIR = [
        "../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"
    ]
    SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
    decode_op = vision.Decode()
    data1 = data1.map(input_columns="image", operations=decode_op)
    data1 = data1.map(input_columns="image",
                      operations=vision.RandomCrop([512, 512],
                                                   [200, 200, 200, 200]))

    # Serializing into python dictionary
    ds1_dict = ds.serialize(data1)
    # Serializing into json object
    _ = json.dumps(ds1_dict, indent=2)
    # Reconstruct dataset pipeline from its serialized form
    data1_1 = ds.deserialize(input_dict=ds1_dict)

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
    data2 = data2.map(input_columns="image", operations=decode_op)

    for row1, row1_1, row2 in zip(data1.create_dict_iterator(),
                                  data1_1.create_dict_iterator(),
                                  data2.create_dict_iterator()):
        # Original and round-tripped pipelines must agree exactly.
        assert np.array_equal(row1['image'], row1_1['image'])
        _ = row2["image"]

    # Restore configuration num_parallel_workers
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_color_adjust_md5():
    """
    Test RandomColorAdjust with md5 check for both C and python transforms.
    """
    logger.info("Test RandomColorAdjust with md5 check")
    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # First dataset: C transforms.
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                               shuffle=False)
    data1 = data1.map(operations=c_vision.Decode(), input_columns=["image"])
    data1 = data1.map(operations=c_vision.RandomColorAdjust(0.4, 0.4, 0.4, 0.1),
                      input_columns=["image"])

    # Second dataset: python transforms with the same adjustment parameters.
    transform = mindspore.dataset.transforms.py_transforms.Compose([
        py_vision.Decode(),
        py_vision.RandomColorAdjust(0.4, 0.4, 0.4, 0.1),
        py_vision.ToTensor()
    ])
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                               shuffle=False)
    data2 = data2.map(operations=transform, input_columns=["image"])

    # Compare with expected md5 from images
    save_and_check_md5(data1, "random_color_adjust_01_c_result.npz",
                       generate_golden=GENERATE_GOLDEN)
    save_and_check_md5(data2, "random_color_adjust_01_py_result.npz",
                       generate_golden=GENERATE_GOLDEN)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_bounding_box_augment_valid_edge_c(plot_vis=False):
    """
    Test BoundingBoxAugment op on a valid edge case: a box covering the full
    image. Optionally plots images side by side, with and without the
    augmentation applied, together with their bounding boxes.
    """
    logger.info("test_bounding_box_augment_valid_edge_c")
    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train",
                             shuffle=False, decode=True)
    dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train",
                             shuffle=False, decode=True)

    test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1)

    def full_image_bbox(img, bbox):
        # Replace every annotation with one box spanning the whole image.
        return (img,
                np.array([[0, 0, img.shape[1], img.shape[0], 0, 0,
                           0]]).astype(np.float32))

    # map to apply ops
    # Add column for "bbox"
    dataVoc1 = dataVoc1.map(operations=full_image_bbox,
                            input_columns=["image", "bbox"],
                            output_columns=["image", "bbox"],
                            column_order=["image", "bbox"])
    dataVoc2 = dataVoc2.map(operations=full_image_bbox,
                            input_columns=["image", "bbox"],
                            output_columns=["image", "bbox"],
                            column_order=["image", "bbox"])
    dataVoc2 = dataVoc2.map(operations=[test_op],
                            input_columns=["image", "bbox"],
                            output_columns=["image", "bbox"],
                            column_order=["image", "bbox"])

    filename = "bounding_box_augment_valid_edge_c_result.npz"
    save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

    unaugSamp, augSamp = [], []
    for unAug, Aug in zip(
            dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True),
            dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        unaugSamp.append(unAug)
        augSamp.append(Aug)

    if plot_vis:
        visualize_with_bounding_boxes(unaugSamp, augSamp)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_and_resize_03():
    """
    Test RandomCropAndResize with md5 check: max_attempts is 1, expected to
    pass (C and py variants compared to goldens).
    """
    logger.info("test_random_crop_and_resize_03")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # First dataset: C transforms.
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                               shuffle=False)
    data1 = data1.map(operations=c_vision.Decode(), input_columns=["image"])
    data1 = data1.map(operations=c_vision.RandomResizedCrop((256, 512),
                                                            max_attempts=1),
                      input_columns=["image"])

    # Second dataset: python transforms.
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                               shuffle=False)
    transform = mindspore.dataset.transforms.py_transforms.Compose([
        py_vision.Decode(),
        py_vision.RandomResizedCrop((256, 512), max_attempts=1),
        py_vision.ToTensor()
    ])
    data2 = data2.map(operations=transform, input_columns=["image"])

    filename1 = "random_crop_and_resize_03_c_result.npz"
    filename2 = "random_crop_and_resize_03_py_result.npz"
    save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN)
    save_and_check_md5(data2, filename2, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def skip_test_random_perspective_md5():
    """
    Test RandomPerspective with md5 comparison.

    Currently skipped (see `skip_` prefix). Applies Decode ->
    RandomPerspective -> ToTensor and compares against a stored golden MD5.
    """
    logger.info("test_random_perspective_md5")
    original_seed = config_get_set_seed(5)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # define map operations
    transforms = [
        py_vision.Decode(),
        py_vision.RandomPerspective(distortion_scale=0.3, prob=0.7,
                                    interpolation=Inter.BILINEAR),
        py_vision.ToTensor()
    ]
    transform = py_vision.ComposeOp(transforms)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                              shuffle=False)
    data = data.map(input_columns=["image"], operations=transform())

    # check results with md5 comparison
    filename = "random_perspective_01_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore configuration (fixed: removed redundant double parentheses)
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_apply_exception_random_crop_badinput():
    """
    Test RandomApply: test invalid input for one of the transform functions,
    expected to raise error
    """
    logger.info("test_random_apply_exception_random_crop_badinput")
    original_seed = config_get_set_seed(200)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)
    # define map operations
    transforms_list = [
        py_vision.Resize([32, 32]),
        py_vision.RandomCrop(100),  # crop size > image size
        py_vision.RandomRotation(30)
    ]
    transforms = [
        py_vision.Decode(),
        py_transforms.RandomApply(transforms_list, prob=0.6),
        py_vision.ToTensor()
    ]
    transform = py_transforms.Compose(transforms)
    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                              shuffle=False)
    data = data.map(operations=transform, input_columns=["image"])
    # NOTE(review): if get_next() raises no RuntimeError (e.g. RandomApply
    # skips the bad crop), this test silently passes — consider asserting
    # that the exception actually occurred. TODO confirm intended behavior.
    try:
        _ = data.create_dict_iterator(num_epochs=1).get_next()
    except RuntimeError as e:
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Crop size" in str(e)
    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_01_py():
    """
    Test RandomCrop op with py_transforms: size is a single integer,
    expected to pass.
    """
    logger.info("test_random_crop_01_py")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                                 shuffle=False)
    # Note: If size is an int, a square crop of size (size, size) is returned.
    transform = py_vision.ComposeOp([
        py_vision.Decode(),
        py_vision.RandomCrop(512),
        py_vision.ToTensor()
    ])
    dataset = dataset.map(input_columns=["image"], operations=transform())

    filename = "random_crop_01_py_result.npz"
    save_and_check_md5(dataset, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_apply_md5():
    """
    Test RandomApply op with md5 check.

    Wraps CenterCrop + RandomRotation in RandomApply with the default
    probability (0.5) and compares the output against a stored golden MD5.
    """
    logger.info("test_random_apply_md5")
    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # define map operations
    transforms_list = [py_vision.CenterCrop(64), py_vision.RandomRotation(30)]
    transforms = [
        py_vision.Decode(),
        # Note: using default value "prob=0.5"
        py_transforms.RandomApply(transforms_list),
        py_vision.ToTensor()
    ]
    transform = py_transforms.Compose(transforms)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                              shuffle=False)
    data = data.map(operations=transform, input_columns=["image"])

    # check results with md5 comparison
    filename = "random_apply_01_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore configuration (fixed: removed redundant double parentheses)
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_03_c():
    """
    Test RandomCrop op with c_transforms: input image size == crop size,
    expected to pass.
    """
    logger.info("test_random_crop_03_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                                 shuffle=False)
    # Note: The size of the image is 4032*2268
    dataset = dataset.map(input_columns=["image"],
                          operations=c_vision.Decode())
    dataset = dataset.map(input_columns=["image"],
                          operations=c_vision.RandomCrop([2268, 4032]))

    filename = "random_crop_03_c_result.npz"
    save_and_check_md5(dataset, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_sharpness_md5():
    """
    Test RandomSharpness (python transforms) with md5 comparison.
    """
    logger.info("Test RandomSharpness with md5 comparison")
    original_seed = config_get_set_seed(5)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # define map operations
    transform = F.ComposeOp([
        F.Decode(),
        F.RandomSharpness((0.1, 1.9)),
        F.ToTensor()
    ])

    # Generate dataset
    dataset = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    dataset = dataset.map(input_columns=["image"], operations=transform())

    # check results with md5 comparison
    filename = "random_sharpness_01_result.npz"
    save_and_check_md5(dataset, filename, generate_golden=GENERATE_GOLDEN)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_compare_random_color_op(degrees=None, plot=False):
    """
    Compare RandomColor op in Python and Cpp implementations: both pipelines
    must yield images of identical shape; MSE between them is logged.
    """
    logger.info("test_random_color_op")
    original_seed = config_get_set_seed(5)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Decode with rgb format set to True
    data1 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR,
                               columns_list=["image"], shuffle=False)
    data2 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR,
                               columns_list=["image"], shuffle=False)

    # Build the two ops with either default or caller-supplied degrees.
    if degrees is None:
        c_op, p_op = vision.RandomColor(), F.RandomColor()
    else:
        c_op, p_op = vision.RandomColor(degrees), F.RandomColor(degrees)

    transforms_random_color_py = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: img.astype(np.uint8), F.ToPIL(), p_op, np.array])

    data1 = data1.map(operations=[vision.Decode(), c_op],
                      input_columns=["image"])
    data2 = data2.map(operations=[vision.Decode()], input_columns=["image"])
    data2 = data2.map(operations=transforms_random_color_py,
                      input_columns=["image"])

    image_random_color_op = []
    image = []

    for row1, row2 in zip(
            data1.create_dict_iterator(num_epochs=1, output_numpy=True),
            data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        actual = row1["image"]
        expected = row2["image"]
        image_random_color_op.append(actual)
        image.append(expected)
        assert actual.shape == expected.shape
        mse = diff_mse(actual, expected)
        logger.info("MSE= {}".format(str(np.mean(mse))))

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)

    if plot:
        visualize_list(image, image_random_color_op)
def test_random_horizontal_bbox_with_bbox_valid_rand_c(plot_vis=False):
    """
    Test RandomHorizontalFlipWithBBox with a valid non-default probability;
    expect to pass. Optionally plots images side by side, with and without
    the augmentation applied, together with their bounding boxes.
    """
    logger.info("test_random_horizontal_bbox_valid_rand_c")
    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Load dataset
    dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train",
                             decode=True, shuffle=False)
    dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train",
                             decode=True, shuffle=False)

    test_op = c_vision.RandomHorizontalFlipWithBBox(0.6)

    # maps to fix annotations to minddata standard
    dataVoc1 = dataVoc1.map(input_columns=["annotation"],
                            output_columns=["annotation"],
                            operations=fix_annotate)
    dataVoc2 = dataVoc2.map(input_columns=["annotation"],
                            output_columns=["annotation"],
                            operations=fix_annotate)
    # map to apply ops
    dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"],
                            output_columns=["image", "annotation"],
                            columns_order=["image", "annotation"],
                            operations=[test_op])

    filename = "random_horizontal_flip_with_bbox_01_c_result.npz"
    save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

    unaugSamp, augSamp = [], []
    for unAug, Aug in zip(dataVoc1.create_dict_iterator(),
                          dataVoc2.create_dict_iterator()):
        unaugSamp.append(unAug)
        augSamp.append(Aug)

    if plot_vis:
        visualize_with_bounding_boxes(unaugSamp, augSamp)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_posterize_op_c(plot=False, run_golden=False):
    """
    Test RandomPosterize in C transformations. Uses an MSE assertion rather
    than md5, since jpeg decoding may be inconsistent across platforms.
    """
    logger.info("test_random_posterize_op_c")
    original_seed = config_get_set_seed(55)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # First dataset: decode + posterize with random bit range.
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                               shuffle=False)
    data1 = data1.map(operations=[c_vision.Decode(),
                                  c_vision.RandomPosterize((1, 8))],
                      input_columns=["image"])

    # Second dataset: decode only, as the reference.
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                               shuffle=False)
    data2 = data2.map(operations=[c_vision.Decode()], input_columns=["image"])

    image_posterize = []
    image_original = []
    for row1, row2 in zip(
            data1.create_dict_iterator(num_epochs=1, output_numpy=True),
            data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        image_posterize.append(row1["image"])
        image_original.append(row2["image"])

    # check mse as md5 can be inconsistent.
    # mse = 2.9668956 is calculated from
    # a thousand runs of diff_mse(np.array(image_original), np.array(image_posterize)) that all produced the same mse.
    # allow for an error of 0.0000005
    assert abs(2.9668956 - diff_mse(np.array(image_original),
                                    np.array(image_posterize))) <= 0.0000005

    if run_golden:
        # check results with md5 comparison
        filename = "random_posterize_01_result_c.npz"
        save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(image_original, image_posterize)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_resize_with_bbox_op_rand_c(plot_vis=False):
    """
    Test RandomResizeWithBBox with an MD5 check; expected to pass.
    Optionally plots images and bboxes side by side, with and without the op.
    """
    logger.info("test_random_resize_with_bbox_rand_c")
    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Load dataset
    dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train",
                             decode=True, shuffle=False)
    dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train",
                             decode=True, shuffle=False)

    test_op = c_vision.RandomResizeWithBBox(200)

    # Normalize annotations to the minddata standard on both pipelines.
    dataVoc1 = dataVoc1.map(input_columns=["annotation"],
                            output_columns=["annotation"],
                            operations=fix_annotate)
    dataVoc2 = dataVoc2.map(input_columns=["annotation"],
                            output_columns=["annotation"],
                            operations=fix_annotate)
    # map to apply ops
    dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"],
                            output_columns=["image", "annotation"],
                            columns_order=["image", "annotation"],
                            operations=[test_op])

    filename = "random_resize_with_bbox_op_01_c_result.npz"
    save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

    unaugSamp, augSamp = [], []
    for unAug, Aug in zip(dataVoc1.create_dict_iterator(),
                          dataVoc2.create_dict_iterator()):
        unaugSamp.append(unAug)
        augSamp.append(Aug)

    if plot_vis:
        visualize_with_bounding_boxes(unaugSamp, augSamp)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_grayscale_input_grayscale_images():
    """
    Test RandomGrayscale op: valid parameter with grayscale images as input,
    expect to pass. Verifies the grayscale pipeline yields 1-channel images
    while the plain pipeline yields 3-channel images.
    """
    logger.info("test_random_grayscale_input_grayscale_images")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # First dataset: convert to 1-channel grayscale before RandomGrayscale.
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                               shuffle=False)
    transform1 = mindspore.dataset.transforms.py_transforms.Compose([
        py_vision.Decode(),
        py_vision.Grayscale(1),
        # Note: If the input images is grayscale image with 1 channel.
        py_vision.RandomGrayscale(0.5),
        py_vision.ToTensor()
    ])
    data1 = data1.map(operations=transform1, input_columns=["image"])

    # Second dataset: plain decode, kept as the 3-channel reference.
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                               shuffle=False)
    transform2 = mindspore.dataset.transforms.py_transforms.Compose(
        [py_vision.Decode(), py_vision.ToTensor()])
    data2 = data2.map(operations=transform2, input_columns=["image"])

    image_gray = []
    image = []
    for row1, row2 in zip(
            data1.create_dict_iterator(num_epochs=1, output_numpy=True),
            data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        # Convert CHW float tensors back to HWC uint8 for inspection.
        image1 = (row1["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
        image2 = (row2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
        image_gray.append(image1)
        image.append(image2)

        assert len(image1.shape) == 3
        assert image1.shape[2] == 1
        assert len(image2.shape) == 3
        assert image2.shape[2] == 3

    # Restore config
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)