def test_auto_contrast_invalid_cutoff_param_c():
    """
    Test AutoContrast C Op with invalid cutoff parameter
    """
    logger.info("Test AutoContrast C Op with invalid cutoff parameter")
    try:
        ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        ds = ds.map(input_columns=["image"],
                    operations=[C.Decode(),
                                C.Resize((224, 224)),
                                lambda img: np.array(img[:, :, 0])])
        # invalid cutoff
        ds = ds.map(input_columns="image", operations=C.AutoContrast(cutoff=-10.0))
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
    try:
        ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        ds = ds.map(input_columns=["image"],
                    operations=[C.Decode(),
                                C.Resize((224, 224)),
                                lambda img: np.array(img[:, :, 0])])
        # invalid cutoff
        ds = ds.map(input_columns="image", operations=C.AutoContrast(cutoff=120.0))
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
def test_random_sharpness_invalid_params():
    """
    Test RandomSharpness with invalid input parameters.
    """
    logger.info("Test RandomSharpness with invalid input parameters.")
    try:
        data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        data = data.map(input_columns=["image"],
                        operations=[C.Decode(),
                                    C.Resize((224, 224)),
                                    C.RandomSharpness(10)])
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "tuple" in str(error)
    try:
        data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        data = data.map(input_columns=["image"],
                        operations=[C.Decode(),
                                    C.Resize((224, 224)),
                                    C.RandomSharpness((-10, 10))])
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "interval" in str(error)
    try:
        data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        data = data.map(input_columns=["image"],
                        operations=[C.Decode(),
                                    C.Resize((224, 224)),
                                    C.RandomSharpness((10, 5))])
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "(min,max)" in str(error)
def test_auto_contrast_invalid_ignore_param_c():
    """
    Test AutoContrast C Op with invalid ignore parameter
    """
    logger.info("Test AutoContrast C Op with invalid ignore parameter")
    try:
        ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        ds = ds.map(input_columns=["image"],
                    operations=[C.Decode(),
                                C.Resize((224, 224)),
                                lambda img: np.array(img[:, :, 0])])
        # invalid ignore
        ds = ds.map(input_columns="image", operations=C.AutoContrast(ignore=255.5))
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value 255.5 is not of type" in str(error)
    try:
        ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        ds = ds.map(input_columns=["image"],
                    operations=[C.Decode(),
                                C.Resize((224, 224)),
                                lambda img: np.array(img[:, :, 0])])
        # invalid ignore
        ds = ds.map(input_columns="image", operations=C.AutoContrast(ignore=(10, 100)))
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value (10,100) is not of type" in str(error)
def test_cpp_uniform_augment(plot=False, num_ops=2):
    """
    Test UniformAugment
    """
    logger.info("Test CPP UniformAugment")

    # Original Images
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    transforms_original = [C.Decode(), C.Resize(size=[224, 224]), F.ToTensor()]
    ds_original = ds.map(input_columns="image", operations=transforms_original)
    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = np.transpose(image, (0, 2, 3, 1))
        else:
            images_original = np.append(images_original,
                                        np.transpose(image, (0, 2, 3, 1)),
                                        axis=0)

    # UniformAugment Images
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
                     C.RandomHorizontalFlip(),
                     C.RandomVerticalFlip(),
                     C.RandomColorAdjust(),
                     C.RandomRotation(degrees=45)]
    uni_aug = C.UniformAugment(operations=transforms_ua, num_ops=num_ops)
    transforms_all = [C.Decode(), C.Resize(size=[224, 224]), uni_aug, F.ToTensor()]
    ds_ua = ds.map(input_columns="image", operations=transforms_all, num_parallel_workers=1)
    ds_ua = ds_ua.batch(512)

    for idx, (image, _) in enumerate(ds_ua):
        if idx == 0:
            images_ua = np.transpose(image, (0, 2, 3, 1))
        else:
            images_ua = np.append(images_ua,
                                  np.transpose(image, (0, 2, 3, 1)),
                                  axis=0)

    if plot:
        visualize_list(images_original, images_ua)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_ua[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
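# UniformAugment randomly selects num_ops of the candidate transforms for each
# image, so the op count controls how aggressive the augmentation is. A
# hypothetical sweep over that knob, purely for illustration:
#
#   for n in (1, 2, 3):
#       test_cpp_uniform_augment(plot=False, num_ops=n)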
def test_random_sharpness_c_py(degrees=(1.0, 1.0), plot=False):
    """
    Test Random Sharpness C and python Op
    """
    logger.info("Test RandomSharpness C and python Op")

    # RandomSharpness Images
    data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    data = data.map(input_columns=["image"],
                    operations=[C.Decode(), C.Resize((200, 300))])

    python_op = F.RandomSharpness(degrees)
    c_op = C.RandomSharpness(degrees)

    transforms_op = F.ComposeOp([lambda img: F.ToPIL()(img.astype(np.uint8)),
                                 python_op,
                                 np.array])()

    ds_random_sharpness_py = data.map(input_columns="image", operations=transforms_op)
    ds_random_sharpness_py = ds_random_sharpness_py.batch(512)

    for idx, (image, _) in enumerate(ds_random_sharpness_py):
        if idx == 0:
            images_random_sharpness_py = image
        else:
            images_random_sharpness_py = np.append(images_random_sharpness_py, image, axis=0)

    data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    data = data.map(input_columns=["image"],
                    operations=[C.Decode(), C.Resize((200, 300))])

    ds_images_random_sharpness_c = data.map(input_columns="image", operations=c_op)
    ds_images_random_sharpness_c = ds_images_random_sharpness_c.batch(512)

    for idx, (image, _) in enumerate(ds_images_random_sharpness_c):
        if idx == 0:
            images_random_sharpness_c = image
        else:
            images_random_sharpness_c = np.append(images_random_sharpness_c, image, axis=0)

    num_samples = images_random_sharpness_c.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_random_sharpness_c[i], images_random_sharpness_py[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_random_sharpness_c, images_random_sharpness_py, visualize_mode=2)
def test_random_sharpness_c(degrees=(1.6, 1.6), plot=False):
    """
    Test RandomSharpness cpp op
    """
    # log the degrees through the logger instead of a stray debug print
    logger.info("Test RandomSharpness cpp op with degrees: {}".format(degrees))

    # Original Images
    data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    transforms_original = [C.Decode(), C.Resize((224, 224))]
    ds_original = data.map(input_columns="image", operations=transforms_original)
    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = image
        else:
            images_original = np.append(images_original, image, axis=0)

    # Random Sharpness Adjusted Images
    data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    c_op = C.RandomSharpness()
    if degrees is not None:
        c_op = C.RandomSharpness(degrees)
    transforms_random_sharpness = [C.Decode(), C.Resize((224, 224)), c_op]
    ds_random_sharpness = data.map(input_columns="image", operations=transforms_random_sharpness)
    ds_random_sharpness = ds_random_sharpness.batch(512)

    for idx, (image, _) in enumerate(ds_random_sharpness):
        if idx == 0:
            images_random_sharpness = image
        else:
            images_random_sharpness = np.append(images_random_sharpness, image, axis=0)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_random_sharpness[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_original, images_random_sharpness)
def test_auto_contrast_c(plot=False):
    """
    Test AutoContrast C Op
    """
    logger.info("Test AutoContrast C Op")

    # AutoContrast Images
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    ds = ds.map(input_columns=["image"],
                operations=[C.Decode(), C.Resize((224, 224))])

    python_op = F.AutoContrast()
    c_op = C.AutoContrast()

    transforms_op = F.ComposeOp([lambda img: F.ToPIL()(img.astype(np.uint8)),
                                 python_op,
                                 np.array])()

    ds_auto_contrast_py = ds.map(input_columns="image", operations=transforms_op)
    ds_auto_contrast_py = ds_auto_contrast_py.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_py):
        if idx == 0:
            images_auto_contrast_py = image
        else:
            images_auto_contrast_py = np.append(images_auto_contrast_py, image, axis=0)

    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    ds = ds.map(input_columns=["image"],
                operations=[C.Decode(), C.Resize((224, 224))])

    ds_auto_contrast_c = ds.map(input_columns="image", operations=c_op)
    ds_auto_contrast_c = ds_auto_contrast_c.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_c):
        if idx == 0:
            images_auto_contrast_c = image
        else:
            images_auto_contrast_c = np.append(images_auto_contrast_c, image, axis=0)

    num_samples = images_auto_contrast_c.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_auto_contrast_c[i], images_auto_contrast_py[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    np.testing.assert_equal(np.mean(mse), 0.0)

    if plot:
        visualize_list(images_auto_contrast_c, images_auto_contrast_py, visualize_mode=2)
def test_invert_py_c(plot=False):
    """
    Test Invert Cpp op and python op
    """
    logger.info("Test Invert cpp and python op")

    # Invert images in cpp
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    ds = ds.map(input_columns=["image"],
                operations=[C.Decode(), C.Resize((224, 224))])

    ds_c_invert = ds.map(input_columns="image", operations=C.Invert())
    ds_c_invert = ds_c_invert.batch(512)

    for idx, (image, _) in enumerate(ds_c_invert):
        if idx == 0:
            images_c_invert = image
        else:
            images_c_invert = np.append(images_c_invert, image, axis=0)

    # invert images in python
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    ds = ds.map(input_columns=["image"],
                operations=[C.Decode(), C.Resize((224, 224))])

    transforms_p_invert = F.ComposeOp([lambda img: img.astype(np.uint8),
                                       F.ToPIL(),
                                       F.Invert(),
                                       np.array])

    ds_p_invert = ds.map(input_columns="image", operations=transforms_p_invert())
    ds_p_invert = ds_p_invert.batch(512)

    for idx, (image, _) in enumerate(ds_p_invert):
        if idx == 0:
            images_p_invert = image
        else:
            images_p_invert = np.append(images_p_invert, image, axis=0)

    num_samples = images_c_invert.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_p_invert[i], images_c_invert[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_c_invert, images_p_invert, visualize_mode=2)
def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file):
    """tutorial for cv minddataset."""
    columns_list = ["data", "label"]
    num_readers = 4
    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
    decode_op = vision.Decode()
    data_set = data_set.map(input_columns=["data"],
                            operations=decode_op, num_parallel_workers=2)
    resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR)
    data_set = data_set.map(input_columns="data",
                            operations=resize_op, num_parallel_workers=2)
    data_set = data_set.batch(2)
    data_set = data_set.repeat(2)
    num_iter = 0
    labels = []
    for item in data_set.create_dict_iterator():
        logger.info("-------------- get dataset size {} -----------------".format(num_iter))
        logger.info("-------------- item[label]: {} ---------------------".format(item["label"]))
        logger.info("-------------- item[data]: {} ----------------------".format(item["data"]))
        num_iter += 1
        labels.append(item["label"])
    assert num_iter == 10
    logger.info("repeat shuffle: {}".format(labels))
    assert len(labels) == 10
    # With reshuffling between repeats, the second pass should visit the
    # batches in a different order than the first. Compare as plain lists to
    # avoid the ambiguous truth value of NumPy array comparisons.
    epoch1_labels = [label.tolist() for label in labels[0:5]]
    epoch2_labels = [label.tolist() for label in labels[5:10]]
    assert epoch1_labels != epoch2_labels
def skip_test_exception():
    ds.config.set_num_parallel_workers(1)
    data = ds.TFRecordDataset(DATA_DIR, columns_list=["image"])
    data = data.map(input_columns=["image"], operations=vision.Resize(100, 100))
    with pytest.raises(RuntimeError) as info:
        data.create_tuple_iterator().get_next()
    assert "The shape size 1 of input tensor is invalid" in str(info.value)
def create_mnist_dataset(mnist_dir, num_parallel_workers=1):
    ds = de.MnistDataset(mnist_dir)

    # apply map operations on images
    ds = ds.map(input_columns="label", operations=C.TypeCast(mstype.int32))
    ds = ds.map(input_columns="image",
                operations=VC.Resize((MNIST_CONFIG.image_height, MNIST_CONFIG.image_width),
                                     interpolation=Inter.LINEAR),
                num_parallel_workers=num_parallel_workers)
    ds = ds.map(input_columns="image",
                operations=VC.Rescale(1 / 0.3081, -1 * 0.1307 / 0.3081),
                num_parallel_workers=num_parallel_workers)
    ds = ds.map(input_columns="image",
                operations=VC.Rescale(1.0 / 255.0, 0.0),
                num_parallel_workers=num_parallel_workers)
    ds = ds.map(input_columns="image", operations=VC.HWC2CHW(),
                num_parallel_workers=num_parallel_workers)

    # apply DatasetOps
    ds = ds.shuffle(buffer_size=MNIST_CONFIG.buffer_size)  # 10000 as in LeNet train script
    ds = ds.batch(MNIST_CONFIG.batch_size, drop_remainder=True)
    ds = ds.repeat(MNIST_CONFIG.repeat_size)

    return ds
def test_soft_dvpp_decode_resize_jpeg(plot=False):
    """
    Test SoftDvppDecodeResizeJpeg op
    """
    logger.info("test_soft_dvpp_decode_resize_jpeg")

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = vision.Decode()
    resize_op = vision.Resize((256, 512))
    data1 = data1.map(input_columns=["image"], operations=[decode_op, resize_op])

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    soft_dvpp_decode_resize_op = vision.SoftDvppDecodeResizeJpeg((256, 512))
    data2 = data2.map(input_columns=["image"], operations=soft_dvpp_decode_resize_op)

    num_iter = 0
    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        if num_iter > 0:
            break
        image1 = item1["image"]
        image2 = item2["image"]
        mse = diff_mse(image1, image2)
        assert mse <= 0.02
        logger.info("soft_dvpp_decode_resize_jpeg_{}, mse: {}".format(num_iter + 1, mse))
        if plot:
            visualize_image(image1, image2, mse)
        num_iter += 1
def test_resize_md5_parameters(test_name, size, filename, seed, plot):
    """
    Test Resize with md5 check
    """
    logger.info("Test Resize with md5 check: {0}".format(test_name))
    original_seed = config_get_set_seed(seed)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = vision.Decode()
    resize_op = vision.Resize(size)
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data2 = data1.map(input_columns=["image"], operations=resize_op)
    image_original = []
    image_resized = []

    # Compare the resized pipeline with the expected md5 golden file
    save_and_check_md5(data2, filename, generate_golden=GENERATE_GOLDEN)

    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        image_1 = item1["image"]
        image_2 = item2["image"]
        image_original.append(image_1)
        image_resized.append(image_2)
    if plot:
        visualize_list(image_original, image_resized)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_resize_op_parameters(test_name, size, plot):
    """
    Test resize_op
    """
    logger.info("Test resize: {0}".format(test_name))
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

    # define map operations
    decode_op = vision.Decode()
    resize_op = vision.Resize(size)

    # apply map operations on images
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data2 = data1.map(input_columns=["image"], operations=resize_op)
    image_original = []
    image_resized = []
    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        image_1 = item1["image"]
        image_2 = item2["image"]
        image_original.append(image_1)
        image_resized.append(image_2)
    if plot:
        visualize_list(image_original, image_resized)
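# The two parameterized tests above receive their arguments from an external
# source (for example a pytest.mark.parametrize table defined elsewhere in the
# suite). A hypothetical direct invocation, for illustration only; the test
# names, sizes, golden filename, and seed below are made-up placeholders:
#
#   test_resize_op_parameters("resize_int", 10, plot=False)
#   test_resize_md5_parameters("resize_md5_256_512", (256, 512),
#                              "resize_256_512_result.npz", 5, plot=False)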
def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_cv_file):
    """tutorial for cv minddataset."""
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)

    resize_height = 32
    resize_width = 32

    # define map operations
    decode_op = vision.Decode()
    resize_op = vision.Resize((resize_height, resize_width),
                              ds.transforms.vision.Inter.LINEAR)

    data_set = data_set.map(input_columns=["data"],
                            operations=decode_op, num_parallel_workers=4)
    data_set = data_set.map(input_columns=["data"],
                            operations=resize_op, num_parallel_workers=4)

    data_set = data_set.batch(2)
    assert data_set.get_dataset_size() == 5
    for epoch in range(5):
        num_iter = 0
        for data in data_set:
            logger.info("data is {}".format(data))
            num_iter += 1
        assert num_iter == 5
        data_set.reset()
def test_exception_02():
    """
    Test exceptions with invalid input, and test valid input
    """
    logger.info("test_exception_02")
    num_samples = -1
    with pytest.raises(ValueError) as info:
        ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
    assert 'Input num_samples is not within the required interval of (0 to 2147483647).' in str(info.value)

    num_samples = 1
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
    data = data.map(input_columns=["image"], operations=vision.Decode())
    data = data.map(input_columns=["image"], operations=vision.Resize((100, 100)))
    # Confirm 1 sample in dataset
    assert sum([1 for _ in data]) == 1
    num_iters = 0
    for _ in data.create_dict_iterator():
        num_iters += 1
    assert num_iters == 1
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
    """
    create a train or eval imagenet2012 dataset for resnet50

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32
        target(str): the device target. Default: Ascend

    Returns:
        dataset
    """
    if target == "Ascend":
        device_num, rank_id = _get_rank_info()
    else:
        init("nccl")
        rank_id = get_rank()
        device_num = get_group_size()

    if device_num == 1:
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                     num_shards=device_num, shard_id=rank_id)

    image_size = 224
    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # define map operations
    if do_train:
        trans = [
            C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            C.Normalize(mean=mean, std=std),
            C.HWC2CHW()
        ]
    else:
        trans = [
            C.Decode(),
            C.Resize(256),
            C.CenterCrop(image_size),
            C.Normalize(mean=mean, std=std),
            C.HWC2CHW()
        ]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
    ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
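# A minimal usage sketch for the ImageNet create_dataset above; the dataset
# path is a made-up placeholder, and the call assumes a single Ascend device
# (so _get_rank_info() reports device_num == 1).
def example_resnet50_train_dataset():
    train_ds = create_dataset("/data/imagenet2012/train", do_train=True,
                              repeat_num=1, batch_size=32, target="Ascend")
    # with drop_remainder=True every batch has image shape (32, 3, 224, 224)
    print("batches per epoch:", train_ds.get_dataset_size())
    return train_ds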
def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file):
    """tutorial for cv minddataset."""
    columns_list = ["data", "label"]
    num_readers = 4
    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
    decode_op = vision.Decode()
    data_set = data_set.map(input_columns=["data"],
                            operations=decode_op, num_parallel_workers=2)
    resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR)
    data_set = data_set.map(input_columns="data",
                            operations=resize_op, num_parallel_workers=2)
    data_set = data_set.batch(32, drop_remainder=True)
    num_iter = 0
    for item in data_set.create_dict_iterator():
        logger.info("-------------- get dataset size {} -----------------".format(num_iter))
        logger.info("-------------- item[label]: {} ---------------------".format(item["label"]))
        logger.info("-------------- item[data]: {} ----------------------".format(item["data"]))
        num_iter += 1
    # with drop_remainder=True and fewer than 32 records, no batch is produced
    assert num_iter == 0
def test_me_de_train_dataset():
    data_list = ["{0}/train-00001-of-01024.data".format(data_path)]
    data_set_new = ds.TFRecordDataset(data_list, schema=SCHEMA_DIR,
                                      columns_list=["image/encoded", "image/class/label"])

    resize_height = 224
    resize_width = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    decode_op = vision.Decode()
    resize_op = vision.Resize((resize_height, resize_width), Inter.LINEAR)  # Bilinear as default
    rescale_op = vision.Rescale(rescale, shift)

    # apply map operations on images
    data_set_new = data_set_new.map(input_columns="image/encoded", operations=decode_op)
    data_set_new = data_set_new.map(input_columns="image/encoded", operations=resize_op)
    data_set_new = data_set_new.map(input_columns="image/encoded", operations=rescale_op)
    hwc2chw_op = vision.HWC2CHW()
    data_set_new = data_set_new.map(input_columns="image/encoded", operations=hwc2chw_op)
    data_set_new = data_set_new.repeat(1)

    # apply batch operations
    batch_size_new = 32
    data_set_new = data_set_new.batch(batch_size_new, drop_remainder=True)
    return data_set_new
def create_dataset(data_path, batch_size=32, repeat_size=1, mode="train"):
    """
    create dataset for train or test
    """
    cifar_ds = ds.Cifar10Dataset(data_path)

    rescale = 1.0 / 255.0
    shift = 0.0

    resize_op = CV.Resize((cfg.image_height, cfg.image_width))
    rescale_op = CV.Rescale(rescale, shift)
    normalize_op = CV.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    if mode == "train":
        random_crop_op = CV.RandomCrop([32, 32], [4, 4, 4, 4])
        random_horizontal_op = CV.RandomHorizontalFlip()
    channel_swap_op = CV.HWC2CHW()
    typecast_op = C.TypeCast(mstype.int32)

    cifar_ds = cifar_ds.map(input_columns="label", operations=typecast_op)
    if mode == "train":
        cifar_ds = cifar_ds.map(input_columns="image", operations=random_crop_op)
        cifar_ds = cifar_ds.map(input_columns="image", operations=random_horizontal_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=resize_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=rescale_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=normalize_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=channel_swap_op)

    cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
    cifar_ds = cifar_ds.repeat(repeat_size)
    return cifar_ds
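# A usage sketch for the CIFAR-10 create_dataset above; the directory names
# and the cfg fields it reads (image_height, image_width, buffer_size) are
# assumptions for illustration.
def example_cifar10_datasets():
    train_ds = create_dataset("./cifar-10-batches-bin", batch_size=32,
                              repeat_size=1, mode="train")
    eval_ds = create_dataset("./cifar-10-verify-bin", batch_size=32,
                             repeat_size=1, mode="test")
    return train_ds, eval_ds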
def test_case_3():
    """
    Test Repeat & Batch
    """
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

    # define data augmentation parameters
    resize_height, resize_width = 224, 224

    # define map operations
    decode_op = vision.Decode()
    resize_op = vision.Resize((resize_height, resize_width))

    # apply map operations on images
    data = data.map(input_columns=["image"], operations=decode_op)
    data = data.map(input_columns=["image"], operations=resize_op)
    data = data.repeat(2)
    batch_size = 2
    data = data.batch(batch_size, drop_remainder=True)

    data = data.device_que()
    data.send()
    time.sleep(0.1)
    data.stop_send()
def test_exception_02():
    """
    Test multiple exceptions with invalid input
    """
    logger.info("test_exception_02")
    num_samples = 0
    with pytest.raises(ValueError) as info:
        ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
    assert "num_samples must be greater than 0" in str(info.value)

    num_samples = -1
    with pytest.raises(ValueError) as info:
        ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
    assert "num_samples must be greater than 0" in str(info.value)

    num_samples = 1
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
    data = data.map(input_columns=["image"], operations=vision.Decode())
    data = data.map(input_columns=["image"], operations=vision.Resize((100, 100)))
    # Confirm 1 sample in dataset
    assert sum([1 for _ in data]) == 1
    num_iters = 0
    for _ in data.create_dict_iterator():
        num_iters += 1
    assert num_iters == 1
def test_sample_exception():
    num_samples = 0
    with pytest.raises(ValueError) as info:
        ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
    assert "num_samples must be greater than 0" in str(info.value)

    num_samples = -1
    with pytest.raises(ValueError) as info:
        ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
    assert "num_samples must be greater than 0" in str(info.value)

    num_samples = 1
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
    data = data.map(input_columns=["image"], operations=vision.Decode())
    data = data.map(input_columns=["image"], operations=vision.Resize((100, 100)))
    num_iters = 0
    for _ in data.create_dict_iterator():
        num_iters += 1
    assert num_iters == 1
def create_dataset(data_path, batch_size=32, repeat_size=1, num_parallel_workers=1):
    """
    create dataset for train or test
    """
    # define dataset
    mnist_ds = ds.MnistDataset(data_path)

    resize_height, resize_width = 32, 32
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    resize_op = CV.Resize((resize_height, resize_width))  # Bilinear mode
    rescale_op = CV.Rescale(rescale, shift)
    hwc2chw_op = CV.HWC2CHW()

    # apply map operations on images
    mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op,
                            num_parallel_workers=num_parallel_workers)

    # apply DatasetOps
    mnist_ds = mnist_ds.batch(batch_size)
    mnist_ds = mnist_ds.repeat(repeat_size)

    return mnist_ds
def create_dataset(data_path, batch_size=32, repeat_size=1, num_parallel_workers=1):
    """
    create dataset for train or test
    """
    # define dataset
    mnist_ds = ds.MnistDataset(data_path)

    resize_height, resize_width = 32, 32
    rescale = 1.0 / 255.0
    shift = 0.0
    rescale_nml = 1 / 0.3081
    shift_nml = -1 * 0.1307 / 0.3081

    # define map operations
    resize_op = CV.Resize((resize_height, resize_width),
                          interpolation=Inter.LINEAR)  # Bilinear mode
    rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)
    rescale_op = CV.Rescale(rescale, shift)
    hwc2chw_op = CV.HWC2CHW()
    type_cast_op = C.TypeCast(mstype.int32)

    # apply map operations on images
    mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op,
                            num_parallel_workers=num_parallel_workers)

    # apply DatasetOps
    buffer_size = 10000
    mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size)  # 10000 as in LeNet train script
    mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
    mnist_ds = mnist_ds.repeat(repeat_size)

    return mnist_ds
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset.

    Args:
        dataset_path (str): The path of dataset.
        do_train (bool): Whether dataset is used for train or eval.
        repeat_num (int): The repeat times of dataset. Default: 1.
        batch_size (int): The batch size of dataset. Default: 32.

    Returns:
        Dataset.
    """
    if do_train:
        dataset_path = os.path.join(dataset_path, 'train')
        do_shuffle = True
    else:
        dataset_path = os.path.join(dataset_path, 'eval')
        do_shuffle = False

    if device_num == 1 or not do_train:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=do_shuffle)
    else:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=do_shuffle,
                               num_shards=device_num, shard_id=device_id)

    resize_height = 224
    resize_width = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = C.RandomCrop((32, 32), (4, 4, 4, 4))
    random_horizontal_flip_op = C.RandomHorizontalFlip(device_id / (device_id + 1))
    resize_op = C.Resize((resize_height, resize_width))
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    change_swap_op = C.HWC2CHW()

    trans = []
    if do_train:
        trans += [random_crop_op, random_horizontal_flip_op]
    trans += [resize_op, rescale_op, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
    ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
def test_mix_up_multi():
    """
    Test multi batch mix up op
    """
    logger.info("Test several batch mix up op")

    resize_height = 224
    resize_width = 224

    # Create dataset and define map operations
    ds1 = ds.ImageFolderDatasetV2(DATA_DIR_2)

    num_classes = 3
    decode_op = c_vision.Decode()
    resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR)
    one_hot_encode = c.OneHot(num_classes)  # num_classes is input argument

    ds1 = ds1.map(input_columns=["image"], operations=decode_op)
    ds1 = ds1.map(input_columns=["image"], operations=resize_op)
    ds1 = ds1.map(input_columns=["label"], operations=one_hot_encode)

    # apply batch operations
    batch_size = 3
    ds1 = ds1.batch(batch_size, drop_remainder=True)

    ds2 = ds1
    alpha = 0.2
    transforms = [py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=False)]
    ds1 = ds1.map(input_columns=["image", "label"], operations=transforms)

    num_iter = 0
    batch1_image1 = 0
    for data1, data2 in zip(ds1.create_dict_iterator(), ds2.create_dict_iterator()):
        image1 = data1["image"]
        label1 = data1["label"]
        logger.info("label: {}".format(label1))

        image2 = data2["image"]
        label2 = data2["label"]
        logger.info("label2: {}".format(label2))

        if num_iter == 0:
            batch1_image1 = image1

        if num_iter == 1:
            lam = np.abs(label2 - label1)
            logger.info("lam value in multi: {}".format(lam))
            for index in range(batch_size):
                if np.square(lam[index]).mean() != 0:
                    lam_value = 1 - np.sum(lam[index]) / 2
                    img_golden = lam_value * image2[index] + (1 - lam_value) * batch1_image1[index]
                    # compare actual pixel values; `.all() == .all()` would
                    # only compare two booleans
                    assert np.allclose(image1[index], img_golden, rtol=1e-5, atol=1e-3)
                    logger.info("====test several batch mixup ok====")
            break

        num_iter = num_iter + 1
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32

    Returns:
        dataset
    """
    device_num = int(os.getenv("DEVICE_NUM"))
    rank_id = int(os.getenv("RANK_ID"))

    if device_num == 1:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True,
                               num_shards=device_num, shard_id=rank_id)

    # define map operations
    trans = []
    if do_train:
        trans += [
            C.RandomCrop((32, 32), (4, 4, 4, 4)),
            C.RandomHorizontalFlip(prob=0.5)
        ]

    trans += [
        C.Resize((config.image_height, config.image_width)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
    ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1, batch_size=32):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        config: config object providing image_height and image_width.
        device_target(string): the device target; only "GPU" is supported.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32

    Returns:
        dataset
    """
    if device_target == "GPU":
        if do_train:
            from mindspore.communication.management import get_rank, get_group_size
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                         num_shards=get_group_size(), shard_id=get_rank())
        else:
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        raise ValueError("Unsupported device_target.")

    resize_height = config.image_height
    resize_width = config.image_width
    buffer_size = 1000

    # define map operations
    decode_op = C.Decode()
    resize_crop_op = C.RandomCropDecodeResize(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)

    resize_op = C.Resize(256)
    center_crop = C.CenterCrop(resize_width)
    rescale_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
    normalize_op = C.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                               std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    change_swap_op = C.HWC2CHW()

    if do_train:
        trans = [resize_crop_op, horizontal_flip_op, rescale_op, normalize_op, change_swap_op]
    else:
        trans = [decode_op, resize_op, center_crop, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=buffer_size)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
def create_dataset(data_path, batch_size=32, repeat_size=1, num_parallel_workers=1):
    """
    create dataset for train or test

    Args:
        data_path: Data path
        batch_size: The number of data records in each group
        repeat_size: The number of replicated data records
        num_parallel_workers: The number of parallel workers
    """
    # define dataset
    mnist_ds = ds.MnistDataset(data_path)

    # define operation parameters
    resize_height, resize_width = 32, 32
    rescale = 1.0 / 255.0
    shift = 0.0
    rescale_nml = 1 / 0.3081
    shift_nml = -1 * 0.1307 / 0.3081

    # define map operations
    resize_op = CV.Resize((resize_height, resize_width),
                          interpolation=Inter.LINEAR)  # Resize images to (32, 32)
    rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)  # normalize images
    rescale_op = CV.Rescale(rescale, shift)  # rescale images
    hwc2chw_op = CV.HWC2CHW()  # change shape from (height, width, channel) to (channel, height, width) to fit network
    type_cast_op = C.TypeCast(mstype.int32)  # change data type of label to int32 to fit network

    # apply map operations on images
    mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op,
                            num_parallel_workers=num_parallel_workers)

    # apply DatasetOps
    buffer_size = 10000
    mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size)  # 10000 as in LeNet train script
    mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
    mnist_ds = mnist_ds.repeat(repeat_size)

    return mnist_ds
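# A usage sketch for the MNIST create_dataset above; the extracted MNIST
# directory path is an assumption for illustration.
def example_mnist_train_dataset():
    mnist_ds = create_dataset("./MNIST_Data/train", batch_size=32,
                              repeat_size=1, num_parallel_workers=4)
    for image, label in mnist_ds:
        # after HWC2CHW each batch has image shape (32, 1, 32, 32)
        print(image.shape, label.shape)
        break
    return mnist_ds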