def __init__(self, data_dir, training=True, use_third_trsfm=False, use_auto_augment=False, num_parallel_workers=8, device_num=1, device_id=0):
    """Build the transform pipeline and remember dataset settings.

    Evaluation mode (training=False) only converts to tensor and
    normalizes. Training mode selects one of three augmentation recipes
    depending on `use_third_trsfm` / `use_auto_augment`.
    """
    # NOTE(review): mean/std look like the standard CIFAR-10 statistics — confirm.
    norm_op = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    if not training:
        # Evaluation: no augmentation at all.
        trsfm = Compose([
            transforms.ToTensor(),
            norm_op,
        ])
    elif not use_third_trsfm:
        # Default training augmentation (SimCLR-style crop + color jitter).
        trsfm = Compose([
            transforms.ToPIL(),
            transforms.RandomResizedCrop(size=32, scale=(0.2, 1.)),
            transforms.RandomColorAdjust(0.4, 0.4, 0.4, 0.4),
            transforms.RandomGrayscale(prob=0.2),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            norm_op,
        ])
    elif use_auto_augment:
        # Third transform using the AutoAugment CIFAR-10 policy.
        trsfm = Compose([
            transforms.ToPIL(),
            transforms.RandomResizedCrop(size=32, scale=(0.2, 1.)),
            transforms.RandomHorizontalFlip(),
            CIFAR10Policy(),
            transforms.ToTensor(),
            norm_op,
        ])
    else:
        # Third transform using RandAugment (2 ops, magnitude 10).
        trsfm = Compose([
            transforms.ToPIL(),
            transforms.RandomResizedCrop(size=32, scale=(0.2, 1.)),
            transforms.RandomHorizontalFlip(),
            RandAugment(n=2, m=10),
            transforms.ToTensor(),
            norm_op,
        ])
    self.trsfm = trsfm
    self.data_dir = data_dir
    self.num_parallel_workers = num_parallel_workers
    self.device_num = device_num
    self.device_id = device_id
def test_c_py_compose_vision_module(plot=False, run_golden=True):
    """
    Test combining Python and C++ vision transforms
    """
    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    def test_config(plot, file_name, op_list):
        # Two pipelines over the same folder: one with the ops under test,
        # one with plain decode as the visual reference.
        data_dir = "../data/dataset/testImageNetData/train/"
        data1 = ds.ImageFolderDataset(dataset_dir=data_dir, shuffle=False)
        data1 = data1.map(operations=op_list, input_columns=["image"])
        data2 = ds.ImageFolderDataset(dataset_dir=data_dir, shuffle=False)
        data2 = data2.map(operations=c_vision.Decode(), input_columns=["image"])
        transformed_images = [row["image"] for row in
                              data1.create_dict_iterator(num_epochs=1, output_numpy=True)]
        original_images = [row["image"] for row in
                           data2.create_dict_iterator(num_epochs=1, output_numpy=True)]
        if run_golden:
            # Compare with expected md5 from images
            save_and_check_md5(data1, file_name, generate_golden=GENERATE_GOLDEN)
        if plot:
            visualize_list(original_images, transformed_images)

    test_config(op_list=[c_vision.Decode(),
                         py_vision.ToPIL(),
                         py_vision.Resize((224, 224)),
                         np.array],
                plot=plot, file_name="compose_c_py_1.npz")
    # NOTE(review): (224, 244) is asymmetric; it matches the recorded golden
    # md5, so it must stay as-is.
    test_config(op_list=[c_vision.Decode(),
                         c_vision.Resize((224, 244)),
                         py_vision.ToPIL(),
                         np.array,
                         c_vision.Resize((24, 24))],
                plot=plot, file_name="compose_c_py_2.npz")
    test_config(op_list=[py_vision.Decode(),
                         py_vision.Resize((224, 224)),
                         np.array,
                         c_vision.RandomColor()],
                plot=plot, file_name="compose_c_py_3.npz")

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_to_pil_02():
    """
    Test ToPIL Op with md5 comparison: input is not PIL image
    Expected to pass
    """
    logger.info("test_to_pil_02")

    # Generate dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = c_vision.Decode()
    op_list = [
        # If input type is not PIL.
        py_vision.ToPIL(),
        py_vision.CenterCrop(375),
        py_vision.ToTensor()
    ]
    composed = mindspore.dataset.transforms.py_transforms.Compose(op_list)
    data1 = data1.map(operations=decode_op, input_columns=["image"])
    data1 = data1.map(operations=composed, input_columns=["image"])

    # Compare with expected md5 from images
    filename = "to_pil_02_result.npz"
    save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)
def test_equalize_py_c(plot=False):
    """
    Test Equalize Cpp op and python op
    """
    logger.info("Test Equalize cpp and python op")

    def _collect(batched_ds):
        # Stack every batch of images into a single ndarray along axis 0.
        return np.concatenate([image.asnumpy() for image, _ in batched_ds], axis=0)

    # Equalize images with the cpp op.
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))],
                            input_columns=["image"])
    ds_c_equalize = data_set.map(operations=C.Equalize(), input_columns="image").batch(512)
    images_c_equalize = _collect(ds_c_equalize)

    # Equalize images with the python op (requires uint8 PIL input).
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))],
                            input_columns=["image"])
    transforms_p_equalize = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: img.astype(np.uint8), F.ToPIL(), F.Equalize(), np.array])
    ds_p_equalize = data_set.map(operations=transforms_p_equalize, input_columns="image").batch(512)
    images_p_equalize = _collect(ds_p_equalize)

    # Per-sample mean squared error between the two implementations.
    mse = np.array([diff_mse(p_img, c_img)
                    for p_img, c_img in zip(images_p_equalize, images_c_equalize)])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_c_equalize, images_p_equalize, visualize_mode=2)
def test_compare_random_color_op(degrees=None, plot=False):
    """
    Compare Random Color op in Python and Cpp
    """
    logger.info("test_random_color_op")

    original_seed = config_get_set_seed(5)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Decode with rgb format set to True
    data1 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)

    # Construct both ops with the same arguments (or both with defaults).
    if degrees is None:
        c_op = vision.RandomColor()
        p_op = F.RandomColor()
    else:
        c_op = vision.RandomColor(degrees)
        p_op = F.RandomColor(degrees)

    transforms_random_color_py = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: img.astype(np.uint8), F.ToPIL(), p_op, np.array])

    data1 = data1.map(operations=[vision.Decode(), c_op], input_columns=["image"])
    data2 = data2.map(operations=[vision.Decode()], input_columns=["image"])
    data2 = data2.map(operations=transforms_random_color_py, input_columns=["image"])

    image_random_color_op = []
    image = []
    iter_c = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    iter_py = data2.create_dict_iterator(num_epochs=1, output_numpy=True)
    for item1, item2 in zip(iter_c, iter_py):
        actual = item1["image"]
        expected = item2["image"]
        image_random_color_op.append(actual)
        image.append(expected)
        assert actual.shape == expected.shape
        mse = diff_mse(actual, expected)
        logger.info("MSE= {}".format(str(np.mean(mse))))

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
    if plot:
        visualize_list(image, image_random_color_op)
def test_auto_contrast_c(plot=False):
    """
    Test AutoContrast C Op
    """
    logger.info("Test AutoContrast C Op")

    # Python reference pipeline.
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))],
                            input_columns=["image"])
    python_op = F.AutoContrast(cutoff=10.0, ignore=[10, 20])
    c_op = C.AutoContrast(cutoff=10.0, ignore=[10, 20])
    transforms_op = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: F.ToPIL()(img.astype(np.uint8)), python_op, np.array])

    ds_auto_contrast_py = data_set.map(operations=transforms_op, input_columns="image").batch(512)
    images_auto_contrast_py = np.concatenate(
        [image.asnumpy() for image, _ in ds_auto_contrast_py], axis=0)

    # C++ pipeline under test.
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))],
                            input_columns=["image"])
    ds_auto_contrast_c = data_set.map(operations=c_op, input_columns="image").batch(512)
    images_auto_contrast_c = np.concatenate(
        [image.asnumpy() for image, _ in ds_auto_contrast_c], axis=0)

    # The two implementations must agree exactly: mean MSE of zero.
    mse = np.array([diff_mse(c_img, p_img)
                    for c_img, p_img in zip(images_auto_contrast_c, images_auto_contrast_py)])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    np.testing.assert_equal(np.mean(mse), 0.0)

    # Compare with expected md5 from images
    filename = "autocontrast_01_result_c.npz"
    save_and_check_md5(ds_auto_contrast_c, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(images_auto_contrast_c, images_auto_contrast_py, visualize_mode=2)
def test_auto_contrast_one_channel_c(plot=False):
    """
    Test AutoContrast C op with one channel
    """
    logger.info("Test AutoContrast C Op With One Channel Images")

    # Python reference: take channel 0 only, keep uint8.
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))],
                            input_columns=["image"])
    python_op = F.AutoContrast()
    c_op = C.AutoContrast()
    # not using F.ToTensor() since it converts to floats
    transforms_op = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: (np.array(img)[:, :, 0]).astype(np.uint8), F.ToPIL(), python_op, np.array])

    ds_auto_contrast_py = data_set.map(operations=transforms_op, input_columns="image").batch(512)
    images_auto_contrast_py = np.concatenate(
        [image.asnumpy() for image, _ in ds_auto_contrast_py], axis=0)

    # C++ pipeline under test, fed the same single channel.
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224)),
                                        lambda img: np.array(img[:, :, 0])],
                            input_columns=["image"])
    ds_auto_contrast_c = data_set.map(operations=c_op, input_columns="image").batch(512)
    images_auto_contrast_c = np.concatenate(
        [image.asnumpy() for image, _ in ds_auto_contrast_c], axis=0)

    # The two implementations must agree exactly: mean MSE of zero.
    mse = np.array([diff_mse(c_img, p_img)
                    for c_img, p_img in zip(images_auto_contrast_c, images_auto_contrast_py)])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    np.testing.assert_equal(np.mean(mse), 0.0)
    if plot:
        visualize_list(images_auto_contrast_c, images_auto_contrast_py, visualize_mode=2)
def __call__(self, imgs, labels, batchInfo):
    """Per-batch map: RandAugment each image, then tensorize and normalize.

    Assumes each element of `imgs` is convertible to a PIL image and that
    `labels` is indexable in lockstep with `imgs` — TODO confirm with caller.
    Returns (augmented images, labels) as NumPy arrays.
    """
    to_pil = P.ToPIL()
    to_tensor = P.ToTensor()
    normalize = P.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)
    # One augmentation policy instance is built per batch from the config.
    augment = transform_utils.rand_augment_transform(self.config_str, self.hparams)

    out_imgs = []
    out_labels = []
    for idx, img in enumerate(imgs):
        augmented = augment(to_pil(img))
        out_imgs.append(normalize(to_tensor(augmented)))
        out_labels.append(labels[idx])
    return np.array(out_imgs), np.array(out_labels)
def get_de_dataset(args):
    '''get_de_dataset'''
    # Label pipeline: cast to int32.
    label_ops = [F.ToType(np.int32)]
    transform_label = F2.Compose(label_ops)

    drop_remainder = False
    # Image pipeline: PIL -> random flip -> tensor -> normalize to [-1, 1].
    image_ops = [
        F.ToPIL(),
        F.RandomHorizontalFlip(),
        F.ToTensor(),
        F.Normalize(mean=[0.5], std=[0.5])
    ]
    transform = F2.Compose(image_ops)

    cache_path = os.path.join('cache', os.path.basename(args.data_dir), 'data_cache.pkl')
    print(cache_path)
    if not os.path.exists(os.path.dirname(cache_path)):
        os.makedirs(os.path.dirname(cache_path))

    dataset = CustomDataset(args.data_dir, cache_path, args.is_distributed)
    args.logger.info("dataset len:{}".format(len(dataset)))
    sampler = DistributedCustomSampler(dataset, num_replicas=args.world_size,
                                       rank=args.local_rank,
                                       is_distributed=args.is_distributed)
    de_dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=sampler)
    args.logger.info("after sampler de_dataset datasize :{}".format(
        de_dataset.get_dataset_size()))

    de_dataset = de_dataset.map(input_columns="image", operations=transform)
    de_dataset = de_dataset.map(input_columns="label", operations=transform_label)
    de_dataset = de_dataset.project(columns=["image", "label"])
    de_dataset = de_dataset.batch(args.per_batch_size, drop_remainder=drop_remainder)

    # Iterations per NPU when the dataset is split across world_size devices.
    num_iter_per_npu = math.ceil(len(dataset) * 1.0 / args.world_size / args.per_batch_size)
    num_classes = len(dataset.classes)
    return de_dataset, num_iter_per_npu, num_classes
def create_dataset(args, dataset_mode, repeat_num=1):
    """
    create a train or evaluate cifar10 dataset for SimCLR
    """
    if args.dataset_name != "cifar10":
        raise ValueError("Unsupported dataset.")

    # NOTE(review): "train_endcoder" spelling matches the callers' mode
    # strings; do not "fix" it here without updating them too.
    if dataset_mode in ("train_endcoder", "train_classifier"):
        dataset_path = args.train_dataset_path
    else:
        dataset_path = args.eval_dataset_path

    # Shard the source only for distributed Ascend runs.
    if args.run_distribute and args.device_target == "Ascend":
        data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                     num_shards=args.device_num, shard_id=args.device_id)
    else:
        data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True)

    # define map operations
    trans = []
    if dataset_mode == "train_endcoder":
        if args.use_crop:
            trans.append(C.Resize(256, interpolation=Inter.BICUBIC))
            trans.append(C.RandomResizedCrop(size=(32, 32), scale=(0.31, 1),
                                             interpolation=Inter.BICUBIC, max_attempts=100))
        if args.use_flip:
            trans.append(C.RandomHorizontalFlip(prob=0.5))
        if args.use_color_jitter:
            scale = 0.6
            color_jitter = C.RandomColorAdjust(0.8 * scale, 0.8 * scale,
                                               0.8 * scale, 0.2 * scale)
            trans.append(C2.RandomApply([color_jitter], prob=0.8))
        if args.use_color_gray:
            # need to convert PIL image to a NumPy array to pass it to C++ operation
            trans.extend([py_vision.ToPIL(),
                          py_vision.RandomGrayscale(prob=0.2),
                          np.array])
        if args.use_blur:
            trans.append(C2.RandomApply([gaussian_blur], prob=0.8))
        if args.use_norm:
            trans.append(C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]))
        trans.extend([C2.TypeCast(mstype.float32), C.HWC2CHW()])
    else:
        trans.extend([C.Resize(32), C2.TypeCast(mstype.float32)])
        if args.use_norm:
            trans.append(C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]))
        trans.append(C.HWC2CHW())

    type_cast_op = C2.TypeCast(mstype.int32)
    data_set = data_set.map(operations=type_cast_op, input_columns="label",
                            num_parallel_workers=8)
    # Duplicate the image column so two independent augmented views are produced.
    data_set = data_set.map(operations=copy_column,
                            input_columns=["image", "label"],
                            output_columns=["image1", "image2", "label"],
                            column_order=["image1", "image2", "label"],
                            num_parallel_workers=8)
    data_set = data_set.map(operations=trans, input_columns=["image1"], num_parallel_workers=8)
    data_set = data_set.map(operations=trans, input_columns=["image2"], num_parallel_workers=8)
    # apply batch operations
    data_set = data_set.batch(args.batch_size, drop_remainder=True)
    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)
    return data_set
def test_random_sharpness_c_py(degrees=(1.0, 1.0), plot=False):
    """
    Test Random Sharpness C and python Op
    """
    logger.info("Test RandomSharpness C and python Op")

    # Python implementation.
    data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data = data.map(operations=[C.Decode(), C.Resize((200, 300))],
                    input_columns=["image"])
    python_op = F.RandomSharpness(degrees)
    c_op = C.RandomSharpness(degrees)
    transforms_op = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: F.ToPIL()(img.astype(np.uint8)), python_op, np.array])

    ds_random_sharpness_py = data.map(operations=transforms_op, input_columns="image").batch(512)
    images_random_sharpness_py = np.concatenate(
        [image for image, _ in
         ds_random_sharpness_py.create_tuple_iterator(output_numpy=True)],
        axis=0)

    # C++ implementation.
    data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data = data.map(operations=[C.Decode(), C.Resize((200, 300))],
                    input_columns=["image"])
    ds_images_random_sharpness_c = data.map(operations=c_op, input_columns="image").batch(512)
    images_random_sharpness_c = np.concatenate(
        [image for image, _ in
         ds_images_random_sharpness_c.create_tuple_iterator(output_numpy=True)],
        axis=0)

    # Per-sample mean squared error between the two implementations.
    mse = np.array([diff_mse(c_img, p_img)
                    for c_img, p_img in zip(images_random_sharpness_c,
                                            images_random_sharpness_py)])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    if plot:
        visualize_list(images_random_sharpness_c, images_random_sharpness_py, visualize_mode=2)