def create_icdar_train_dataset(img_path, gt_path, batch_size=32, repeat_num=10, is_training=True,
                               num_parallel_workers=1, length=512, scale=0.25):
    """
    Build the batched, repeated ICDAR training dataset.

    Samples are produced by ``datasetV2(img_path, gt_path)`` as the columns
    ``image``, ``annotation`` and ``label``. ``preprocess`` maps them to
    ``image``, ``score_map``, ``geo_map`` and ``ignored_map``; afterwards the
    image column is colour-jittered, normalised and converted from HWC to CHW.

    Args:
        img_path: image location forwarded to ``datasetV2``.
        gt_path: ground-truth location forwarded to ``datasetV2``.
        batch_size (int): samples per batch; an incomplete last batch is dropped.
        repeat_num (int): how many times the batched dataset is repeated.
        is_training (bool): passed as ``python_multiprocessing`` to both map calls.
        num_parallel_workers (int): worker count for the source and both map calls.
        length: forwarded unchanged to ``preprocess``.
        scale: forwarded unchanged to ``preprocess``.

    Returns:
        The dataset after mapping, batching and repeating.
    """
    source_columns = ['image', 'annotation', 'label']
    icdar_data = ds.GeneratorDataset(source=datasetV2(img_path, gt_path),
                                     column_names=source_columns,
                                     num_parallel_workers=num_parallel_workers,
                                     shuffle=True)
    # The generator source is given a fixed nominal size of 1000 samples.
    icdar_data.set_dataset_size(1000)

    def build_target_maps(image, annotation, label):
        # Bind the fixed length/scale so the map only sees the three columns.
        return preprocess(image, annotation, label, length, scale)

    target_columns = ["image", "score_map", "geo_map", "ignored_map"]
    icdar_data = icdar_data.map(input_columns=["image", "annotation", "label"],
                                output_columns=target_columns,
                                columns_order=target_columns,
                                operations=build_target_maps,
                                python_multiprocessing=is_training,
                                num_parallel_workers=num_parallel_workers)

    # Image-only ops, applied in order: colour jitter, normalise, HWC -> CHW.
    half_range = [0.5 * 255, 0.5 * 255, 0.5 * 255]
    image_ops = [
        C.RandomColorAdjust(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.25),
        C.Normalize(mean=half_range, std=[0.5 * 255, 0.5 * 255, 0.5 * 255]),
        C.HWC2CHW(),
    ]
    icdar_data = icdar_data.map(input_columns=["image"],
                                operations=image_ops,
                                python_multiprocessing=is_training,
                                num_parallel_workers=num_parallel_workers)

    batched = icdar_data.batch(batch_size, drop_remainder=True)
    return batched.repeat(repeat_num)
def test_random_color_adjust_op_saturation():
    """
    Test RandomColorAdjust op

    Runs the C++ and the Python RandomColorAdjust on the same TFRecord images
    with the ranges (1, 1), (1, 1), (0.5, 0.5), (0, 0) and requires the two
    outputs to agree with an MSE below 0.01 for every image. Only the third
    range differs from the neutral values -- presumably saturation, going by
    the test name.
    """
    logger.info("test_random_color_adjust_op")

    # First dataset: C++ decode + colour adjust.
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = c_vision.Decode()
    random_adjust_op = c_vision.RandomColorAdjust((1, 1), (1, 1), (0.5, 0.5), (0, 0))
    ctrans = [decode_op, random_adjust_op]
    data1 = data1.map(input_columns=["image"], operations=ctrans)

    # Second dataset: the same pipeline built from the Python ops.
    transforms = [
        py_vision.Decode(),
        py_vision.RandomColorAdjust((1, 1), (1, 1), (0.5, 0.5), (0, 0)),
        py_vision.ToTensor(),
    ]
    transform = py_vision.ComposeOp(transforms)
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=transform())

    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        c_image = item1["image"]
        # Move axis 0 to the end and rescale by 255 to uint8 so that the
        # Python result can be compared with the C++ one.
        py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)

        logger.info("shape of c_image: {}".format(c_image.shape))
        logger.info("shape of py_image: {}".format(py_image.shape))

        logger.info("dtype of c_image: {}".format(c_image.dtype))
        logger.info("dtype of py_image: {}".format(py_image.dtype))

        mse = diff_mse(c_image, py_image)
        logger.info("mse is {}".format(mse))
        assert mse < 0.01
def test_cpp_uniform_augment_exception_float_numops(num_ops=2.5):
    """
    Test UniformAugment invalid float number of ops

    Constructing C.UniformAugment with a non-integer ``num_ops`` must raise,
    and the message must name the offending argument and the expected type.
    The test fails if no exception is raised at all.
    """
    logger.info("Test CPP UniformAugment invalid float num_ops exception")

    transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
                     C.RandomHorizontalFlip(),
                     C.RandomVerticalFlip(),
                     C.RandomColorAdjust(),
                     C.RandomRotation(degrees=45)]

    error_msg = None
    try:
        _ = C.UniformAugment(operations=transforms_ua, num_ops=num_ops)
    except Exception as e:  # exception type is not pinned by this test, only its message
        error_msg = str(e)
        logger.info("Got an exception in DE: {}".format(error_msg))

    # Checked outside the handler so a missing exception fails the test
    # instead of letting it pass without asserting anything.
    assert error_msg is not None, "UniformAugment did not reject a float num_ops"
    assert "Argument num_ops with value 2.5 is not of type (<class 'int'>,)" in error_msg
def test_cpp_uniform_augment_exception_nonpositive_numops(num_ops=0):
    """
    Test UniformAugment invalid non-positive number of ops

    Constructing C.UniformAugment with ``num_ops`` <= 0 must raise with a
    message stating that num_ops has to be greater than 0. The test fails if
    no exception is raised at all.
    """
    logger.info("Test CPP UniformAugment invalid non-positive num_ops exception")

    transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
                     C.RandomHorizontalFlip(),
                     C.RandomVerticalFlip(),
                     C.RandomColorAdjust(),
                     C.RandomRotation(degrees=45)]

    error_msg = None
    try:
        _ = C.UniformAugment(operations=transforms_ua, num_ops=num_ops)
    except Exception as e:  # exception type is not pinned by this test, only its message
        error_msg = str(e)
        logger.info("Got an exception in DE: {}".format(error_msg))

    # Checked outside the handler so a missing exception fails the test
    # instead of letting it pass without asserting anything.
    assert error_msg is not None, "UniformAugment did not reject a non-positive num_ops"
    assert "Input num_ops must be greater than 0" in error_msg
def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num=1, rank=0,
                       is_training=True, num_parallel_workers=4):
    """
    Create SSD dataset with MindDataset.

    Reads the columns ``img_id``, ``image`` and ``annotation`` from the
    MindRecord file, decodes the image, runs ``preprocess_fn`` on each sample
    and finally normalises the image and converts it from HWC to CHW. Colour
    jitter is added only when ``is_training`` is true.

    Args:
        mindrecord_file: MindRecord file passed to ``de.MindDataset``.
        batch_size (int): samples per batch; an incomplete last batch is dropped.
        repeat_num (int): how many times the batched dataset is repeated.
        device_num (int): passed as ``num_shards``.
        rank (int): passed as ``shard_id``.
        is_training (bool): selects the output columns and image ops, and is
            also used for ``shuffle`` and ``python_multiprocessing``.
        num_parallel_workers (int): worker count for reading and for the
            preprocess / image-op maps.

    Returns:
        The dataset after mapping, batching and repeating. Columns are
        ``image, box, label, num_match`` for training and
        ``img_id, image, image_shape`` otherwise.
    """
    ssd_data = de.MindDataset(mindrecord_file,
                              columns_list=["img_id", "image", "annotation"],
                              num_shards=device_num,
                              shard_id=rank,
                              num_parallel_workers=num_parallel_workers,
                              shuffle=is_training)
    ssd_data = ssd_data.map(input_columns=["image"], operations=C.Decode())

    to_chw = C.HWC2CHW()
    normalize = C.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                            std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    color_jitter = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)

    def run_preprocess(img_id, image, annotation):
        # Bind the training flag so the map only sees the three columns.
        return preprocess_fn(img_id, image, annotation, is_training)

    if not is_training:
        output_columns = ["img_id", "image", "image_shape"]
        image_ops = [normalize, to_chw]
    else:
        output_columns = ["image", "box", "label", "num_match"]
        image_ops = [color_jitter, normalize, to_chw]

    ssd_data = ssd_data.map(input_columns=["img_id", "image", "annotation"],
                            output_columns=output_columns,
                            columns_order=output_columns,
                            operations=run_preprocess,
                            python_multiprocessing=is_training,
                            num_parallel_workers=num_parallel_workers)
    ssd_data = ssd_data.map(input_columns=["image"],
                            operations=image_ops,
                            python_multiprocessing=is_training,
                            num_parallel_workers=num_parallel_workers)

    batched = ssd_data.batch(batch_size, drop_remainder=True)
    return batched.repeat(repeat_num)
def test_cpp_uniform_augment_exception_large_numops(num_ops=6):
    """
    Test UniformAugment invalid large number of ops

    Constructing C.UniformAugment with ``num_ops`` larger than the number of
    supplied operations (6 requested, 5 given) must raise with a message that
    mentions ``num_ops``. The test fails if no exception is raised at all.
    """
    logger.info("Test CPP UniformAugment invalid large num_ops exception")

    transforms_ua = [
        C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
        C.RandomHorizontalFlip(),
        C.RandomVerticalFlip(),
        C.RandomColorAdjust(),
        C.RandomRotation(degrees=45)
    ]

    error_msg = None
    try:
        _ = C.UniformAugment(operations=transforms_ua, num_ops=num_ops)
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt / SystemExit must
        # propagate rather than be treated as the expected validation error.
        error_msg = str(e)
        logger.info("Got an exception in DE: {}".format(error_msg))

    # Checked outside the handler so a missing exception fails the test
    # instead of letting it pass without asserting anything.
    assert error_msg is not None, "UniformAugment did not reject num_ops > len(operations)"
    assert "num_ops" in error_msg
def test_cpp_uniform_augment_exception_pyops(num_ops=2):
    """
    Test UniformAugment invalid op in operations

    The operations list holds five C ops followed by one ``F.Invert()``;
    constructing C.UniformAugment from it must raise a TypeError whose message
    points at index 5 and names the expected TensorOp type.
    """
    logger.info("Test CPP UniformAugment invalid OP exception")

    cpp_only_ops = [
        C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
        C.RandomHorizontalFlip(),
        C.RandomVerticalFlip(),
        C.RandomColorAdjust(),
        C.RandomRotation(degrees=45),
    ]
    # The op from the F module lands at index 5, which the message must report.
    mixed_ops = cpp_only_ops + [F.Invert()]

    with pytest.raises(TypeError) as e:
        C.UniformAugment(operations=mixed_ops, num_ops=num_ops)

    logger.info("Got an exception in DE: {}".format(str(e)))

    raised_text = str(e.value)
    expected_value_part = "Argument tensor_ops[5] with value <mindspore.dataset.transforms.vision.py_transforms.Invert"
    expected_type_part = "is not of type (<class 'mindspore._c_dataengine.TensorOp'>,)"
    assert expected_value_part in raised_text
    assert expected_type_part in raised_text
def test_cpp_uniform_augment_exception_pyops(num_ops=2):
    """
    Test UniformAugment invalid op in operations

    The operations list holds five C ops followed by one ``F.Invert()``;
    constructing C.UniformAugment from it must raise with a message that
    mentions ``operations``. The test fails if no exception is raised at all.
    """
    logger.info("Test CPP UniformAugment invalid OP exception")

    transforms_ua = [
        C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
        C.RandomHorizontalFlip(),
        C.RandomVerticalFlip(),
        C.RandomColorAdjust(),
        C.RandomRotation(degrees=45),
        F.Invert()
    ]

    error_msg = None
    try:
        _ = C.UniformAugment(operations=transforms_ua, num_ops=num_ops)
    except Exception as e:  # exception type is not pinned by this test, only its message
        error_msg = str(e)
        logger.info("Got an exception in DE: {}".format(error_msg))

    # Checked outside the handler so a missing exception fails the test
    # instead of letting it pass without asserting anything.
    assert error_msg is not None, "UniformAugment did not reject the F.Invert() entry"
    assert "operations" in error_msg
def test_random_color_adjust_md5():
    """
    Test RandomColorAdjust with md5 check

    Pins the seed to 10 and the number of parallel workers to 1, runs the C++
    and the Python RandomColorAdjust(0.4, 0.4, 0.4, 0.1) pipelines and compares
    each result against its golden .npz file. The previous seed and worker
    count are restored even when a comparison fails.
    """
    logger.info("Test RandomColorAdjust with md5 check")
    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # First dataset
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        decode_op = c_vision.Decode()
        random_adjust_op = c_vision.RandomColorAdjust(0.4, 0.4, 0.4, 0.1)
        data1 = data1.map(input_columns=["image"], operations=decode_op)
        data1 = data1.map(input_columns=["image"], operations=random_adjust_op)

        # Second dataset
        transforms = [
            py_vision.Decode(),
            py_vision.RandomColorAdjust(0.4, 0.4, 0.4, 0.1),
            py_vision.ToTensor()
        ]
        transform = py_vision.ComposeOp(transforms)
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        data2 = data2.map(input_columns=["image"], operations=transform())

        # Compare with expected md5 from images
        filename = "random_color_adjust_01_c_result.npz"
        save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)
        filename = "random_color_adjust_01_py_result.npz"
        save_and_check_md5(data2, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore configuration; in `finally` so a failed md5 check cannot leak
        # the pinned seed / worker count into tests that run afterwards.
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def util_test_random_color_adjust_error(brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0)):
    """
    Util function that tests the error message in case of grayscale images

    Builds a single-channel dataset with the Python ops, applies the C++
    RandomColorAdjust to it and expects iteration to raise a RuntimeError
    complaining that the number of channels is not 3.
    """
    def to_uint8_channel_last(image):
        # Move axis 0 to the end and rescale by 255 to uint8.
        return (image.transpose(1, 2, 0) * 255).astype(np.uint8)

    grayscale_pipeline = py_vision.ComposeOp([
        py_vision.Decode(),
        py_vision.Grayscale(1),
        py_vision.ToTensor(),
        to_uint8_channel_last,
    ])

    gray_data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    gray_data = gray_data.map(input_columns=["image"], operations=grayscale_pipeline())

    # if input is grayscale, the output dimensions should be single channel, the following should fail
    color_adjust = c_vision.RandomColorAdjust(brightness=brightness, contrast=contrast,
                                              saturation=saturation, hue=hue)
    expected_msg = "The shape is incorrect: number of channels does not equal 3"

    with pytest.raises(RuntimeError) as info:
        gray_data = gray_data.map(input_columns=["image"], operations=color_adjust)
        # The dataset has to be iterated for the op to run and raise.
        seen_shapes = [row["image"].shape for row in gray_data.create_dict_iterator()]

    assert expected_msg in str(info.value)
def util_test_random_color_adjust_op(brightness=(1, 1), contrast=(1, 1), saturation=(1, 1), hue=(0, 0), plot=False):
    """
    Util function that tests RandomColorAdjust for a specific argument

    Applies the C++ and the Python RandomColorAdjust with the same arguments
    to the same TFRecord images and requires an MSE below 0.01 per image.

    Args:
        brightness, contrast, saturation, hue: forwarded unchanged to both
            RandomColorAdjust implementations.
        plot (bool): when true, each image pair is passed to visualize_image.
    """
    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = c_vision.Decode()

    random_adjust_op = c_vision.RandomColorAdjust(brightness=brightness, contrast=contrast, saturation=saturation,
                                                  hue=hue)

    ctrans = [
        decode_op,
        random_adjust_op,
    ]

    data1 = data1.map(input_columns=["image"], operations=ctrans)

    # Second dataset
    transforms = [
        py_vision.Decode(),
        py_vision.RandomColorAdjust(brightness=brightness, contrast=contrast, saturation=saturation,
                                    hue=hue),
        py_vision.ToTensor()
    ]
    transform = py_vision.ComposeOp(transforms)
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=transform())

    image_pairs = zip(data1.create_dict_iterator(), data2.create_dict_iterator())
    for num_iter, (item1, item2) in enumerate(image_pairs, start=1):
        c_image = item1["image"]
        # Move axis 0 to the end and rescale by 255 to uint8 so that the
        # Python result can be compared with the C++ one.
        py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)

        logger.info("shape of c_image: {}".format(c_image.shape))
        logger.info("shape of py_image: {}".format(py_image.shape))

        logger.info("dtype of c_image: {}".format(c_image.dtype))
        logger.info("dtype of py_image: {}".format(py_image.dtype))

        mse = diff_mse(c_image, py_image)
        logger.info("mse is {}".format(mse))

        # Label names the op under test and the 1-based image index (the old
        # line said "random_rotation_op" and was one ahead of the real index).
        logger.info("random_color_adjust_op_{}, mse: {}".format(num_iter, mse))
        assert mse < 0.01

        if plot:
            visualize_image(c_image, py_image, mse)
def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch_size=32):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        config: object providing ``image_height`` and ``image_width``.
        platform(string): "Ascend" or "GPU"; decides how the dataset is sharded.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32

    Returns:
        dataset

    Raises:
        ValueError: if ``platform`` is neither "Ascend" nor "GPU".
    """
    # Work out the sharding arguments first, then build the dataset once.
    if platform == "Ascend":
        # Both variables are read from the environment even for a single device.
        rank_size = int(os.getenv("RANK_SIZE"))
        rank_id = int(os.getenv("RANK_ID"))
        shard_args = {} if rank_size == 1 else {"num_shards": rank_size, "shard_id": rank_id}
    elif platform == "GPU":
        if do_train:
            from mindspore.communication.management import get_rank, get_group_size
            shard_args = {"num_shards": get_group_size(), "shard_id": get_rank()}
        else:
            shard_args = {}
    else:
        raise ValueError("Unsupport platform.")

    image_data = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True, **shard_args)

    target_height = config.image_height
    target_width = config.image_width

    # define map operations
    decode = C.Decode()
    random_crop_decode_resize = C.RandomCropDecodeResize(target_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    random_flip = C.RandomHorizontalFlip(prob=0.5)

    resize_256 = C.Resize(256)
    center_crop = C.CenterCrop(target_width)
    color_adjust = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
    normalize = C.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                            std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    to_chw = C.HWC2CHW()

    train_ops = [random_crop_decode_resize, random_flip, color_adjust, normalize, to_chw]
    eval_ops = [decode, resize_256, center_crop, normalize, to_chw]
    image_ops = train_ops if do_train else eval_ops

    label_cast = C2.TypeCast(mstype.int32)

    image_data = image_data.map(input_columns="image", operations=image_ops, num_parallel_workers=8)
    image_data = image_data.map(input_columns="label", operations=label_cast, num_parallel_workers=8)

    # shuffle, then batch (dropping the remainder), then repeat
    image_data = image_data.shuffle(buffer_size=1000)
    image_data = image_data.batch(batch_size, drop_remainder=True)
    return image_data.repeat(repeat_num)
def _collect_images_channel_last(dataset):
    """Iterate `dataset` and return every image batch, axes reordered to (0, 2, 3, 1), as one array."""
    # Gather the batches and concatenate once; appending inside the loop
    # would re-copy the accumulated array for every batch.
    batches = [np.transpose(image, (0, 2, 3, 1)) for image, _ in dataset]
    return np.concatenate(batches, axis=0)


def test_cpp_uniform_augment(plot=False, num_ops=2):
    """
    Test UniformAugment

    Builds one pipeline that only decodes and resizes the images and one that
    additionally applies C.UniformAugment over five random C ops, then logs the
    mean MSE between the two image sets.

    Args:
        plot (bool): when true, both image sets are passed to visualize_list.
        num_ops (int): forwarded to C.UniformAugment.
    """
    logger.info("Test CPP UniformAugment")

    # Original Images
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = [C.Decode(), C.Resize(size=[224, 224]),
                           F.ToTensor()]

    ds_original = ds.map(input_columns="image",
                         operations=transforms_original)

    ds_original = ds_original.batch(512)
    images_original = _collect_images_channel_last(ds_original)

    # UniformAugment Images
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    transforms_ua = [
        C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
        C.RandomHorizontalFlip(),
        C.RandomVerticalFlip(),
        C.RandomColorAdjust(),
        C.RandomRotation(degrees=45)
    ]

    uni_aug = C.UniformAugment(operations=transforms_ua, num_ops=num_ops)

    transforms_all = [
        C.Decode(),
        C.Resize(size=[224, 224]),
        uni_aug,
        F.ToTensor()
    ]

    ds_ua = ds.map(input_columns="image",
                   operations=transforms_all,
                   num_parallel_workers=1)

    ds_ua = ds_ua.batch(512)
    images_ua = _collect_images_channel_last(ds_ua)

    if plot:
        visualize_list(images_original, images_ua)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_ua[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch_size=100, model='ghsotnet'):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        config: object providing ``image_height``.
        platform(string): "Ascend" or "GPU"; decides how the dataset is sharded.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 100
        model(string): 'ghostnet-600' selects resize 274 / crop 240 for eval;
            any other value selects resize 256 / crop 224.

    Returns:
        dataset

    Raises:
        ValueError: if ``platform`` is neither "Ascend" nor "GPU".
    """
    # Work out the sharding arguments first, then build the dataset once.
    if platform == "Ascend":
        # Both variables are read from the environment even for a single device.
        rank_size = int(os.getenv("RANK_SIZE"))
        rank_id = int(os.getenv("RANK_ID"))
        shard_args = {} if rank_size == 1 else {"num_shards": rank_size, "shard_id": rank_id}
    elif platform == "GPU":
        if do_train:
            from mindspore.communication.management import get_rank, get_group_size
            shard_args = {"num_shards": get_group_size(), "shard_id": get_rank()}
        else:
            shard_args = {}
    else:
        raise ValueError("Unsupported platform.")

    records = ds.MindDataset(dataset_path, num_parallel_workers=8, shuffle=True, **shard_args)

    target_height = config.image_height

    # define map operations (used for training)
    random_crop_decode_resize = C.RandomCropDecodeResize(target_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    random_flip = C.RandomHorizontalFlip(prob=0.5)
    color_adjust = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
    rescale = C.Rescale(1 / 255.0, 0)
    normalize = C.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    to_chw = C.HWC2CHW()

    # define python operations (used for eval)
    resize_to, crop_to = (274, 240) if model == 'ghostnet-600' else (256, 224)
    eval_pipeline = P.ComposeOp([
        P.Decode(),
        P.Resize(resize_to, interpolation=Inter.BICUBIC),
        P.CenterCrop(crop_to),
        P.ToTensor(),
        P.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    if do_train:
        image_ops = [random_crop_decode_resize, random_flip, color_adjust, rescale, normalize, to_chw]
    else:
        image_ops = eval_pipeline()

    label_cast = C2.TypeCast(mstype.int32)
    records = records.map(input_columns="image", operations=image_ops, num_parallel_workers=8)
    records = records.map(input_columns="label_list", operations=label_cast, num_parallel_workers=8)

    # shuffle, then batch (dropping the remainder), then repeat
    records = records.shuffle(buffer_size=1000)
    records = records.batch(batch_size, drop_remainder=True)
    return records.repeat(repeat_num)
def classification_dataset(data_dir, image_size, per_batch_size, max_epoch, rank, group_size,
                           mode='train',
                           input_mode='folder',
                           root='',
                           num_parallel_workers=None,
                           shuffle=None,
                           sampler=None,
                           class_indexing=None,
                           drop_remainder=True,
                           transform=None,
                           target_transform=None):
    """
    A function that returns a dataset for classification.

    The mode of input dataset could be "folder" or "txt". If it is "folder",
    all images within one folder have the same label. If it is "txt", all
    paths of images are written into a textfile.

    Args:
        data_dir (str): Path to the root directory that contains the dataset
            for input_mode="folder", or path of the textfile that contains
            every image's path of the dataset.
        image_size: Size of the input images, forwarded to
            RandomCropDecodeResize (train) or CenterCrop (otherwise); the
            examples below pass a two-element list.
        per_batch_size (int): The batch size of every step during training.
        max_epoch (int): The number of epochs; the dataset is repeated this
            many times.
        rank (int): The shard ID within num_shards.
        group_size (int): Number of shards that the dataset should be divided into.
        mode (str): "train" or others. Default: "train".
        input_mode (str): The form of the input dataset. "folder" selects
            ImageFolderDatasetV2; any other value is handled as "txt".
            Default: "folder".
        root (str): The images path for input_mode="txt". Default: "".
        num_parallel_workers (int): Number of workers to read the data. Default: None.
        shuffle (bool): Whether or not to perform shuffle on the dataset. Default: None.
        sampler (Sampler): Object used to choose samples from the dataset.
            Only used for input_mode="folder"; in "txt" mode it is replaced
            by a DistributedSampler. Default: None.
        class_indexing (dict): A str-to-int mapping from folder name to index
            (default=None, the folder names will be sorted alphabetically and
            each class will be given a unique index starting from 0).
        drop_remainder (bool): Passed to batch(). Default: True.
        transform (list): Image operations used instead of the built-in
            train / eval lists. Default: None.
        target_transform (list): Label operations used instead of the
            built-in int32 cast. Default: None.

    Returns:
        The dataset with columns "image" and "label" after mapping, batching
        and repeating.

    Examples:
        >>> from mindvision.common.datasets.classification import classification_dataset
        >>> # path to imagefolder directory. This directory needs to contain sub-directories which contain the images
        >>> dataset_dir = "/path/to/imagefolder_directory"
        >>> de_dataset = classification_dataset(dataset_dir, image_size=[224, 224],
        >>>                                     per_batch_size=64, max_epoch=100,
        >>>                                     rank=0, group_size=4)
        >>> # Path of the textfile that contains every image's path of the dataset.
        >>> dataset_dir = "/path/to/dataset/images/train.txt"
        >>> images_dir = "/path/to/dataset/images"
        >>> de_dataset = classification_dataset(dataset_dir, image_size=[224, 224],
        >>>                                     per_batch_size=64, max_epoch=100,
        >>>                                     rank=0, group_size=4,
        >>>                                     input_mode="txt", root=images_dir)
    """
    channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # Image operations: a caller-supplied list wins over the built-in ones.
    if transform is not None:
        image_ops = transform
    elif mode == 'train':
        image_ops = [
            V_C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
            V_C.RandomHorizontalFlip(prob=0.5),
            V_C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4),
            V_C.Normalize(mean=channel_mean, std=channel_std),
            V_C.HWC2CHW()
        ]
    else:
        image_ops = [
            V_C.Decode(),
            V_C.Resize((256, 256)),
            V_C.CenterCrop(image_size),
            V_C.Normalize(mean=channel_mean, std=channel_std),
            V_C.HWC2CHW()
        ]

    # Label operations: default is a cast to int32.
    label_ops = [C.TypeCast(mstype.int32)] if target_transform is None else target_transform

    if input_mode != 'folder':
        # "txt" mode: samples come from a generator with a distributed sampler.
        txt_source = TxtDataset(root, data_dir)
        sampler = DistributedSampler(txt_source, rank, group_size, shuffle=shuffle)
        pipeline = de.GeneratorDataset(txt_source, ["image", "label"], sampler=sampler)
        pipeline.set_dataset_size(len(sampler))
    else:
        pipeline = de.ImageFolderDatasetV2(data_dir,
                                           num_parallel_workers=num_parallel_workers,
                                           shuffle=shuffle,
                                           sampler=sampler,
                                           class_indexing=class_indexing,
                                           num_shards=group_size,
                                           shard_id=rank)

    pipeline = pipeline.map(input_columns="image",
                            num_parallel_workers=num_parallel_workers,
                            operations=image_ops)
    pipeline = pipeline.map(input_columns="label",
                            num_parallel_workers=num_parallel_workers,
                            operations=label_ops)

    pipeline = pipeline.project(columns=["image", "label"])
    pipeline = pipeline.batch(per_batch_size, drop_remainder=drop_remainder)
    return pipeline.repeat(max_epoch)