def test_random_horizontal_invalid_prob_py():
    """
    Test RandomHorizontalFlip op in py_transforms: invalid input, expect to raise error
    """
    logger.info("test_random_horizontal_invalid_prob_py")

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    try:
        transforms = [
            py_vision.Decode(),
            # Note: Valid range of prob should be [0.0, 1.0]
            py_vision.RandomHorizontalFlip(1.5),
            py_vision.ToTensor()
        ]
        transform = py_vision.ComposeOp(transforms)
        data = data.map(input_columns=["image"], operations=transform())
    except ValueError as e:
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Input prob is not within the required interval of (0.0 to 1.0)." in str(e)
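# The try/except pattern above passes silently if no exception is raised at all.
# Below is a minimal sketch of the same negative test written with pytest.raises,
# which fails loudly in that case. It assumes pytest is importable in this test
# suite (the import would normally sit at module top); it is illustrative only.
import pytest


def test_random_horizontal_invalid_prob_py_raises_sketch():
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    with pytest.raises(ValueError) as error_info:
        transforms = [
            py_vision.Decode(),
            py_vision.RandomHorizontalFlip(1.5),  # prob outside the valid range [0.0, 1.0]
            py_vision.ToTensor()
        ]
        transform = py_vision.ComposeOp(transforms)
        data = data.map(input_columns=["image"], operations=transform())
    assert "Input prob is not within the required interval of (0.0 to 1.0)." in str(error_info.value)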
def test_random_horizontal_valid_prob_py():
    """
    Test RandomHorizontalFlip op with py_transforms: valid non-default input, expect to pass
    """
    logger.info("test_random_horizontal_valid_prob_py")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    transforms = [
        py_vision.Decode(),
        py_vision.RandomHorizontalFlip(0.8),
        py_vision.ToTensor()
    ]
    transform = py_vision.ComposeOp(transforms)
    data = data.map(input_columns=["image"], operations=transform())

    filename = "random_horizontal_01_py_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
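# Hypothetical sketch of the config_get_set_* helpers used above, assuming they
# follow a "read current value, set new value, return old value" pattern so the
# test can restore the global config afterwards; the real helpers live in the
# shared test utilities.
def config_get_set_seed(seed_new):
    seed_original = ds.config.get_seed()
    ds.config.set_seed(seed_new)
    return seed_original


def config_get_set_num_parallel_workers(num_parallel_workers_new):
    num_parallel_workers_original = ds.config.get_num_parallel_workers()
    ds.config.set_num_parallel_workers(num_parallel_workers_new)
    return num_parallel_workers_original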
def create_dataset_py(dataset_path, do_train, config, device_target, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset.

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        config: configuration object providing image_height.
        device_target(str): the device target. Only "Ascend" is supported.
        repeat_num(int): the repeat times of dataset. Default: 1.
        batch_size(int): the batch size of dataset. Default: 32.

    Returns:
        dataset
    """
    if device_target == "Ascend":
        rank_size = int(os.getenv("RANK_SIZE"))
        rank_id = int(os.getenv("RANK_ID"))
        if do_train:
            if rank_size == 1:
                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
            else:
                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                             num_shards=rank_size, shard_id=rank_id)
        else:
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
    else:
        raise ValueError("Unsupported device target.")

    resize_height = config.image_height

    if do_train:
        buffer_size = 20480
        # apply shuffle operations
        ds = ds.shuffle(buffer_size=buffer_size)

    # define map operations
    decode_op = P.Decode()
    resize_crop_op = P.RandomResizedCrop(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    horizontal_flip_op = P.RandomHorizontalFlip(prob=0.5)

    resize_op = P.Resize(256)
    center_crop = P.CenterCrop(resize_height)
    to_tensor = P.ToTensor()
    normalize_op = P.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    if do_train:
        trans = [decode_op, resize_crop_op, horizontal_flip_op, to_tensor, normalize_op]
    else:
        trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op]

    compose = P.ComposeOp(trans)
    ds = ds.map(input_columns="image", operations=compose(), num_parallel_workers=8,
                python_multiprocessing=True)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
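# Illustrative usage sketch for create_dataset_py above, not part of the original
# script: the config object only needs an image_height attribute here (a
# SimpleNamespace stands in for the real config), RANK_SIZE/RANK_ID are assumed
# to be exported by the launch script, and the dataset path is a placeholder.
from types import SimpleNamespace


def _example_create_dataset_py():
    cfg = SimpleNamespace(image_height=224)  # hypothetical stand-in for the real config
    train_ds = create_dataset_py("/path/to/imagenet/train", do_train=True,
                                 config=cfg, device_target="Ascend",
                                 repeat_num=1, batch_size=32)
    return train_ds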
def create_imagenet_dataset(imagenet_dir):
    ds = de.ImageFolderDatasetV2(imagenet_dir)

    transform = F.ComposeOp([
        F.Decode(),
        F.RandomHorizontalFlip(0.5),
        F.ToTensor(),
        F.Normalize((0.491, 0.482, 0.447), (0.247, 0.243, 0.262)),
        # RandomErasing operates on the normalized CHW tensor produced by ToTensor
        F.RandomErasing()
    ])
    ds = ds.map(input_columns="image", operations=transform())
    ds = ds.shuffle(buffer_size=5)
    ds = ds.repeat(3)

    return ds
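# Minimal consumption sketch for create_imagenet_dataset above, assuming the
# directory follows the ImageFolder layout (one subfolder per class); the path
# is a placeholder.
def _example_iterate_imagenet():
    dataset = create_imagenet_dataset("/path/to/imagenet")
    for item in dataset.create_dict_iterator():
        image = item["image"]  # normalized CHW array after the transform pipeline
        print(image.shape)
        break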
def test_random_horizontal_comp(plot=False):
    """
    Test RandomHorizontalFlip op and compare between Python and C image augmentation ops
    """
    logger.info("test_random_horizontal_comp")

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = c_vision.Decode()
    # Note: The image must be flipped if prob is set to 1
    random_horizontal_op = c_vision.RandomHorizontalFlip(1)
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"], operations=random_horizontal_op)

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    transforms = [
        py_vision.Decode(),
        # Note: The image must be flipped if prob is set to 1
        py_vision.RandomHorizontalFlip(1),
        py_vision.ToTensor()
    ]
    transform = py_vision.ComposeOp(transforms)
    data2 = data2.map(input_columns=["image"], operations=transform())

    images_list_c = []
    images_list_py = []
    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        image_c = item1["image"]
        image_py = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
        images_list_c.append(image_c)
        images_list_py.append(image_py)

        # Check if the output images are the same
        mse = diff_mse(image_c, image_py)
        assert mse < 0.001
    if plot:
        visualize_list(images_list_c, images_list_py, visualize_mode=2)
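# Hypothetical sketch of the diff_mse helper used above, assuming it computes a
# plain per-pixel mean squared error between two same-shaped images; the real
# helper lives in the shared test utilities and may normalize differently.
def diff_mse(image_a, image_b):
    squared_error = (image_a.astype(np.float64) - image_b.astype(np.float64)) ** 2
    return np.mean(squared_error)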
def create_dataset_py(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
    """
    Create a train or eval dataset.

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1.
        batch_size(int): the batch size of dataset. Default: 32.
        target(str): the device target. Default: Ascend.

    Returns:
        dataset
    """
    if target == "Ascend":
        device_num = int(os.getenv("RANK_SIZE"))
        rank_id = int(os.getenv("RANK_ID"))
    else:
        init("nccl")
        rank_id = get_rank()
        device_num = get_group_size()

    if do_train:
        if device_num == 1:
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
        else:
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                         num_shards=device_num, shard_id=rank_id)
    else:
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)

    image_size = 224

    # define map operations
    decode_op = P.Decode()
    resize_crop_op = P.RandomResizedCrop(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    horizontal_flip_op = P.RandomHorizontalFlip(prob=0.5)

    resize_op = P.Resize(256)
    center_crop = P.CenterCrop(image_size)
    to_tensor = P.ToTensor()
    normalize_op = P.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    if do_train:
        trans = [decode_op, resize_crop_op, horizontal_flip_op, to_tensor, normalize_op]
    else:
        trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op]

    compose = P.ComposeOp(trans)
    ds = ds.map(input_columns="image", operations=compose(), num_parallel_workers=8,
                python_multiprocessing=True)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
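# Illustrative usage sketch for the target-aware variant above: on a non-Ascend
# target the function calls init("nccl") itself, so it is assumed to run inside a
# process launched by the usual distributed tooling; on Ascend, RANK_SIZE and
# RANK_ID must be exported beforehand. The path is a placeholder.
def _example_create_dataset_py_gpu():
    eval_ds = create_dataset_py("/path/to/imagenet/val", do_train=False,
                                repeat_num=1, batch_size=32, target="GPU")
    return eval_ds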