def test_seed_deterministic():
    """
    Test deterministic run with setting the seed, only works with num_parallel worker = 1
    """
    logger.info("test_seed_deterministic")

    # Remember the global config so it can be restored afterwards.
    saved_workers = ds.config.get_num_parallel_workers()
    saved_seed = ds.config.get_seed()
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    decode_op = c_vision.Decode()

    # Pipeline 1: decode then random-crop (seed is read at construction time).
    pipeline1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    crop_a = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
    pipeline1 = pipeline1.map(operations=decode_op, input_columns=["image"])
    pipeline1 = pipeline1.map(operations=crop_a, input_columns=["image"])

    # Pipeline 2: an independently constructed RandomCrop under the same seed
    # must produce the identical random sequence, so the outputs should match.
    pipeline2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    pipeline2 = pipeline2.map(operations=decode_op, input_columns=["image"])
    crop_b = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
    pipeline2 = pipeline2.map(operations=crop_b, input_columns=["image"])

    dataset_equal(pipeline1, pipeline2, 0)

    # Restore original configuration values.
    ds.config.set_num_parallel_workers(saved_workers)
    ds.config.set_seed(saved_seed)
def test_cpp_uniform_augment_random_crop_badinput(num_ops=1):
    """
    Test UniformAugment with a crop size larger than the input image.

    The Cifar10 images are 32x32x3 while RandomCrop asks for 224x224 without
    padding, so iterating the pipeline is expected to raise an error whose
    message mentions "Crop size".

    Args:
        num_ops (int): number of ops UniformAugment applies per image. Default: 1.
    """
    logger.info("Test CPP UniformAugment with random_crop bad input")
    batch_size = 2
    cifar10_dir = "../data/dataset/testCifar10Data"
    ds1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False)  # shape = [32,32,3]

    transforms_ua = [
        # Note: crop size [224, 224] > image size [32, 32]
        C.RandomCrop(size=[224, 224]),
        C.RandomHorizontalFlip()
    ]
    uni_aug = C.UniformAugment(transforms=transforms_ua, num_ops=num_ops)
    ds1 = ds1.map(operations=uni_aug, input_columns="image")

    # apply DatasetOps
    ds1 = ds1.batch(batch_size, drop_remainder=True, num_parallel_workers=1)
    num_batches = 0
    error_raised = False
    try:
        for _ in ds1.create_dict_iterator(num_epochs=1, output_numpy=True):
            num_batches += 1
    except Exception as e:
        error_raised = True
        assert "Crop size" in str(e)
    # Fix: the original silently passed when no exception was raised at all,
    # which would mask a regression where the oversized crop stops failing.
    assert error_raised, "Expected RandomCrop with oversized crop size to raise an error"
def test_random_crop_01_c():
    """
    Test RandomCrop op with c_transforms: size is a single integer, expected to pass
    """
    logger.info("test_random_crop_01_c")
    saved_seed = config_get_set_seed(0)
    saved_workers = config_get_set_num_parallel_workers(1)

    # Build the pipeline: decode followed by a square crop.
    # An int size produces a (size, size) crop.
    pipeline = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    pipeline = pipeline.map(operations=c_vision.Decode(), input_columns=["image"])
    pipeline = pipeline.map(operations=c_vision.RandomCrop(512), input_columns=["image"])

    # Compare the output against the stored golden md5.
    save_and_check_md5(pipeline, "random_crop_01_c_result.npz",
                       generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(saved_seed)
    ds.config.set_num_parallel_workers(saved_workers)
def test_random_crop_02_c():
    """
    Test RandomCrop op with c_transforms: size is a list/tuple with length 2, expected to pass
    """
    logger.info("test_random_crop_02_c")
    saved_seed = config_get_set_seed(0)
    saved_workers = config_get_set_num_parallel_workers(1)

    # Build the pipeline: decode followed by a rectangular crop.
    # A length-2 size sequence is interpreted as (height, width).
    pipeline = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    pipeline = pipeline.map(operations=c_vision.Decode(), input_columns=["image"])
    pipeline = pipeline.map(operations=c_vision.RandomCrop([512, 375]), input_columns=["image"])

    # Compare the output against the stored golden md5.
    save_and_check_md5(pipeline, "random_crop_02_c_result.npz",
                       generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(saved_seed)
    ds.config.set_num_parallel_workers(saved_workers)
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="CPU"):
    """Create a Cifar10 pipeline: augment (train only), normalize, shuffle, batch, repeat."""
    data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True)

    # Image transforms: random augmentation applies to training only.
    image_ops = []
    if do_train:
        image_ops += [
            C.RandomCrop((32, 32), (4, 4, 4, 4)),
            C.RandomHorizontalFlip(prob=0.5)
        ]
    image_ops += [
        C.Resize((48, 48)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]

    # Labels become int32; images run through the transform chain.
    data_set = data_set.map(operations=C2.TypeCast(mstype.int32), input_columns="label",
                            num_parallel_workers=8)
    data_set = data_set.map(operations=image_ops, input_columns="image",
                            num_parallel_workers=8)

    # Shuffle, batch and repeat.
    data_set = data_set.shuffle(buffer_size=10)
    data_set = data_set.batch(batch_size, drop_remainder=False)
    data_set = data_set.repeat(repeat_num)
    return data_set
def create_dataset(data_path, batch_size=32, repeat_size=1, mode="train"):
    """
    create dataset for train or test
    """
    cifar_ds = ds.Cifar10Dataset(data_path)

    # Cast labels to int32 first.
    cifar_ds = cifar_ds.map(operations=C.TypeCast(mstype.int32), input_columns="label")

    # Image ops: random augmentation for training, then the shared tail.
    image_ops = []
    if mode == "train":
        image_ops += [CV.RandomCrop([32, 32], [4, 4, 4, 4]),
                      CV.RandomHorizontalFlip()]
    image_ops += [CV.Resize((cfg.image_height, cfg.image_width)),
                  CV.Rescale(1.0 / 255.0, 0.0),
                  CV.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
                  CV.HWC2CHW()]
    # Apply each op as its own map, matching the original pipeline structure.
    for op in image_ops:
        cifar_ds = cifar_ds.map(operations=op, input_columns="image")

    cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
    cifar_ds = cifar_ds.repeat(repeat_size)
    return cifar_ds
def test_random_crop_05_c():
    """
    Test RandomCrop op with c_transforms: input image size < crop size but
    pad_if_needed is enabled, expected to pass
    """
    logger.info("test_random_crop_05_c")
    saved_seed = config_get_set_seed(0)
    saved_workers = config_get_set_num_parallel_workers(1)

    # The source image is 4032x2268; the crop asks for one extra column,
    # so pad_if_needed must kick in for the op to succeed.
    pipeline = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    crop_op = c_vision.RandomCrop([2268, 4033], [200, 200, 200, 200], pad_if_needed=True)
    pipeline = pipeline.map(operations=c_vision.Decode(), input_columns=["image"])
    pipeline = pipeline.map(operations=crop_op, input_columns=["image"])

    # Compare the output against the stored golden md5.
    save_and_check_md5(pipeline, "random_crop_05_c_result.npz",
                       generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(saved_seed)
    ds.config.set_num_parallel_workers(saved_workers)
def test_random_crop_08_c():
    """
    Test RandomCrop op with c_transforms: padding_mode is Border.EDGE, expected to pass
    """
    logger.info("test_random_crop_08_c")
    saved_seed = config_get_set_seed(0)
    saved_workers = config_get_set_num_parallel_workers(1)

    # Pad using the edge pixels (Border.EDGE) rather than a constant fill.
    pipeline = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    crop_op = c_vision.RandomCrop(512, [200, 200, 200, 200], padding_mode=mode.Border.EDGE)
    pipeline = pipeline.map(operations=c_vision.Decode(), input_columns=["image"])
    pipeline = pipeline.map(operations=crop_op, input_columns=["image"])

    # Compare the output against the stored golden md5.
    save_and_check_md5(pipeline, "random_crop_08_c_result.npz",
                       generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(saved_seed)
    ds.config.set_num_parallel_workers(saved_workers)
def test_random_crop_op_c(plot=False):
    """
    Test RandomCrop Op in c transforms
    """
    logger.info("test_random_crop_op_c")

    decode_op = c_vision.Decode()
    crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])

    # Cropped pipeline: decode then random-crop.
    cropped_ds = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    cropped_ds = cropped_ds.map(operations=decode_op, input_columns=["image"])
    cropped_ds = cropped_ds.map(operations=crop_op, input_columns=["image"])

    # Decode-only pipeline for side-by-side comparison.
    plain_ds = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    plain_ds = plain_ds.map(operations=decode_op, input_columns=["image"])

    cropped_images, original_images = [], []
    for cropped_row, plain_row in zip(
            cropped_ds.create_dict_iterator(num_epochs=1, output_numpy=True),
            plain_ds.create_dict_iterator(num_epochs=1, output_numpy=True)):
        cropped_images.append(cropped_row["image"])
        original_images.append(plain_row["image"])
    if plot:
        visualize_list(original_images, cropped_images)
def test_random_crop_comp(plot=False):
    """
    Test RandomCrop and compare between python and c image augmentation
    """
    logger.info("Test RandomCrop with c_transform and py_transform comparison")
    cropped_size = 512

    # C pipeline: decode + crop as separate map ops.
    c_data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    c_data = c_data.map(operations=c_vision.Decode(), input_columns=["image"])
    c_data = c_data.map(operations=c_vision.RandomCrop(cropped_size), input_columns=["image"])

    # Python pipeline: decode + crop + to-tensor composed into a single op.
    py_ops = [
        py_vision.Decode(),
        py_vision.RandomCrop(cropped_size),
        py_vision.ToTensor()
    ]
    composed = mindspore.dataset.transforms.py_transforms.Compose(py_ops)
    py_data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    py_data = py_data.map(operations=composed, input_columns=["image"])

    image_c_cropped = []
    image_py_cropped = []
    for c_row, py_row in zip(c_data.create_dict_iterator(num_epochs=1, output_numpy=True),
                             py_data.create_dict_iterator(num_epochs=1, output_numpy=True)):
        image_c_cropped.append(c_row["image"])
        # ToTensor emits CHW floats in [0, 1]; convert back to HWC uint8 for comparison.
        image_py_cropped.append((py_row["image"].transpose(1, 2, 0) * 255).astype(np.uint8))
    if plot:
        visualize_list(image_c_cropped, image_py_cropped, visualize_mode=2)
def create_dataset(data_home, repeat_num=1, batch_size=32, do_train=True, device_target="CPU"):
    """
    create data for next use such as training or inferring
    """
    cifar_ds = ds.Cifar10Dataset(data_home, num_parallel_workers=8, shuffle=True)

    # Random augmentation applies only to training.
    image_ops = []
    if do_train:
        image_ops += [C.RandomCrop((32, 32), (4, 4, 4, 4)),
                      C.RandomHorizontalFlip(prob=0.5)]
    image_ops += [C.Resize((224, 224)),
                  C.Rescale(1.0 / 255.0, 0.0),
                  C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
                  C.HWC2CHW()]

    # Labels become int32; images run through the transform chain.
    cifar_ds = cifar_ds.map(operations=C2.TypeCast(mstype.int32), input_columns="label",
                            num_parallel_workers=8)
    cifar_ds = cifar_ds.map(operations=image_ops, input_columns="image",
                            num_parallel_workers=8)

    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
    cifar_ds = cifar_ds.repeat(repeat_num)
    return cifar_ds
def create_dataset_cifar10(data_path, batch_size=32, repeat_size=1, status="train", target="Ascend"):
    """
    create dataset for train or test
    """
    if target == "Ascend":
        device_num, rank_id = _get_rank_info()
    # Non-Ascend targets and single-device Ascend read the dataset unsharded.
    if target != "Ascend" or device_num == 1:
        cifar_ds = ds.Cifar10Dataset(data_path)
    else:
        cifar_ds = ds.Cifar10Dataset(data_path, num_parallel_workers=8, shuffle=True,
                                     num_shards=device_num, shard_id=rank_id)

    cfg = alexnet_cifar10_cfg

    # Cast labels to int32.
    cifar_ds = cifar_ds.map(input_columns="label",
                            operations=C.TypeCast(mstype.int32),
                            num_parallel_workers=8)

    # Image ops: random augmentation first (train only), then the shared tail.
    image_ops = []
    if status == "train":
        image_ops += [CV.RandomCrop([32, 32], [4, 4, 4, 4]),
                      CV.RandomHorizontalFlip()]
    image_ops += [CV.Resize((cfg.image_height, cfg.image_width)),
                  CV.Rescale(1.0 / 255.0, 0.0),
                  CV.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
                  CV.HWC2CHW()]
    # One map per op, matching the original pipeline structure.
    for op in image_ops:
        cifar_ds = cifar_ds.map(input_columns="image", operations=op, num_parallel_workers=8)

    cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
    cifar_ds = cifar_ds.repeat(repeat_size)
    return cifar_ds
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset.

    Args:
        dataset_path (str): The path of dataset.
        do_train (bool): Whether dataset is used for train or eval.
        repeat_num (int): The repeat times of dataset. Default: 1.
        batch_size (int): The batch size of dataset. Default: 32.

    Returns:
        Dataset.
    """
    # Train reads the shuffled 'train' split; eval reads 'eval' in order.
    if do_train:
        dataset_path = os.path.join(dataset_path, 'train')
        do_shuffle = True
    else:
        dataset_path = os.path.join(dataset_path, 'eval')
        do_shuffle = False

    # NOTE(review): device_num / device_id come from module-level scope here.
    if device_num == 1 or not do_train:
        data_set = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=do_shuffle)
    else:
        data_set = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=do_shuffle,
                                     num_shards=device_num, shard_id=device_id)

    # Image ops. NOTE(review): the flip probability device_id/(device_id+1)
    # varies per device (0 on device 0) — presumably intentional; confirm.
    trans = []
    if do_train:
        trans += [C.RandomCrop((32, 32), (4, 4, 4, 4)),
                  C.RandomHorizontalFlip(device_id / (device_id + 1))]
    trans += [C.Resize((224, 224)),
              C.Rescale(1.0 / 255.0, 0.0),
              C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
              C.HWC2CHW()]

    data_set = data_set.map(operations=C2.TypeCast(mstype.int32), input_columns="label",
                            num_parallel_workers=8)
    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=8)

    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)

    return data_set
def test_uniform_augment_callable(num_ops=2):
    """
    Test UniformAugment is callable
    """
    logger.info("test_uniform_augment_callable")
    img = np.fromfile("../data/dataset/apple.jpg", dtype=np.uint8)
    logger.info("Image.type: {}, Image.shape: {}".format(type(img), img.shape))

    # Decode the raw JPEG bytes into an HWC array.
    img = C.Decode()(img)
    assert img.shape == (2268, 4032, 3)

    # Two identical padded crops; UniformAugment picks num_ops of them.
    padded_crops = [C.RandomCrop(size=[400, 400], padding=[32, 32, 32, 32]),
                    C.RandomCrop(size=[400, 400], padding=[32, 32, 32, 32])]
    uni_aug = C.UniformAugment(transforms=padded_crops, num_ops=num_ops)
    img = uni_aug(img)
    # Output is either untouched or cropped down to 400x400.
    assert img.shape == (2268, 4032, 3) or img.shape == (400, 400, 3)
def create_dataset(args, training, data_path, batch_size=32, repeat_size=1, num_parallel_workers=1):
    """Build the Cifar10 pipeline; prints op details when args.debug is set."""
    cifar_ds = ds.Cifar10Dataset(data_path)

    # define map operations
    random_crop_op = C.RandomCrop((32, 32), (4, 4, 4, 4))  # padding_mode default CONSTANT
    if args.debug:
        print(f'Random crop op: {random_crop_op}')
    random_horizontal_op = C.RandomHorizontalFlip()
    resize_op = C.Resize((224, 224))  # interpolation default BILINEAR
    rescale_op = C.Rescale(1.0 / 255.0, 0.0)
    normalize_op = C.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    if args.debug:
        print(f'Normalize operation: {normalize_op}')
    changeswap_op = C.HWC2CHW()
    type_cast_op = C2.TypeCast(mstype.int32)
    if args.debug:
        print(f'Type cast operation: {type_cast_op}')

    # Random augmentation applies only during training.
    c_trans = [random_crop_op, random_horizontal_op] if training else []
    c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]
    if args.debug:
        print(f'C transform: {c_trans}')

    # apply map operations on images
    cifar_ds = cifar_ds.map(operations=type_cast_op, input_columns="label")
    cifar_ds = cifar_ds.map(operations=c_trans, input_columns="image")

    # shuffle, batch, repeat
    cifar_ds = cifar_ds.shuffle(buffer_size=10)
    cifar_ds = cifar_ds.batch(batch_size=batch_size, drop_remainder=True)
    cifar_ds = cifar_ds.repeat(repeat_size)
    return cifar_ds
def create_dataset(data_path, is_train=True, batch_size=32):
    """Create a Cifar10 pipeline; shard layout comes from RANK_SIZE/RANK_ID env vars."""
    # import
    import mindspore.common.dtype as mstype
    import mindspore.dataset.engine as de
    import mindspore.dataset.transforms.c_transforms as C2
    import mindspore.dataset.vision.c_transforms as C
    from mindspore.common import set_seed

    set_seed(1)

    # shard: only active when both env vars are present
    num_shards = shard_id = None
    env_size = os.getenv("RANK_SIZE")
    env_id = os.getenv("RANK_ID")
    if env_size is not None and env_id is not None:
        num_shards = int(env_size)
        shard_id = int(env_id)

    # define dataset
    data_path = os.path.join(
        data_path, "cifar-10-batches-bin" if is_train else "cifar-10-verify-bin")
    dataset = de.Cifar10Dataset(data_path, shuffle=True, num_shards=num_shards,
                                shard_id=shard_id, num_parallel_workers=8,
                                num_samples=None)

    # image ops: train-time augmentation, then the common tail
    image_ops = []
    if is_train:
        image_ops.append(C.RandomCrop((32, 32), (4, 4, 4, 4)))
        image_ops.append(C.RandomHorizontalFlip(prob=0.5))
    image_ops.append(C.Resize((224, 224)))
    image_ops.append(C.Rescale(1 / 255.0, 0.))
    image_ops.append(C.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                 std=[0.2023, 0.1994, 0.2010]))
    image_ops.append(C.HWC2CHW())

    # map ops
    dataset = dataset.map(input_columns=["image"], operations=image_ops,
                          num_parallel_workers=8)
    dataset = dataset.map(input_columns=["label"], operations=C2.TypeCast(mstype.int32),
                          num_parallel_workers=8)

    # batch & repeat; remainder is only dropped while training
    dataset = dataset.batch(batch_size=batch_size, drop_remainder=is_train)
    dataset = dataset.repeat(count=1)
    return dataset
def test_cpp_uniform_augment(plot=False, num_ops=2):
    """
    Test UniformAugment
    """
    logger.info("Test CPP UniformAugment")

    def _gather_nhwc(pipeline):
        # Concatenate every batch of the pipeline into one NHWC array.
        images = None
        for idx, (image, _) in enumerate(pipeline):
            batch = np.transpose(image.asnumpy(), (0, 2, 3, 1))
            images = batch if idx == 0 else np.append(images, batch, axis=0)
        return images

    # Baseline pipeline: decode + resize only.
    baseline = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    baseline = baseline.map(operations=[C.Decode(), C.Resize(size=[224, 224]), F.ToTensor()],
                            input_columns="image")
    images_original = _gather_nhwc(baseline.batch(512))

    # Augmented pipeline: same steps plus UniformAugment picking from the pool.
    candidate_ops = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
                     C.RandomHorizontalFlip(),
                     C.RandomVerticalFlip(),
                     C.RandomColorAdjust(),
                     C.RandomRotation(degrees=45)]
    uni_aug = C.UniformAugment(transforms=candidate_ops, num_ops=num_ops)
    augmented = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    augmented = augmented.map(operations=[C.Decode(), C.Resize(size=[224, 224]),
                                          uni_aug, F.ToTensor()],
                              input_columns="image", num_parallel_workers=1)
    images_ua = _gather_nhwc(augmented.batch(512))

    if plot:
        visualize_list(images_original, images_ua)

    # Log the mean per-image MSE between augmented and baseline outputs.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_ua[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_seed_undeterministic():
    """
    Test seed with num parallel workers in c, this test is expected to fail some of the time
    """
    logger.info("test_seed_undeterministic")

    # Save original configuration values
    saved_workers = ds.config.get_num_parallel_workers()
    saved_seed = ds.config.get_seed()
    ds.config.set_seed(0)
    # With 3 workers the two crop ops may consume the shared random sequence
    # in a different order, so equality is not guaranteed.
    ds.config.set_num_parallel_workers(3)

    decode_op = c_vision.Decode()

    # First dataset: the seed is captured when the op is constructed.
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    crop_a = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
    data1 = data1.map(operations=decode_op, input_columns=["image"])
    data1 = data1.map(operations=crop_a, input_columns=["image"])

    # Second dataset with an independently constructed crop op.
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(operations=decode_op, input_columns=["image"])
    crop_b = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
    data2 = data2.map(operations=crop_b, input_columns=["image"])

    try:
        dataset_equal(data1, data2, 0)
    except Exception as e:
        # Both pipelines draw numbers from the same generated sequence.
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Array" in str(e)

    # Restore original configuration values
    ds.config.set_num_parallel_workers(saved_workers)
    ds.config.set_seed(saved_seed)
def create_dataset(data_home, do_train, batch_size=32, repeat_num=1):
    """
    create a train or evaluate cifar10 dataset for resnet50

    Args:
        data_home(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        batch_size(int): the batch size of dataset. Default: 32
        repeat_num(int): the repeat times of dataset. Default: 1

    Returns:
        dataset
    """
    cifar_ds = ds.Cifar10Dataset(data_home)

    # image ops: train-time augmentation, then the shared tail
    image_ops = []
    if do_train:
        image_ops += [C.RandomCrop((32, 32), (4, 4, 4, 4)),  # padding_mode default CONSTANT
                      C.RandomHorizontalFlip()]
    image_ops += [C.Resize((224, 224)),  # interpolation default BILINEAR
                  C.Rescale(1.0 / 255.0, 0.0),
                  C.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
                  C.HWC2CHW()]

    # apply map operations on labels then images
    cifar_ds = cifar_ds.map(operations=C2.TypeCast(mstype.int32), input_columns="label")
    cifar_ds = cifar_ds.map(operations=image_ops, input_columns="image")

    # shuffle, batch, repeat
    cifar_ds = cifar_ds.shuffle(buffer_size=10)
    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
    cifar_ds = cifar_ds.repeat(repeat_num)
    return cifar_ds
def create_dataset_cifar10(data_home, repeat_num=1, training=True, cifar_cfg=None):
    """Data operations."""
    split = "cifar-10-batches-bin" if training else "cifar-10-verify-bin"
    data_dir = os.path.join(data_home, split)
    rank_size, rank_id = _get_rank_info()
    # Shuffle only while training.
    data_set = ds.Cifar10Dataset(data_dir, num_shards=rank_size, shard_id=rank_id,
                                 shuffle=training)

    # train-time augmentation followed by the shared tail
    c_trans = []
    if training:
        c_trans = [vision.RandomCrop((32, 32), (4, 4, 4, 4)),  # padding_mode default CONSTANT
                   vision.RandomHorizontalFlip()]
    c_trans += [vision.Resize((cifar_cfg.image_height, cifar_cfg.image_width)),  # BILINEAR
                vision.Rescale(1.0 / 255.0, 0.0),
                vision.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.24703233, 0.24348505, 0.26158768)),
                vision.HWC2CHW()]

    # apply map operations on labels then images
    data_set = data_set.map(operations=c_transforms.TypeCast(mstype.int32),
                            input_columns="label")
    data_set = data_set.map(operations=c_trans, input_columns="image")

    # batch and repeat
    data_set = data_set.batch(batch_size=cifar_cfg.batch_size, drop_remainder=True)
    data_set = data_set.repeat(repeat_num)
    return data_set
def create_dataset(repeat_num=1, training=True, batch_size=32, rank_id=0, rank_size=1,
                   enable_hccl=False):
    """
    Create the cifar10 dataset pipeline.

    Args:
        repeat_num (int): repeat count applied at the end of the pipeline. Default: 1.
        training (bool): use the train split with random augmentation. Default: True.
        batch_size (int): batch size. Default: 32.
        rank_id (int): shard id, used when enable_hccl is True. Default: 0.
        rank_size (int): number of shards, used when enable_hccl is True. Default: 1.
        enable_hccl (bool): shard the dataset across devices. Default: False.

    Returns:
        Dataset.
    """
    data_dir = data_home + "/cifar-10-batches-bin"
    if not training:
        data_dir = data_home + "/cifar-10-verify-bin"
    # Fix: build the dataset exactly once. The original always created an
    # unsharded dataset and then discarded it (after two no-op
    # self-assignments `rank_id = rank_id; rank_size = rank_size`) whenever
    # enable_hccl was set.
    if enable_hccl:
        data_set = ds.Cifar10Dataset(data_dir, num_shards=rank_size, shard_id=rank_id)
    else:
        data_set = ds.Cifar10Dataset(data_dir)

    resize_height = 224
    resize_width = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = vision.RandomCrop(
        (32, 32), (4, 4, 4, 4))  # padding_mode default CONSTANT
    random_horizontal_op = vision.RandomHorizontalFlip()
    # interpolation default BILINEAR
    resize_op = vision.Resize((resize_height, resize_width))
    rescale_op = vision.Rescale(rescale, shift)
    normalize_op = vision.Normalize((0.4465, 0.4822, 0.4914), (0.2010, 0.1994, 0.2023))
    changeswap_op = vision.HWC2CHW()
    type_cast_op = C.TypeCast(mstype.int32)

    c_trans = []
    if training:
        c_trans = [random_crop_op, random_horizontal_op]
    c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]

    # apply map operations on images
    data_set = data_set.map(operations=type_cast_op, input_columns="label")
    data_set = data_set.map(operations=c_trans, input_columns="image")

    # apply shuffle operations
    data_set = data_set.shuffle(buffer_size=1000)

    # apply batch operations
    data_set = data_set.batch(batch_size=batch_size, drop_remainder=True)

    # apply repeat operations
    data_set = data_set.repeat(repeat_num)

    return data_set
def create_dataset_cifar10(dataset_path, do_train, cfg, repeat_num=1):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        cfg (dict): the config for creating dataset.
        repeat_num(int): the repeat times of dataset. Default: 1.

    Returns:
        dataset
    """
    split = "cifar-10-batches-bin" if do_train else "cifar-10-verify-bin"
    dataset_path = os.path.join(dataset_path, split)
    if cfg.group_size == 1:
        data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=cfg.work_nums,
                                     shuffle=True)
    else:
        data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=cfg.work_nums,
                                     shuffle=True, num_shards=cfg.group_size,
                                     shard_id=cfg.rank)

    # image ops: augmentation for training, then the common tail
    trans = []
    if do_train:
        trans += [C.RandomCrop((32, 32), (4, 4, 4, 4)),
                  C.RandomHorizontalFlip(prob=0.5)]
    trans += [C.Resize((299, 299)),
              C.Rescale(1.0 / 255.0, 0.0),
              C.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]),
              C.HWC2CHW()]

    data_set = data_set.map(operations=trans, input_columns="image",
                            num_parallel_workers=cfg.work_nums)
    data_set = data_set.map(operations=C2.TypeCast(mstype.int32), input_columns="label",
                            num_parallel_workers=cfg.work_nums)

    # batch then repeat; remainder batches are dropped only while training
    data_set = data_set.batch(cfg.batch_size, drop_remainder=do_train)
    data_set = data_set.repeat(repeat_num)
    return data_set
def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
    """
    create a train or evaluate cifar10 dataset for resnet50

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32
        target(str): the device target. Default: Ascend

    Returns:
        dataset
    """
    # Resolve shard layout: Ascend uses the helper, other targets query the collective.
    if target == "Ascend":
        device_num, rank_id = _get_rank_info()
    else:
        init()
        rank_id = get_rank()
        device_num = get_group_size()

    if device_num == 1:
        data_set = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        data_set = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                     num_shards=device_num, shard_id=rank_id)

    # image ops: augmentation only while training, then the shared tail
    trans = []
    if do_train:
        trans += [C.RandomCrop((32, 32), (4, 4, 4, 4)),
                  C.RandomHorizontalFlip(prob=0.5)]
    trans += [C.Resize((224, 224)),
              C.Rescale(1.0 / 255.0, 0.0),
              C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
              C.HWC2CHW()]

    data_set = data_set.map(operations=C2.TypeCast(mstype.int32), input_columns="label",
                            num_parallel_workers=8)
    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=8)

    # batch and repeat
    data_set = data_set.batch(batch_size, drop_remainder=True)
    data_set = data_set.repeat(repeat_num)
    return data_set
def vgg_create_dataset100(data_home, image_size, batch_size, rank_id=0, rank_size=1, repeat_num=1,
                          training=True, num_samples=None, shuffle=True):
    """Data operations."""
    ds.config.set_seed(1)
    data_dir = os.path.join(data_home, "train" if training else "test")

    if num_samples is not None:
        data_set = ds.Cifar100Dataset(data_dir, num_shards=rank_size, shard_id=rank_id,
                                      num_samples=num_samples, shuffle=shuffle)
    else:
        data_set = ds.Cifar100Dataset(data_dir, num_shards=rank_size, shard_id=rank_id)

    # Expose the fine-grained Cifar100 label under the generic "label" name.
    data_set = data_set.rename(input_columns=["fine_label"], output_columns=["label"])
    data_set = data_set.project(["image", "label"])

    # train-time augmentation + the shared tail
    c_trans = []
    if training:
        c_trans = [CV.RandomCrop((32, 32), (4, 4, 4, 4)),  # padding_mode default CONSTANT
                   CV.RandomHorizontalFlip()]
    c_trans += [CV.Resize(image_size),  # interpolation default BILINEAR
                CV.Rescale(1.0 / 255.0, 0.0),
                CV.Normalize((0.4465, 0.4822, 0.4914), (0.2010, 0.1994, 0.2023)),
                CV.HWC2CHW()]

    # apply map operations on labels then images
    data_set = data_set.map(input_columns="label", operations=C.TypeCast(mstype.int32))
    data_set = data_set.map(input_columns="image", operations=c_trans)

    # shuffle, batch, repeat
    data_set = data_set.shuffle(buffer_size=1000)
    data_set = data_set.batch(batch_size=batch_size, drop_remainder=True)
    data_set = data_set.repeat(repeat_num)
    return data_set
def vgg_create_dataset(data_home, image_size, batch_size, rank_id=0, rank_size=1, repeat_num=1,
                       training=True):
    """Data operations."""
    de.config.set_seed(1)
    data_dir = os.path.join(data_home,
                            "cifar-10-batches-bin" if training else "cifar-10-verify-bin")
    data_set = de.Cifar10Dataset(data_dir, num_shards=rank_size, shard_id=rank_id)

    # train-time augmentation + the shared tail
    c_trans = []
    if training:
        c_trans = [vision.RandomCrop((32, 32), (4, 4, 4, 4)),  # padding_mode default CONSTANT
                   vision.RandomHorizontalFlip()]
    c_trans += [vision.Resize(image_size),  # interpolation default BILINEAR
                vision.Rescale(1.0 / 255.0, 0.0),
                vision.Normalize((0.4465, 0.4822, 0.4914), (0.2010, 0.1994, 0.2023)),
                vision.HWC2CHW()]

    # apply map operations on labels then images
    data_set = data_set.map(operations=C.TypeCast(mstype.int32), input_columns="label")
    data_set = data_set.map(operations=c_trans, input_columns="image")

    # repeat BEFORE shuffle/batch, matching the original pipeline order
    data_set = data_set.repeat(repeat_num)
    data_set = data_set.shuffle(buffer_size=10)
    data_set = data_set.batch(batch_size=batch_size, drop_remainder=True)
    return data_set
def create_dataset(repeat_num=1, training=True):
    """
    create data for next use such as training or infering

    Args:
        repeat_num (int): repeat count applied at the end of the pipeline. Default: 1.
        training (bool): apply random crop/flip augmentation. Default: True.

    Returns:
        Dataset.
    """
    # Fix: construct the dataset exactly once. The original unconditionally
    # built an unsharded dataset and then discarded it when run_distribute
    # was set, paying the construction cost twice.
    if args_opt.run_distribute:
        rank_id = int(os.getenv('RANK_ID'))
        rank_size = int(os.getenv('RANK_SIZE'))
        cifar_ds = ds.Cifar10Dataset(data_home, num_shards=rank_size, shard_id=rank_id)
    else:
        cifar_ds = ds.Cifar10Dataset(data_home)

    resize_height = 224
    resize_width = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = C.RandomCrop(
        (32, 32), (4, 4, 4, 4))  # padding_mode default CONSTANT
    random_horizontal_op = C.RandomHorizontalFlip()
    resize_op = C.Resize(
        (resize_height, resize_width))  # interpolation default BILINEAR
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    changeswap_op = C.HWC2CHW()
    type_cast_op = C2.TypeCast(mstype.int32)

    c_trans = []
    if training:
        c_trans = [random_crop_op, random_horizontal_op]
    c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]

    # apply map operations on images
    cifar_ds = cifar_ds.map(operations=type_cast_op, input_columns="label")
    cifar_ds = cifar_ds.map(operations=c_trans, input_columns="image")

    # apply shuffle operations
    cifar_ds = cifar_ds.shuffle(buffer_size=10)

    # apply batch operations
    cifar_ds = cifar_ds.batch(batch_size=args_opt.batch_size, drop_remainder=True)

    # apply repeat operations
    cifar_ds = cifar_ds.repeat(repeat_num)

    return cifar_ds
def test_bounding_box_augment_with_crop_op(plot_vis=False):
    """
    Test BoundingBoxAugment op (passing crop op as transform)
    Prints images side by side with and without Aug applied + bboxes to compare and test
    """
    logger.info("test_bounding_box_augment_with_crop_op")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Two identical pipelines: one untouched baseline, one augmented.
    data_baseline = ds.VOCDataset(DATA_DIR, task="Detection", usage="train",
                                  shuffle=False, decode=True)
    data_augmented = ds.VOCDataset(DATA_DIR, task="Detection", usage="train",
                                   shuffle=False, decode=True)

    # Ratio is set to 0.9 to apply RandomCrop of size (50, 50) on 90% of the bounding boxes.
    aug_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.9)

    # map to apply ops on the augmented pipeline only
    data_augmented = data_augmented.map(operations=[aug_op],
                                        input_columns=["image", "bbox"],
                                        output_columns=["image", "bbox"],
                                        column_order=["image", "bbox"])

    filename = "bounding_box_augment_crop_c_result.npz"
    save_and_check_md5(data_augmented, filename, generate_golden=GENERATE_GOLDEN)

    plain_samples = []
    aug_samples = []
    baseline_iter = data_baseline.create_dict_iterator(num_epochs=1, output_numpy=True)
    augmented_iter = data_augmented.create_dict_iterator(num_epochs=1, output_numpy=True)
    for plain_row, aug_row in zip(baseline_iter, augmented_iter):
        plain_samples.append(plain_row)
        aug_samples.append(aug_row)

    if plot_vis:
        visualize_with_bounding_boxes(plain_samples, aug_samples)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_deterministic_run_fail():
    """
    Test RandomCrop with seed: re-using one RandomCrop op across two pipelines
    splits its random sequence, so the outputs are expected to differ and the
    comparison is expected to raise.
    """
    logger.info("test_deterministic_run_fail")

    # Save original configuration values
    num_parallel_workers_original = ds.config.get_num_parallel_workers()
    seed_original = ds.config.get_seed()

    # when we set the seed all operations within our dataset should be deterministic
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    try:
        # First dataset
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        # Assuming we get the same seed on calling constructor, if this op is re-used then result won't be
        # the same in between the two datasets. For example, RandomCrop constructor takes seed (0)
        # outputs a deterministic series of numbers, e,g "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
        random_crop_op = c_vision.RandomCrop([512, 512], [200, 200, 200, 200])
        decode_op = c_vision.Decode()
        data1 = data1.map(operations=decode_op, input_columns=["image"])
        data1 = data1.map(operations=random_crop_op, input_columns=["image"])

        # Second dataset
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        data2 = data2.map(operations=decode_op, input_columns=["image"])
        # If seed is set up on constructor
        data2 = data2.map(operations=random_crop_op, input_columns=["image"])

        mismatch_raised = False
        try:
            dataset_equal(data1, data2, 0)
        except Exception as e:
            # two datasets split the number out of the sequence a
            mismatch_raised = True
            logger.info("Got an exception in DE: {}".format(str(e)))
            assert "Array" in str(e)
        # Defect fix: previously the test passed silently when no exception
        # occurred, defeating its purpose.
        assert mismatch_raised, "expected dataset comparison to raise a mismatch"
    finally:
        # Restore original configuration values even if an assertion fails above.
        ds.config.set_num_parallel_workers(num_parallel_workers_original)
        ds.config.set_seed(seed_original)
def create_dataset(data_path, repeat_num=1, batch_size=32, rank_id=0, rank_size=1):
    """create dataset"""
    target_height = 224
    target_width = 224
    rescale_factor = 1.0 / 255.0
    rescale_shift = 0.0

    # NOTE(review): the rank_id/rank_size parameters are immediately shadowed
    # by the values queried from the communication group, so callers'
    # arguments are ignored — confirm this is intentional.
    rank_id = get_rank()
    rank_size = get_group_size()
    data_set = ds.Cifar10Dataset(data_path, num_shards=rank_size, shard_id=rank_id)

    # Image pipeline: augment, resize, rescale, normalize, then HWC -> CHW.
    image_ops = [
        vision.RandomCrop((32, 32), (4, 4, 4, 4)),
        vision.RandomHorizontalFlip(),
        vision.Resize((target_height, target_width)),
        vision.Rescale(rescale_factor, rescale_shift),
        vision.Normalize((0.4465, 0.4822, 0.4914), (0.2010, 0.1994, 0.2023)),
        vision.HWC2CHW(),
    ]
    label_op = C.TypeCast(mstype.int32)

    # apply map operations on labels and images
    data_set = data_set.map(operations=label_op, input_columns="label")
    data_set = data_set.map(operations=image_ops, input_columns="image")

    # shuffle, batch, then repeat
    data_set = data_set.shuffle(buffer_size=10)
    data_set = data_set.batch(batch_size=batch_size, drop_remainder=True)
    data_set = data_set.repeat(repeat_num)

    return data_set
def test_random_crop_06_c():
    """
    Test RandomCrop op with c_transforms: invalid size, expected to raise TypeError
    """
    logger.info("test_random_crop_06_c")

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

    raised = False
    try:
        # Note: if size is neither an int nor a list of length 2, an exception will raise
        random_crop_op = c_vision.RandomCrop([512, 512, 375])
        decode_op = c_vision.Decode()
        data = data.map(operations=decode_op, input_columns=["image"])
        data = data.map(operations=random_crop_op, input_columns=["image"])
    except TypeError as e:
        raised = True
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Size should be a single integer" in str(e)
    # Defect fix: previously the test passed silently when no TypeError was
    # raised, defeating its purpose.
    assert raised, "RandomCrop with a 3-element size should raise TypeError"