def test_mixup_batch_fail3():
    """
    Test MixUpBatch op
    We expect this to fail because label column is not passed to mixup_batch
    """
    logger.info("test_mixup_batch_fail3")

    # Baseline pass over the raw batched dataset (exercises the input pipeline).
    ds_original = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    ds_original = ds_original.batch(5, drop_remainder=True)
    images_original = None
    for batch_idx, (img, _) in enumerate(ds_original):
        images_original = img if batch_idx == 0 else np.append(images_original, img, axis=0)

    # Apply MixUpBatch to the "image" column only; iterating must raise because
    # the op also needs the "label" column.
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
    mixup_batch_op = vision.MixUpBatch()
    data1 = data1.batch(5, drop_remainder=True)
    data1 = data1.map(operations=mixup_batch_op, input_columns=["image"])

    with pytest.raises(RuntimeError) as error:
        images_mixup = np.array([])
        for batch_idx, (img, _) in enumerate(data1):
            images_mixup = img if batch_idx == 0 else np.append(images_mixup, img, axis=0)
    error_message = "Both images and labels columns are required"
    assert error_message in str(error.value)
def loader(self):
    """Dataloader attribute which is a unified interface to generate the data.

    Builds a MindSpore dataset pipeline from ``self.dataset``: dtype
    conversion, optional shuffling, optional MixUp augmentation, and batching.

    :return: a batch data
    :rtype: dict, list, optional
    """
    ms_dataset = GeneratorDataset(self.dataset, ["image", "label"], sampler=self.sampler)
    # ms_dataset.set_dataset_size(len(self.dataset))  # TODO delete, only mindspore 0.5 need
    ms_dataset = self.convert_dtype(ms_dataset)
    if self.args.shuffle:
        # Shuffle buffer defaults to the full dataset size unless overridden.
        buffer_size = self.args.get("buffer_size", len(self.dataset))
        ms_dataset = ms_dataset.shuffle(buffer_size=buffer_size)
    if self.args.get("mixup", False):
        # MixUpBatch requires one-hot labels and batched input.
        num_class = self.args.get("num_class")
        one_hot_op = C2.OneHot(num_classes=num_class)
        ms_dataset = ms_dataset.map(operations=one_hot_op, input_columns=["label"])
        mixup_batch_op = vision.MixUpBatch(2)
        ms_dataset = ms_dataset.batch(self.args.batch_size)
        ms_dataset = ms_dataset.map(operations=mixup_batch_op, input_columns=["image", "label"])
    else:
        ms_dataset = ms_dataset.batch(self.args.batch_size)
    from mindspore.dataset.engine.datasets import BatchDataset, MapDataset
    # Patch __len__ onto the pipeline classes so downstream code can call len().
    BatchDataset.__len__ = BatchDataset.get_dataset_size
    MapDataset.__len__ = MapDataset.get_dataset_size
    return ms_dataset
def test_random_choice():
    """
    Test RandomChoice op
    """
    ds.config.set_seed(0)

    def run_case(arr, op_list):
        # Build a tiny pipeline and collect every output row; surface
        # validation errors as strings so callers can assert on them.
        try:
            pipeline = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
            pipeline = pipeline.map(operations=ops.RandomChoice(op_list), input_columns=["col"])
            collected = []
            for row in pipeline.create_dict_iterator(num_epochs=1, output_numpy=True):
                collected.append(row["col"].tolist())
            return collected
        except (TypeError, ValueError) as err:
            return str(err)

    # Either operation may be chosen at random, so accept both outcomes.
    res1 = run_case([[0, 1, 2]], [ops.PadEnd([4], 0), ops.Slice([0, 2])])
    assert res1 in [[[0, 1, 2, 0]], [[0, 2]]]
    # Nested Compose structures inside RandomChoice.
    res2 = run_case([[0, 1, 2]], [
        ops.Compose([ops.Duplicate(), ops.Concatenate()]),
        ops.Compose([ops.Slice([0, 1]), ops.OneHot(2)])
    ])
    assert res2 in [[[[1, 0], [0, 1]]], [[0, 1, 2, 0, 1, 2]]]
    # Degenerate case: with a single operation it is always chosen.
    assert run_case([[4, 3], [2, 1]], [ops.Slice([0])]) == [[4], [2]]
def test_mnist_dataset(remove_json_files=True):
    """Serialize/deserialize an MnistDataset pipeline and verify all copies match."""
    data_dir = "../data/dataset/testMnistData"
    ds.config.set_seed(1)

    data1 = ds.MnistDataset(data_dir, 100)
    one_hot_encode = c.OneHot(10)  # num_classes is input argument
    data1 = data1.map(input_columns="label", operations=one_hot_encode)
    data1 = data1.batch(batch_size=10, drop_remainder=True)  # batch_size is input argument

    # Round-trip 1: serialize the pipeline and load it back.
    ds.serialize(data1, "mnist_dataset_pipeline.json")
    assert validate_jsonfile("mnist_dataset_pipeline.json") is True
    data2 = ds.deserialize(json_filepath="mnist_dataset_pipeline.json")

    # Round-trip 2: re-serialize the deserialized pipeline; files must be identical.
    ds.serialize(data2, "mnist_dataset_pipeline_1.json")
    assert validate_jsonfile("mnist_dataset_pipeline_1.json") is True
    assert filecmp.cmp('mnist_dataset_pipeline.json', 'mnist_dataset_pipeline_1.json')
    data3 = ds.deserialize(json_filepath="mnist_dataset_pipeline_1.json")

    # All three pipelines must produce identical rows.
    count = 0
    for row1, row2, row3 in zip(data1.create_dict_iterator(), data2.create_dict_iterator(),
                                data3.create_dict_iterator()):
        for key in ('image', 'label'):
            assert np.array_equal(row1[key], row2[key])
            assert np.array_equal(row1[key], row3[key])
        count += 1
    logger.info("mnist total num samples is {}".format(str(count)))
    assert count == 10
    if remove_json_files:
        delete_json_files()
def test_mixup_batch_fail4():
    """
    Test MixUpBatch Fail 4
    We expect this to fail because alpha is zero
    """
    # Fixes: docstring previously mislabeled this test as "Fail 2"; the
    # baseline image-accumulation loop was dead code (nothing asserted on it,
    # and the error is raised at operator construction, before any iteration).
    logger.info("test_mixup_batch_fail4")

    # Build a pipeline with one-hot labels; it is never iterated because the
    # MixUpBatch constructor itself must raise.
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(input_columns=["label"], operations=one_hot_op)

    # alpha must be strictly positive; zero is rejected at construction time.
    with pytest.raises(ValueError) as error:
        vision.MixUpBatch(0.0)
    error_message = "Input is not within the required interval"
    assert error_message in str(error.value)
def test_cutmix_batch_nchw_md5():
    """
    Test CutMixBatch on a batch of CHW images with MD5:
    """
    logger.info("test_cutmix_batch_nchw_md5")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Pipeline: HWC->CHW images, one-hot labels, batch, CutMixBatch in NCHW mode.
    data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    data = data.map(operations=vision.HWC2CHW(), input_columns=["image"])
    data = data.map(operations=data_trans.OneHot(num_classes=10), input_columns=["label"])
    data = data.batch(5, drop_remainder=True)
    data = data.map(operations=vision.CutMixBatch(mode.ImageBatchFormat.NCHW),
                    input_columns=["image", "label"])

    # Compare pipeline output against the stored golden checksum.
    filename = "cutmix_batch_c_nchw_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_one_hot():
    """
    Test OneHot Tensor Operator
    """
    logger.info("test_one_hot")
    depth = 10

    # First dataset: OneHot applied to the label column.
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    one_hot_op = data_trans.OneHot(num_classes=depth)
    data1 = data1.map(input_columns=["label"], operations=one_hot_op, columns_order=["label"])

    # Second dataset: raw labels, one-hot encoded with the NumPy reference instead.
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["label"], shuffle=False)

    num_iter = 0
    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        assert len(item1) == len(item2)
        de_label = item1["label"]
        np_label = one_hot(item2["label"][0], depth)
        mse = diff_mse(de_label, np_label)
        logger.info("DE one_hot: {}, Numpy one_hot: {}, diff: {}".format(de_label, np_label, mse))
        # Operator output must match the reference exactly.
        assert mse == 0
        num_iter += 1
    assert num_iter == 3
def test_cutmix_batch_fail5():
    """
    Test CutMixBatch op
    We expect this to fail because label column is not passed to cutmix_batch
    """
    logger.info("test_cutmix_batch_fail5")

    # Apply CutMixBatch to the "image" column only; iterating must raise
    # because the op also needs the "label" column.
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    data1 = data1.map(operations=data_trans.OneHot(num_classes=10), input_columns=["label"])
    data1 = data1.batch(5, drop_remainder=True)
    data1 = data1.map(operations=vision.CutMixBatch(mode.ImageBatchFormat.NHWC),
                      input_columns=["image"])

    with pytest.raises(RuntimeError) as error:
        images_cutmix = np.array([])
        for batch_idx, (img, _) in enumerate(data1):
            batch = img.asnumpy()
            images_cutmix = batch if batch_idx == 0 else np.append(images_cutmix, batch, axis=0)
    error_message = "both image and label columns are required"
    assert error_message in str(error.value)
def test_cutmix_batch_fail6():
    """
    Test CutMixBatch op
    We expect this to fail because image_batch_format passed to CutMixBatch doesn't match the format of the images
    """
    logger.info("test_cutmix_batch_fail6")

    # Images stay HWC but the op is told to expect NCHW, so iteration must raise.
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW)
    data1 = data1.batch(5, drop_remainder=True)
    data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])

    with pytest.raises(RuntimeError) as error:
        images_cutmix = np.array([])
        for batch_idx, (img, _) in enumerate(data1):
            images_cutmix = img if batch_idx == 0 else np.append(images_cutmix, img, axis=0)
    error_message = "CutMixBatch: Image doesn't match the given image format."
    assert error_message in str(error.value)
def test_manifest_sampler_chain_batch_repeat():
    """
    Test ManifestDataset sampler chain DistributedSampler->SequentialSampler, with batch then repeat
    """
    logger.info("test_manifest_sampler_chain_batch_repeat")
    manifest_file = "../data/dataset/testManifestData/test5trainimgs.json"

    # Chain a SequentialSampler beneath a single-shard DistributedSampler.
    sampler = ds.DistributedSampler(num_shards=1, shard_id=0, shuffle=False, num_samples=5)
    sampler.add_child(ds.SequentialSampler())

    # Pipeline: one-hot labels, batch, then repeat twice.
    data1 = ds.ManifestDataset(manifest_file, decode=True, sampler=sampler)
    data1 = data1.map(operations=c_transforms.OneHot(3), input_columns=["label"])
    data1 = data1.batch(batch_size=5, drop_remainder=False)
    data1 = data1.repeat(count=2)

    # Five samples per batch repeated twice -> dataset size of 2 batches.
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 2
def test_cutmix_batch_fail1():
    """
    Test CutMixBatch Fail 1
    We expect this to fail because the images and labels are not batched
    """
    logger.info("test_cutmix_batch_fail1")

    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    data1 = data1.map(operations=data_trans.OneHot(num_classes=10), input_columns=["label"])
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)

    # Mapping CutMixBatch onto unbatched data must raise at iteration time.
    with pytest.raises(RuntimeError) as error:
        data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
        for batch_idx, (img, _) in enumerate(data1):
            batch = img.asnumpy()
            images_cutmix = batch if batch_idx == 0 else np.append(images_cutmix, batch, axis=0)
    error_message = "You must make sure images are HWC or CHW and batch "
    assert error_message in str(error.value)
def test_mixup_batch_fail1():
    """
    Test MixUpBatch Fail 1
    We expect this to fail because the images and labels are not batched
    """
    logger.info("test_mixup_batch_fail1")

    # Baseline pass over the raw batched dataset (exercises the input pipeline).
    ds_original = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    ds_original = ds_original.batch(5)
    images_original = np.array([])
    for batch_idx, (img, _) in enumerate(ds_original):
        images_original = img if batch_idx == 0 else np.append(images_original, img, axis=0)

    # Mapping MixUpBatch onto unbatched data must raise at iteration time.
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
    mixup_batch_op = vision.MixUpBatch(0.1)
    with pytest.raises(RuntimeError) as error:
        data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op)
        for batch_idx, (img, _) in enumerate(data1):
            images_mixup = img if batch_idx == 0 else np.append(images_mixup, img, axis=0)
    error_message = "You must make sure images are HWC or CHW and batch"
    assert error_message in str(error.value)
def test_mix_up_multi():
    """
    Test multi batch mix up op

    Applies py_vision.MixUp with is_single=False, where a batch is mixed with
    the previous batch, then reconstructs the golden mixed image from the
    label difference and compares it against the op's output.
    """
    logger.info("Test several batch mix up op")
    resize_height = 224
    resize_width = 224
    # Create dataset and define map operations
    ds1 = ds.ImageFolderDatasetV2(DATA_DIR_2)
    num_classes = 3
    decode_op = c_vision.Decode()
    resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR)
    one_hot_encode = c.OneHot(num_classes)  # num_classes is input argument
    ds1 = ds1.map(input_columns=["image"], operations=decode_op)
    ds1 = ds1.map(input_columns=["image"], operations=resize_op)
    ds1 = ds1.map(input_columns=["label"], operations=one_hot_encode)
    # apply batch operations
    batch_size = 3
    ds1 = ds1.batch(batch_size, drop_remainder=True)
    # ds2 shares the same pipeline and serves as the un-mixed reference
    ds2 = ds1
    alpha = 0.2
    transforms = [
        py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=False)
    ]
    ds1 = ds1.map(input_columns=["image", "label"], operations=transforms)
    num_iter = 0
    batch1_image1 = 0
    for data1, data2 in zip(ds1.create_dict_iterator(), ds2.create_dict_iterator()):
        image1 = data1["image"]
        label1 = data1["label"]
        logger.info("label: {}".format(label1))
        image2 = data2["image"]
        label2 = data2["label"]
        logger.info("label2: {}".format(label2))
        if num_iter == 0:
            # Remember the first mixed batch; batch 2 is mixed against it.
            batch1_image1 = image1
        if num_iter == 1:
            # The label difference encodes the mixing weight lam.
            lam = np.abs(label2 - label1)
            logger.info("lam value in multi: {}".format(lam))
            for index in range(batch_size):
                if np.square(lam[index]).mean() != 0:
                    # Recover lam and rebuild the golden mixed image from the
                    # current reference batch and the previous mixed batch.
                    lam_value = 1 - np.sum(lam[index]) / 2
                    img_golden = lam_value * image2[index] + (
                        1 - lam_value) * batch1_image1[index]
                    # NOTE(review): `.all() == .all()` compares two booleans and
                    # is a weak equality check — confirm intent before changing.
                    assert image1[index].all() == img_golden.all()
            logger.info("====test several batch mixup ok====")
            break
        num_iter = num_iter + 1
def test_cutmix_batch_success4(plot=False):
    """
    Test CutMixBatch on a dataset where OneHot returns a 2D vector
    """
    logger.info("test_cutmix_batch_success4")

    # Baseline: decoded and resized CelebA images, batched but unaugmented.
    ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False)
    ds_original = ds_original.map(operations=[vision.Decode()], input_columns=["image"])
    ds_original = ds_original.map(operations=[vision.Resize([224, 224])], input_columns=["image"])
    ds_original = ds_original.batch(2, drop_remainder=True)
    images_original = None
    for batch_idx, (img, _) in enumerate(ds_original):
        batch = img.asnumpy()
        images_original = batch if batch_idx == 0 else np.append(images_original, batch, axis=0)

    # CutMix pipeline: one-hot on "attr" yields a 2D label vector per sample.
    data1 = ds.CelebADataset(dataset_dir=DATA_DIR3, shuffle=False)
    data1 = data1.map(operations=[vision.Decode()], input_columns=["image"])
    data1 = data1.map(operations=[vision.Resize([224, 224])], input_columns=["image"])
    data1 = data1.map(operations=data_trans.OneHot(num_classes=100), input_columns=["attr"])
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.5, 0.9)
    data1 = data1.batch(2, drop_remainder=True)
    data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "attr"])
    images_cutmix = None
    for batch_idx, (img, _) in enumerate(data1):
        batch = img.asnumpy()
        images_cutmix = batch if batch_idx == 0 else np.append(images_cutmix, batch, axis=0)

    if plot:
        visualize_list(images_original, images_cutmix)

    # Log the mean per-image MSE between original and augmented batches.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_cutmix[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_cutmix_batch_success3(plot=False):
    """
    Test CutMixBatch op with default values for alpha and prob on a batch of HWC images on ImageFolderDataset
    """
    logger.info("test_cutmix_batch_success3")

    # Baseline: decoded and resized images, batched but unaugmented.
    ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    ds_original = ds_original.map(operations=[vision.Decode()], input_columns=["image"])
    ds_original = ds_original.map(operations=[vision.Resize([224, 224])], input_columns=["image"])
    ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)
    images_original = None
    for batch_idx, (img, _) in enumerate(ds_original):
        batch = img.asnumpy()
        images_original = batch if batch_idx == 0 else np.append(images_original, batch, axis=0)

    # CutMix with default alpha and prob on NHWC batches.
    data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    data1 = data1.map(operations=[vision.Decode()], input_columns=["image"])
    data1 = data1.map(operations=[vision.Resize([224, 224])], input_columns=["image"])
    data1 = data1.map(operations=data_trans.OneHot(num_classes=10), input_columns=["label"])
    data1 = data1.batch(4, pad_info={}, drop_remainder=True)
    data1 = data1.map(operations=vision.CutMixBatch(mode.ImageBatchFormat.NHWC),
                      input_columns=["image", "label"])
    images_cutmix = None
    for batch_idx, (img, _) in enumerate(data1):
        batch = img.asnumpy()
        images_cutmix = batch if batch_idx == 0 else np.append(images_cutmix, batch, axis=0)

    if plot:
        visualize_list(images_original, images_cutmix)

    # Log the mean per-image MSE between original and augmented batches.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_cutmix[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_manifest_dataset_multi_label_onehot():
    """Verify OneHot followed by multi_label_hot on multi-label manifest data."""
    data = ds.ManifestDataset(DATA_FILE, decode=True, shuffle=False)
    expect_label = [[[0, 1, 0], [1, 0, 0]], [[1, 0, 0], [1, 0, 1]]]

    # One-hot encode each label, collapse multi-labels, then batch pairs.
    data = data.map(operations=data_trans.OneHot(3), input_columns=["label"])
    data = data.map(operations=multi_label_hot, input_columns=["label"])
    data = data.batch(2)

    count = 0
    for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        assert item["label"].tolist() == expect_label[count]
        logger.info("item[image] is {}".format(item["image"]))
        count = count + 1
def generate_mnist_dataset(data_path, batch_size=32, repeat_size=1, samples=None, num_parallel_workers=1, sparse=True):
    """
    create dataset for training or testing
    """
    # Load raw MNIST samples.
    mnist_ds = ds.MnistDataset(data_path, num_samples=samples)

    # Image transforms: resize to 32x32, rescale pixels to [0, 1], HWC -> CHW.
    resize_op = CV.Resize((32, 32), interpolation=Inter.LINEAR)
    rescale_op = CV.Rescale(1.0 / 255.0, 0.0)
    hwc2chw_op = CV.HWC2CHW()

    # Labels stay int32 when sparse; otherwise one-hot encode and cast to float32.
    label_cast = C.TypeCast(mstype.int32)
    if not sparse:
        mnist_ds = mnist_ds.map(input_columns="label", operations=C.OneHot(10),
                                num_parallel_workers=num_parallel_workers)
        label_cast = C.TypeCast(mstype.float32)
    mnist_ds = mnist_ds.map(input_columns="label", operations=label_cast,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op,
                            num_parallel_workers=num_parallel_workers)

    # Shuffle, batch, and repeat the pipeline.
    mnist_ds = mnist_ds.shuffle(buffer_size=10000)
    mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
    mnist_ds = mnist_ds.repeat(repeat_size)
    return mnist_ds
def test_mixup_batch_success4(plot=False):
    """
    Test MixUpBatch op on a dataset where OneHot returns a 2D vector.
    Alpha parameter will be selected by default in this case
    """
    logger.info("test_mixup_batch_success4")

    # Baseline: decoded CelebA images, batched but unaugmented.
    ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False)
    ds_original = ds_original.map(operations=[vision.Decode()], input_columns=["image"])
    ds_original = ds_original.batch(2, drop_remainder=True)
    images_original = None
    for batch_idx, (img, _) in enumerate(ds_original):
        batch = img.asnumpy()
        images_original = batch if batch_idx == 0 else np.append(images_original, batch, axis=0)

    # MixUp with default alpha over image + 2D one-hot "attr" labels.
    data1 = ds.CelebADataset(DATA_DIR3, shuffle=False)
    data1 = data1.map(operations=[vision.Decode()], input_columns=["image"])
    data1 = data1.map(operations=data_trans.OneHot(num_classes=100), input_columns=["attr"])
    data1 = data1.batch(2, drop_remainder=True)
    data1 = data1.map(operations=vision.MixUpBatch(), input_columns=["image", "attr"])
    images_mixup = np.array([])
    for batch_idx, (img, _) in enumerate(data1):
        batch = img.asnumpy()
        images_mixup = batch if batch_idx == 0 else np.append(images_mixup, batch, axis=0)

    if plot:
        visualize_list(images_original, images_mixup)

    # Log the mean per-image MSE between original and mixed batches.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_mixup[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_mixup_batch_success2(plot=False):
    """
    Test MixUpBatch op with specified alpha parameter on ImageFolderDataset
    """
    logger.info("test_mixup_batch_success2")

    # Baseline: decoded images, batched but unaugmented.
    ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    ds_original = ds_original.map(operations=[vision.Decode()], input_columns=["image"])
    ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)
    images_original = None
    for batch_idx, (img, _) in enumerate(ds_original):
        batch = img.asnumpy()
        images_original = batch if batch_idx == 0 else np.append(images_original, batch, axis=0)

    # MixUp with explicit alpha=2.0.
    data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    data1 = data1.map(operations=[vision.Decode()], input_columns=["image"])
    data1 = data1.map(operations=data_trans.OneHot(num_classes=10), input_columns=["label"])
    data1 = data1.batch(4, pad_info={}, drop_remainder=True)
    data1 = data1.map(operations=vision.MixUpBatch(2.0), input_columns=["image", "label"])
    images_mixup = None
    for batch_idx, (img, _) in enumerate(data1):
        batch = img.asnumpy()
        images_mixup = batch if batch_idx == 0 else np.append(images_mixup, batch, axis=0)

    if plot:
        visualize_list(images_original, images_mixup)

    # Log the mean per-image MSE between original and mixed batches.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_mixup[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_mix_up_single():
    """
    Test single batch mix up op

    Applies py_vision.MixUp with is_single=True, where each sample is mixed
    with the next sample in the same batch, then reconstructs the golden mixed
    image from the label difference and compares it against the op's output.
    """
    logger.info("Test single batch mix up op")
    resize_height = 224
    resize_width = 224
    # Create dataset and define map operations
    ds1 = ds.ImageFolderDataset(DATA_DIR_2)
    num_classes = 10
    decode_op = c_vision.Decode()
    resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR)
    one_hot_encode = c.OneHot(num_classes)  # num_classes is input argument
    ds1 = ds1.map(operations=decode_op, input_columns=["image"])
    ds1 = ds1.map(operations=resize_op, input_columns=["image"])
    ds1 = ds1.map(operations=one_hot_encode, input_columns=["label"])
    # apply batch operations
    batch_size = 3
    ds1 = ds1.batch(batch_size, drop_remainder=True)
    # ds2 shares the same pipeline and serves as the un-mixed reference
    ds2 = ds1
    alpha = 0.2
    transforms = [py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=True)
                  ]
    ds1 = ds1.map(operations=transforms, input_columns=["image", "label"])
    for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1, output_numpy=True),
                            ds2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        image1 = data1["image"]
        label = data1["label"]
        logger.info("label is {}".format(label))
        image2 = data2["image"]
        label2 = data2["label"]
        logger.info("label2 is {}".format(label2))
        # The label difference encodes the mixing weight lam used by MixUp.
        lam = np.abs(label - label2)
        for index in range(batch_size - 1):
            if np.square(lam[index]).mean() != 0:
                # Recover lam and rebuild the golden mixed image from
                # consecutive samples within the same batch (is_single=True).
                lam_value = 1 - np.sum(lam[index]) / 2
                img_golden = lam_value * image2[index] + (1 - lam_value) * image2[index + 1]
                # NOTE(review): `.all() == .all()` compares two booleans and is
                # a weak equality check — confirm intent before changing.
                assert image1[index].all() == img_golden.all()
    logger.info("====test single batch mixup ok====")
def test_cutmix_batch_fail4():
    """
    Test CutMixBatch Fail 4
    We expect this to fail because prob is negative
    """
    # Fix: docstring previously mislabeled this test as "Fail 2".
    logger.info("test_cutmix_batch_fail4")

    # Build a pipeline with one-hot labels; it is never iterated because the
    # CutMixBatch constructor itself must raise.
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(input_columns=["label"], operations=one_hot_op)

    # prob must lie within its valid interval; a negative value is rejected.
    with pytest.raises(ValueError) as error:
        vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 1, -1)
    error_message = "Input is not within the required interval"
    assert error_message in str(error.value)
def test_case_3():
    """
    Test Map
    """
    logger.info("Test Map Rescale and Resize, then Shuffle")
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

    # Augmentation parameters.
    rescale = 1.0 / 255.0
    shift = 0.0
    resize_height, resize_width = 224, 224

    # Image ops: decode, rescale to [0, 1], then resize (bilinear by default).
    decode_op = vision.Decode()
    rescale_op = vision.Rescale(rescale, shift)
    resize_op = vision.Resize((resize_height, resize_width))
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"], operations=rescale_op)
    data1 = data1.map(input_columns=["image"], operations=resize_op)

    # One-hot encode the labels.
    num_classes = 4
    one_hot_encode = data_trans.OneHot(num_classes)  # num_classes is input argument
    data1 = data1.map(input_columns=["label"], operations=one_hot_encode)

    # Shuffle (seeded for reproducibility) and batch.
    ds.config.set_seed(10)
    data1 = data1.shuffle(buffer_size=100)  # 10000 as in imageNet train script
    data1 = data1.batch(2, drop_remainder=True)

    num_iter = 0
    for item in data1.create_dict_iterator():
        # each row is a dictionary with keys "image" and "label"
        logger.info("image is: {}".format(item["image"]))
        logger.info("label is: {}".format(item["label"]))
        num_iter += 1
    logger.info("Number of data in data1: {}".format(num_iter))
def test_cutmix_batch_success1(plot=False):
    """
    Test CutMixBatch op with specified alpha and prob parameters on a batch of CHW images
    """
    logger.info("test_cutmix_batch_success1")

    # Baseline: raw CIFAR-10 batches in HWC layout.
    ds_original = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    ds_original = ds_original.batch(5, drop_remainder=True)
    images_original = None
    for batch_idx, (img, _) in enumerate(ds_original):
        batch = img.asnumpy()
        images_original = batch if batch_idx == 0 else np.append(images_original, batch, axis=0)

    # CutMix on CHW images with alpha=2.0, prob=0.5.
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    data1 = data1.map(operations=vision.HWC2CHW(), input_columns=["image"])
    data1 = data1.map(operations=data_trans.OneHot(num_classes=10), input_columns=["label"])
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW, 2.0, 0.5)
    data1 = data1.batch(5, drop_remainder=True)
    data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])
    images_cutmix = None
    for batch_idx, (img, _) in enumerate(data1):
        # Convert NCHW back to NHWC so it compares against the baseline.
        batch = img.asnumpy().transpose(0, 2, 3, 1)
        images_cutmix = batch if batch_idx == 0 else np.append(images_cutmix, batch, axis=0)

    if plot:
        visualize_list(images_original, images_cutmix)

    # Log the mean per-image MSE between original and augmented batches.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_cutmix[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_cutmix_batch_success2(plot=False):
    """
    Test CutMixBatch op with default values for alpha and prob on a batch of rescaled HWC images
    """
    logger.info("test_cutmix_batch_success2")

    # Baseline: raw CIFAR-10 batches.
    ds_original = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    ds_original = ds_original.batch(5, drop_remainder=True)
    images_original = None
    for batch_idx, (img, _) in enumerate(ds_original):
        batch = img.asnumpy()
        images_original = batch if batch_idx == 0 else np.append(images_original, batch, axis=0)

    # CutMix on rescaled HWC images with default alpha and prob.
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    data1 = data1.map(operations=data_trans.OneHot(num_classes=10), input_columns=["label"])
    data1 = data1.map(operations=vision.Rescale((1.0 / 255.0), 0.0), input_columns=["image"])
    data1 = data1.batch(5, drop_remainder=True)
    data1 = data1.map(operations=vision.CutMixBatch(mode.ImageBatchFormat.NHWC),
                      input_columns=["image", "label"])
    images_cutmix = None
    for batch_idx, (img, _) in enumerate(data1):
        batch = img.asnumpy()
        images_cutmix = batch if batch_idx == 0 else np.append(images_cutmix, batch, axis=0)

    if plot:
        visualize_list(images_original, images_cutmix)

    # Log the mean per-image MSE between original and augmented batches.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_cutmix[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_one_hot_post_aug():
    """
    Test One Hot Encoding after Multiple Data Augmentation Operators
    """
    logger.info("test_one_hot_post_aug")
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

    # Image augmentations: decode, rescale to [0, 1], resize to 224x224.
    data1 = data1.map(input_columns=["image"], operations=c_vision.Decode())
    data1 = data1.map(input_columns=["image"], operations=c_vision.Rescale(1.0 / 255.0, 0.0))
    data1 = data1.map(input_columns=["image"], operations=c_vision.Resize((224, 224)))

    # One-hot encode the labels after the image augmentations.
    data1 = data1.map(input_columns=["label"], operations=data_trans.OneHot(4))

    # Shuffle (seeded for reproducibility) and batch.
    ds.config.set_seed(10)
    data1 = data1.shuffle(buffer_size=100)
    data1 = data1.batch(2, drop_remainder=True)

    num_iter = 0
    for item in data1.create_dict_iterator():
        logger.info("image is: {}".format(item["image"]))
        logger.info("label is: {}".format(item["label"]))
        num_iter += 1
    assert num_iter == 1
def test_mixup_batch_success2(plot=False):
    """
    Test MixUpBatch op without specified alpha parameter.
    Alpha parameter will be selected by default in this case
    """
    logger.info("test_mixup_batch_success2")

    # Baseline: raw CIFAR-10 batches.
    ds_original = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    ds_original = ds_original.batch(5, drop_remainder=True)
    images_original = None
    for batch_idx, (img, _) in enumerate(ds_original):
        images_original = img if batch_idx == 0 else np.append(images_original, img, axis=0)

    # MixUp with default alpha.
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    data1 = data1.map(input_columns=["label"], operations=data_trans.OneHot(num_classes=10))
    data1 = data1.batch(5, drop_remainder=True)
    data1 = data1.map(input_columns=["image", "label"], operations=vision.MixUpBatch())
    images_mixup = np.array([])
    for batch_idx, (img, _) in enumerate(data1):
        images_mixup = img if batch_idx == 0 else np.append(images_mixup, img, axis=0)

    if plot:
        visualize_list(images_original, images_mixup)

    # Log the mean per-image MSE between original and mixed batches.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_mixup[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_one_hot():
    """
    Test OneHot Tensor Operator
    """
    logger.info("test_one_hot")
    depth = 10

    # First dataset: OneHot applied to the label column.
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    data1 = data1.map(operations=data_trans.OneHot(num_classes=depth),
                      input_columns=["label"], column_order=["label"])

    # Second dataset: raw labels only.
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["label"], shuffle=False)

    # Operator output must equal the NumPy one_hot reference applied to data2.
    assert dataset_equal_with_function(data1, data2, 0, one_hot, depth)
def test_mixup_batch_md5():
    """
    Test MixUpBatch with MD5:
    """
    logger.info("test_mixup_batch_md5")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Pipeline: one-hot labels, batch, then MixUpBatch with default alpha.
    data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    data = data.map(input_columns=["label"], operations=data_trans.OneHot(num_classes=10))
    data = data.batch(5, drop_remainder=True)
    data = data.map(input_columns=["image", "label"], operations=vision.MixUpBatch())

    # Compare pipeline output against the stored golden checksum.
    filename = "mixup_batch_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)