Esempio n. 1
0
    def __init__(self,
                 data_dir,
                 training=True,
                 use_third_trsfm=False,
                 use_auto_augment=False,
                 num_parallel_workers=8,
                 device_num=1,
                 device_id=0):
        """Pick the image transform pipeline and remember the loader settings.

        Args:
            data_dir: Stored unchanged on ``self.data_dir``.
            training: When false, the pipeline is only ``ToTensor`` followed
                by ``Normalize``; the two ``use_*`` flags are then ignored.
            use_third_trsfm: Only consulted when ``training`` is true. False
                selects the colour-adjust / grayscale / flip augmentation;
                true selects flip followed by a policy op.
            use_auto_augment: Only consulted when ``training`` and
                ``use_third_trsfm`` are both true. True uses
                ``CIFAR10Policy()``, false uses ``RandAugment(n=2, m=10)``.
            num_parallel_workers: Stored on ``self.num_parallel_workers``.
            device_num: Stored on ``self.device_num``.
            device_id: Stored on ``self.device_id``.
        """
        if training:
            # Branch-specific augmentation that sits between the shared
            # crop and the shared tensor conversion.
            if not use_third_trsfm:
                augment = [
                    transforms.RandomColorAdjust(0.4, 0.4, 0.4, 0.4),
                    transforms.RandomGrayscale(prob=0.2),
                    transforms.RandomHorizontalFlip(),
                ]
            else:
                if use_auto_augment:
                    policy = CIFAR10Policy()
                else:
                    policy = RandAugment(n=2, m=10)
                augment = [transforms.RandomHorizontalFlip(), policy]

            # All training pipelines start from a PIL image and a random crop.
            steps = [
                transforms.ToPIL(),
                transforms.RandomResizedCrop(size=32, scale=(0.2, 1.)),
            ] + augment
        else:
            steps = []

        # Every pipeline, train or eval, ends with the same two ops.
        steps += [
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ]

        self.trsfm = Compose(steps)
        self.data_dir = data_dir
        self.num_parallel_workers = num_parallel_workers
        self.device_num = device_num
        self.device_id = device_id
Esempio n. 2
0
def test_c_py_compose_vision_module(plot=False, run_golden=True):
    """
    Test combining Python and C++ vision transforms

    Three mixed op lists are each mapped over the image folder used by
    ``test_config``. For every list the mapped dataset is optionally checked
    against a golden md5 file (``run_golden``) and the transformed images are
    optionally plotted next to the decoded originals (``plot``).

    The seed and worker-count settings changed at the start are restored in a
    ``finally`` block, so a failing pipeline or md5 check cannot leak them
    into tests that run afterwards.
    """
    # NOTE(review): these helpers presumably apply the new value and return
    # the previous one - confirm against their definitions.
    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    def test_config(plot, file_name, op_list):
        """Run one op list over the folder; check md5 and/or plot."""
        data_dir = "../data/dataset/testImageNetData/train/"
        data1 = ds.ImageFolderDataset(dataset_dir=data_dir, shuffle=False)
        data1 = data1.map(operations=op_list, input_columns=["image"])
        # Second, decode-only pipeline supplies the reference images.
        data2 = ds.ImageFolderDataset(dataset_dir=data_dir, shuffle=False)
        data2 = data2.map(operations=c_vision.Decode(), input_columns=["image"])

        transformed_images = [
            item["image"]
            for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True)
        ]
        original_images = [
            item["image"]
            for item in data2.create_dict_iterator(num_epochs=1, output_numpy=True)
        ]

        if run_golden:
            # Compare with expected md5 from images
            save_and_check_md5(data1, file_name, generate_golden=GENERATE_GOLDEN)

        if plot:
            visualize_list(original_images, transformed_images)

    try:
        test_config(op_list=[c_vision.Decode(),
                             py_vision.ToPIL(),
                             py_vision.Resize((224, 224)),
                             np.array],
                    plot=plot, file_name="compose_c_py_1.npz")

        test_config(op_list=[c_vision.Decode(),
                             c_vision.Resize((224, 244)),
                             py_vision.ToPIL(),
                             np.array,
                             c_vision.Resize((24, 24))],
                    plot=plot, file_name="compose_c_py_2.npz")

        test_config(op_list=[py_vision.Decode(),
                             py_vision.Resize((224, 224)),
                             np.array,
                             c_vision.RandomColor()],
                    plot=plot, file_name="compose_c_py_3.npz")
    finally:
        # Restore configuration even when one of the sub-tests raised.
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
Esempio n. 3
0
def test_to_pil_02():
    """
    Test ToPIL Op with md5 comparison: input is not PIL image
    Expected to pass

    The "image" column is first decoded with the C++ ``Decode`` op, then run
    through a Python ``Compose`` of ``ToPIL`` -> ``CenterCrop(375)`` ->
    ``ToTensor``. The resulting dataset is compared with the golden file
    "to_pil_02_result.npz".
    """
    logger.info("test_to_pil_02")

    # Source dataset, decoded by the C++ op so ToPIL receives a non-PIL input.
    dataset = ds.TFRecordDataset(DATA_DIR,
                                 SCHEMA_DIR,
                                 columns_list=["image"],
                                 shuffle=False)
    dataset = dataset.map(operations=c_vision.Decode(),
                          input_columns=["image"])

    # Python-side pipeline under test.
    py_pipeline = mindspore.dataset.transforms.py_transforms.Compose([
        py_vision.ToPIL(),
        py_vision.CenterCrop(375),
        py_vision.ToTensor(),
    ])
    dataset = dataset.map(operations=py_pipeline, input_columns=["image"])

    # Compare with expected md5 from images
    save_and_check_md5(dataset,
                       "to_pil_02_result.npz",
                       generate_golden=GENERATE_GOLDEN)
Esempio n. 4
0
def test_equalize_py_c(plot=False):
    """
    Test Equalize Cpp op and python op

    Builds two pipelines over the same image folder (decode + resize to
    224x224), applies the C++ ``Equalize`` op in one and the Python
    ``Equalize`` op in the other, and logs the mean per-image MSE between the
    two outputs. No assertion is made on the MSE value.

    Args:
        plot: When true, show the two image sets with ``visualize_list``.
    """
    logger.info("Test Equalize cpp and python op")

    # equalize Images in cpp
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(),
                                        C.Resize((224, 224))],
                            input_columns=["image"])

    ds_c_equalize = data_set.map(operations=C.Equalize(),
                                 input_columns="image")
    ds_c_equalize = ds_c_equalize.batch(512)

    # Join all batches in one call instead of re-copying the accumulated
    # array with np.append on every batch.
    images_c_equalize = np.concatenate(
        [image.asnumpy() for image, _ in ds_c_equalize], axis=0)

    # Equalize images in python
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(),
                                        C.Resize((224, 224))],
                            input_columns=["image"])

    transforms_p_equalize = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: img.astype(np.uint8),
         F.ToPIL(),
         F.Equalize(), np.array])

    ds_p_equalize = data_set.map(operations=transforms_p_equalize,
                                 input_columns="image")
    ds_p_equalize = ds_p_equalize.batch(512)

    images_p_equalize = np.concatenate(
        [image.asnumpy() for image, _ in ds_p_equalize], axis=0)

    num_samples = images_c_equalize.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_p_equalize[i], images_c_equalize[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_c_equalize, images_p_equalize, visualize_mode=2)
Esempio n. 5
0
def test_compare_random_color_op(degrees=None, plot=False):
    """
    Compare Random Color op in Python and Cpp

    Runs the C++ and Python ``RandomColor`` ops over the same TFRecord data,
    asserts that each pair of outputs has the same shape, and logs the MSE of
    each pair. The seed and worker-count settings changed at the start are
    restored in a ``finally`` block, so a failing shape assertion or pipeline
    error cannot leak them into later tests.

    Args:
        degrees: Passed to both ``RandomColor`` constructors; when ``None``
            both ops are built with their own defaults.
        plot: When true, show the two image lists with ``visualize_list``.
    """

    logger.info("test_random_color_op")

    # NOTE(review): these helpers presumably apply the new value and return
    # the previous one - confirm against their definitions.
    original_seed = config_get_set_seed(5)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    image_random_color_op = []
    image = []

    try:
        # Decode with rgb format set to True
        data1 = ds.TFRecordDataset(C_DATA_DIR,
                                   C_SCHEMA_DIR,
                                   columns_list=["image"],
                                   shuffle=False)
        data2 = ds.TFRecordDataset(C_DATA_DIR,
                                   C_SCHEMA_DIR,
                                   columns_list=["image"],
                                   shuffle=False)

        if degrees is None:
            c_op = vision.RandomColor()
            p_op = F.RandomColor()
        else:
            c_op = vision.RandomColor(degrees)
            p_op = F.RandomColor(degrees)

        transforms_random_color_py = mindspore.dataset.transforms.py_transforms.Compose(
            [lambda img: img.astype(np.uint8),
             F.ToPIL(), p_op, np.array])

        data1 = data1.map(operations=[vision.Decode(), c_op],
                          input_columns=["image"])
        data2 = data2.map(operations=[vision.Decode()], input_columns=["image"])
        data2 = data2.map(operations=transforms_random_color_py,
                          input_columns=["image"])

        for item1, item2 in zip(
                data1.create_dict_iterator(num_epochs=1, output_numpy=True),
                data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
            actual = item1["image"]
            expected = item2["image"]
            image_random_color_op.append(actual)
            image.append(expected)
            assert actual.shape == expected.shape
            mse = diff_mse(actual, expected)
            logger.info("MSE= {}".format(str(np.mean(mse))))
    finally:
        # Restore configuration even when the comparison above raised.
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)

    if plot:
        visualize_list(image, image_random_color_op)
Esempio n. 6
0
def test_auto_contrast_c(plot=False):
    """
    Test AutoContrast C Op

    Applies the Python and C++ ``AutoContrast`` ops (both with
    ``cutoff=10.0, ignore=[10, 20]``) to the same decoded, 224x224-resized
    images, asserts that the mean per-image MSE between the two outputs is
    exactly 0.0, and checks the C++ dataset against the golden file
    "autocontrast_01_result_c.npz".

    Args:
        plot: When true, show the two image sets with ``visualize_list``.
    """
    logger.info("Test AutoContrast C Op")

    # AutoContrast Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
    python_op = F.AutoContrast(cutoff=10.0, ignore=[10, 20])
    c_op = C.AutoContrast(cutoff=10.0, ignore=[10, 20])
    transforms_op = mindspore.dataset.transforms.py_transforms.Compose([lambda img: F.ToPIL()(img.astype(np.uint8)),
                                                                        python_op,
                                                                        np.array])

    ds_auto_contrast_py = data_set.map(operations=transforms_op, input_columns="image")
    ds_auto_contrast_py = ds_auto_contrast_py.batch(512)

    # Join all batches in one call instead of re-copying the accumulated
    # array with np.append on every batch.
    images_auto_contrast_py = np.concatenate(
        [image.asnumpy() for image, _ in ds_auto_contrast_py], axis=0)

    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])

    ds_auto_contrast_c = data_set.map(operations=c_op, input_columns="image")
    ds_auto_contrast_c = ds_auto_contrast_c.batch(512)

    images_auto_contrast_c = np.concatenate(
        [image.asnumpy() for image, _ in ds_auto_contrast_c], axis=0)

    num_samples = images_auto_contrast_c.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_auto_contrast_c[i], images_auto_contrast_py[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    np.testing.assert_equal(np.mean(mse), 0.0)

    # Compare with expected md5 from images
    filename = "autocontrast_01_result_c.npz"
    save_and_check_md5(ds_auto_contrast_c, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(images_auto_contrast_c, images_auto_contrast_py, visualize_mode=2)
Esempio n. 7
0
def test_auto_contrast_one_channel_c(plot=False):
    """
    Test AutoContrast C op with one channel

    Both pipelines decode and resize to 224x224, keep only channel 0 of each
    image, and then apply ``AutoContrast`` with default arguments (Python op
    in one pipeline, C++ op in the other). The test asserts that the mean
    per-image MSE between the two outputs is exactly 0.0.

    Args:
        plot: When true, show the two image sets with ``visualize_list``.
    """
    logger.info("Test AutoContrast C Op With One Channel Images")

    # AutoContrast Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
    python_op = F.AutoContrast()
    c_op = C.AutoContrast()
    # not using F.ToTensor() since it converts to floats
    transforms_op = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: (np.array(img)[:, :, 0]).astype(np.uint8),
         F.ToPIL(),
         python_op,
         np.array])

    ds_auto_contrast_py = data_set.map(operations=transforms_op, input_columns="image")
    ds_auto_contrast_py = ds_auto_contrast_py.batch(512)

    # Join all batches in one call instead of re-copying the accumulated
    # array with np.append on every batch.
    images_auto_contrast_py = np.concatenate(
        [image.asnumpy() for image, _ in ds_auto_contrast_py], axis=0)

    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224)), lambda img: np.array(img[:, :, 0])],
                            input_columns=["image"])

    ds_auto_contrast_c = data_set.map(operations=c_op, input_columns="image")
    ds_auto_contrast_c = ds_auto_contrast_c.batch(512)

    images_auto_contrast_c = np.concatenate(
        [image.asnumpy() for image, _ in ds_auto_contrast_c], axis=0)

    num_samples = images_auto_contrast_c.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_auto_contrast_c[i], images_auto_contrast_py[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    np.testing.assert_equal(np.mean(mse), 0.0)

    if plot:
        visualize_list(images_auto_contrast_c, images_auto_contrast_py, visualize_mode=2)
Esempio n. 8
0
 def __call__(self, imgs, labels, batchInfo):
     """Augment every image of a batch and return images and labels as arrays.

     Each image is converted with ``ToPIL``, passed through the rand-augment
     transform built from ``self.config_str`` / ``self.hparams``, converted
     with ``ToTensor`` and normalized with the ImageNet default mean/std.
     Labels are copied through by position.

     Args:
         imgs: Iterable of images accepted by ``P.ToPIL()``.
         labels: Sequence indexed in step with ``imgs``.
         batchInfo: Not used by this method.

     Returns:
         Tuple ``(np.array(images), np.array(labels))``.
     """
     # The ops are rebuilt on every call, in the same order as before.
     to_pil = P.ToPIL()
     as_tensor = P.ToTensor()
     normalize = P.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)
     augment = transform_utils.rand_augment_transform(
         self.config_str, self.hparams)

     out_imgs, out_labels = [], []
     for idx, raw in enumerate(imgs):
         out_imgs.append(normalize(as_tensor(augment(to_pil(raw)))))
         out_labels.append(labels[idx])
     return np.array(out_imgs), np.array(out_labels)
Esempio n. 9
0
def get_de_dataset(args):
    '''Build the batched training dataset described by ``args``.

    The image column goes through ToPIL -> RandomHorizontalFlip -> ToTensor ->
    Normalize(mean=[0.5], std=[0.5]); the label column is cast to int32.

    Args:
        args: Object providing ``data_dir``, ``is_distributed``,
            ``world_size``, ``local_rank``, ``per_batch_size`` and ``logger``.

    Returns:
        Tuple ``(de_dataset, num_iter_per_npu, num_classes)`` where
        ``num_iter_per_npu`` is
        ``ceil(len(dataset) / world_size / per_batch_size)`` and
        ``num_classes`` is ``len(dataset.classes)``.
    '''
    lbl_transforms = [F.ToType(np.int32)]
    transform_label = F2.Compose(lbl_transforms)

    drop_remainder = False

    transforms = [
        F.ToPIL(),
        F.RandomHorizontalFlip(),
        F.ToTensor(),
        F.Normalize(mean=[0.5], std=[0.5])
    ]
    transform = F2.Compose(transforms)
    # NOTE(review): basename() is empty when data_dir ends with a path
    # separator, which would place the cache directly under 'cache' - confirm
    # callers never pass a trailing separator.
    cache_path = os.path.join('cache', os.path.basename(args.data_dir),
                              'data_cache.pkl')
    print(cache_path)
    # exist_ok avoids the check-then-create race when several processes
    # start at the same time and all try to create the cache directory.
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)
    dataset = CustomDataset(args.data_dir, cache_path, args.is_distributed)
    args.logger.info("dataset len:{}".format(len(dataset)))
    sampler = DistributedCustomSampler(dataset,
                                       num_replicas=args.world_size,
                                       rank=args.local_rank,
                                       is_distributed=args.is_distributed)
    de_dataset = de.GeneratorDataset(dataset, ["image", "label"],
                                     sampler=sampler)
    args.logger.info("after sampler de_dataset datasize :{}".format(
        de_dataset.get_dataset_size()))
    de_dataset = de_dataset.map(input_columns="image", operations=transform)
    de_dataset = de_dataset.map(input_columns="label",
                                operations=transform_label)
    de_dataset = de_dataset.project(columns=["image", "label"])
    de_dataset = de_dataset.batch(args.per_batch_size,
                                  drop_remainder=drop_remainder)
    num_iter_per_npu = math.ceil(
        len(dataset) * 1.0 / args.world_size / args.per_batch_size)
    num_classes = len(dataset.classes)

    return de_dataset, num_iter_per_npu, num_classes
Esempio n. 10
0
def create_dataset(args, dataset_mode, repeat_num=1):
    """
    create a train or evaluate cifar10 dataset for SimCLR

    Args:
        args: Configuration object. ``dataset_name`` must be "cifar10".
            The dataset paths, distribution settings, ``use_*`` augmentation
            switches and ``batch_size`` are read from it.
        dataset_mode: "train_endcoder" and "train_classifier" read from
            ``args.train_dataset_path``; any other value reads from
            ``args.eval_dataset_path``. Only "train_endcoder" gets the
            augmentation pipeline.
        repeat_num: Passed to ``repeat`` on the batched dataset.

    Returns:
        The batched, repeated dataset with columns "image1", "image2" and
        "label"; the same op list is mapped over both image columns.

    Raises:
        ValueError: If ``args.dataset_name`` is not "cifar10".
    """
    if args.dataset_name != "cifar10":
        raise ValueError("Unsupported dataset.")

    uses_train_split = dataset_mode in ("train_endcoder", "train_classifier")
    dataset_path = (args.train_dataset_path
                    if uses_train_split else args.eval_dataset_path)

    # Sharding arguments are only supplied for distributed runs on Ascend.
    reader_kwargs = {"num_parallel_workers": 8, "shuffle": True}
    if args.run_distribute and args.device_target == "Ascend":
        reader_kwargs["num_shards"] = args.device_num
        reader_kwargs["shard_id"] = args.device_id
    data_set = ds.Cifar10Dataset(dataset_path, **reader_kwargs)

    # define map operations
    trans = []
    if dataset_mode == "train_endcoder":
        if args.use_crop:
            trans.append(C.Resize(256, interpolation=Inter.BICUBIC))
            trans.append(
                C.RandomResizedCrop(size=(32, 32),
                                    scale=(0.31, 1),
                                    interpolation=Inter.BICUBIC,
                                    max_attempts=100))
        if args.use_flip:
            trans.append(C.RandomHorizontalFlip(prob=0.5))
        if args.use_color_jitter:
            scale = 0.6
            color_jitter = C.RandomColorAdjust(0.8 * scale, 0.8 * scale,
                                               0.8 * scale, 0.2 * scale)
            trans.append(C2.RandomApply([color_jitter], prob=0.8))
        if args.use_color_gray:
            # need to convert PIL image to a NumPy array to pass it to C++ operation
            trans.extend([
                py_vision.ToPIL(),
                py_vision.RandomGrayscale(prob=0.2),
                np.array,
            ])
        if args.use_blur:
            trans.append(C2.RandomApply([gaussian_blur], prob=0.8))
        if args.use_norm:
            trans.append(
                C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]))
        trans.extend([C2.TypeCast(mstype.float32), C.HWC2CHW()])
    else:
        trans.append(C.Resize(32))
        trans.append(C2.TypeCast(mstype.float32))
        if args.use_norm:
            trans.append(
                C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]))
        trans.append(C.HWC2CHW())

    data_set = data_set.map(operations=C2.TypeCast(mstype.int32),
                            input_columns="label",
                            num_parallel_workers=8)
    data_set = data_set.map(operations=copy_column,
                            input_columns=["image", "label"],
                            output_columns=["image1", "image2", "label"],
                            column_order=["image1", "image2", "label"],
                            num_parallel_workers=8)
    # The same op list is mapped over each of the two image columns in turn.
    for image_column in ("image1", "image2"):
        data_set = data_set.map(operations=trans,
                                input_columns=[image_column],
                                num_parallel_workers=8)

    # apply batch operations
    data_set = data_set.batch(args.batch_size, drop_remainder=True)
    # apply dataset repeat operation
    return data_set.repeat(repeat_num)
def test_random_sharpness_c_py(degrees=(1.0, 1.0), plot=False):
    """
    Test Random Sharpness C and python Op

    Applies the Python and C++ ``RandomSharpness`` ops, built with the same
    ``degrees``, to the same decoded and resized images and logs the mean
    per-image MSE between the two outputs. No assertion is made on the MSE
    value.

    Args:
        degrees: Passed to both ``RandomSharpness`` constructors.
        plot: When true, show the two image sets with ``visualize_list``.
    """
    logger.info("Test RandomSharpness C and python Op")

    # RandomSharpness Images
    data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data = data.map(operations=[C.Decode(), C.Resize((200, 300))],
                    input_columns=["image"])

    python_op = F.RandomSharpness(degrees)
    c_op = C.RandomSharpness(degrees)

    transforms_op = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: F.ToPIL()(img.astype(np.uint8)), python_op, np.array])

    ds_random_sharpness_py = data.map(operations=transforms_op,
                                      input_columns="image")
    ds_random_sharpness_py = ds_random_sharpness_py.batch(512)

    # Join all batches in one call instead of re-copying the accumulated
    # array with np.append on every batch.
    images_random_sharpness_py = np.concatenate(
        [image for image, _ in
         ds_random_sharpness_py.create_tuple_iterator(output_numpy=True)],
        axis=0)

    data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data = data.map(operations=[C.Decode(), C.Resize((200, 300))],
                    input_columns=["image"])

    ds_images_random_sharpness_c = data.map(operations=c_op,
                                            input_columns="image")
    ds_images_random_sharpness_c = ds_images_random_sharpness_c.batch(512)

    images_random_sharpness_c = np.concatenate(
        [image for image, _ in
         ds_images_random_sharpness_c.create_tuple_iterator(output_numpy=True)],
        axis=0)

    num_samples = images_random_sharpness_c.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_random_sharpness_c[i],
                          images_random_sharpness_py[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    if plot:
        visualize_list(images_random_sharpness_c,
                       images_random_sharpness_py,
                       visualize_mode=2)