Example #1
def test_imagefolder():
    data = ds.ImageFolderDataset("../data/dataset/testPK/data/")
    assert data.get_dataset_size() == 44
    assert data.num_classes() == 4
    data = data.shuffle(100)
    assert data.num_classes() == 4

    data = ds.ImageFolderDataset("../data/dataset/testPK/data/",
                                 num_samples=10)
    assert data.get_dataset_size() == 10
    assert data.num_classes() == 4

    data = ds.ImageFolderDataset("../data/dataset/testPK/data/",
                                 class_indexing={
                                     "class1": 1,
                                     "class2": 22
                                 })
    assert data.num_classes() == 2

    data = ds.ImageFolderDataset("../data/dataset/testPK/data/",
                                 class_indexing={
                                     "class1": 1,
                                     "wrong name": 22
                                 })
    err_msg = ""
    try:
        data.num_classes()
    except RuntimeError as e:
        err_msg = str(e)
    assert "wrong name doesn't exist" in err_msg
def test_auto_contrast_invalid_cutoff_param_py():
    """
    Test AutoContrast python Op with invalid cutoff parameter
    """
    logger.info("Test AutoContrast python Op with invalid cutoff parameter")
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
                                                                                                F.Resize((224, 224)),
                                                                                                F.AutoContrast(
                                                                                                    cutoff=-10.0),
                                                                                                F.ToTensor()])],
                                input_columns=["image"])
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(
            operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
                                                                            F.Resize((224, 224)),
                                                                            F.AutoContrast(cutoff=120.0),
                                                                            F.ToTensor()])],
            input_columns=["image"])
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
def test_random_color_py(degrees=(0.1, 1.9), plot=False):
    """
    Test Python RandomColor
    """
    logger.info("Test RandomColor")

    # Original Images
    data = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)),
         F.ToTensor()])

    ds_original = data.map(operations=transforms_original,
                           input_columns="image")

    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_original = np.append(images_original,
                                        np.transpose(image.asnumpy(),
                                                     (0, 2, 3, 1)),
                                        axis=0)

    # Random Color Adjusted Images
    data = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_random_color = mindspore.dataset.transforms.py_transforms.Compose(
        [
            F.Decode(),
            F.Resize((224, 224)),
            F.RandomColor(degrees=degrees),
            F.ToTensor()
        ])

    ds_random_color = data.map(operations=transforms_random_color,
                               input_columns="image")

    ds_random_color = ds_random_color.batch(512)

    for idx, (image, _) in enumerate(ds_random_color):
        if idx == 0:
            images_random_color = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_random_color = np.append(images_random_color,
                                            np.transpose(
                                                image.asnumpy(), (0, 2, 3, 1)),
                                            axis=0)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_random_color[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_original, images_random_color)
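
Several of the comparisons above and below reduce to a per-image MSE via a shared `diff_mse` helper from the suite's utilities. A minimal sketch of what such a helper computes (the exact normalization and scaling are assumptions):

def diff_mse(in1, in2):
    # Hypothetical sketch: mean squared error between two images,
    # computed on [0, 1]-scaled pixels and reported as a percentage.
    mse = (np.square(in1.astype(float) / 255 - in2.astype(float) / 255)).mean()
    return mse * 100
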
def test_auto_contrast_invalid_cutoff_param_c():
    """
    Test AutoContrast C Op with invalid cutoff parameter
    """
    logger.info("Test AutoContrast C Op with invalid cutoff parameter")
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(operations=[C.Decode(),
                                            C.Resize((224, 224)),
                                            lambda img: np.array(img[:, :, 0])], input_columns=["image"])
        # invalid cutoff
        data_set = data_set.map(operations=C.AutoContrast(cutoff=-10.0), input_columns="image")
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(operations=[C.Decode(),
                                            C.Resize((224, 224)),
                                            lambda img: np.array(img[:, :, 0])], input_columns=["image"])
        # invalid cutoff
        data_set = data_set.map(operations=C.AutoContrast(cutoff=120.0), input_columns="image")
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
def test_auto_contrast_invalid_ignore_param_py():
    """
    Test AutoContrast python Op with invalid ignore parameter
    """
    logger.info("Test AutoContrast python Op with invalid ignore parameter")
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
                                                                                                F.Resize((224, 224)),
                                                                                                F.AutoContrast(
                                                                                                    ignore=255.5),
                                                                                                F.ToTensor()])],
                                input_columns=["image"])
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value 255.5 is not of type" in str(error)
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
                                                                                                F.Resize((224, 224)),
                                                                                                F.AutoContrast(
                                                                                                    ignore=(10, 100)),
                                                                                                F.ToTensor()])],
                                input_columns=["image"])
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value (10,100) is not of type" in str(error)
def test_auto_contrast_invalid_ignore_param_c():
    """
    Test AutoContrast C Op with invalid ignore parameter
    """
    logger.info("Test AutoContrast C Op with invalid ignore parameter")
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(operations=[
            C.Decode(),
            C.Resize((224, 224)), lambda img: np.array(img[:, :, 0])
        ],
                                input_columns=["image"])
        # invalid ignore
        data_set = data_set.map(operations=C.AutoContrast(ignore=255.5),
                                input_columns="image")
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value 255.5 is not of type" in str(error)
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(operations=[
            C.Decode(),
            C.Resize((224, 224)), lambda img: np.array(img[:, :, 0])
        ],
                                input_columns=["image"])
        # invalid ignore
        data_set = data_set.map(operations=C.AutoContrast(ignore=(10, 100)),
                                input_columns="image")
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value (10,100) is not of type" in str(
            error)
def test_imagefolder_zip():
    logger.info("Test Case zip")
    # define parameters
    repeat_count = 2

    # apply dataset operations
    data1 = ds.ImageFolderDataset(DATA_DIR, num_samples=10)
    data2 = ds.ImageFolderDataset(DATA_DIR, num_samples=10)

    data1 = data1.repeat(repeat_count)
    # rename dataset2 for no conflict
    data2 = data2.rename(input_columns=["image", "label"],
                         output_columns=["image1", "label1"])
    data3 = ds.zip((data1, data2))

    num_iter = 0
    for item in data3.create_dict_iterator(num_epochs=1):
        # each row is a dictionary; after the zip and rename it has the
        # keys "image", "label", "image1" and "label1"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
        num_iter += 1

    logger.info("Number of data in data1: {}".format(num_iter))
    assert num_iter == 10
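
After the rename, the zipped dataset exposes four columns. A quick, hypothetical check of the column set, reusing `data3` from the test above:

# Hypothetical check: zip concatenates the columns of both inputs.
row = next(data3.create_dict_iterator(num_epochs=1, output_numpy=True))
assert sorted(row.keys()) == ["image", "image1", "label", "label1"]
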
Example #9
def test_sampler_list():
    data1 = ds.ImageFolderDataset("../data/dataset/testPK/data", sampler=[1, 3, 5])
    data21 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(2).skip(1)
    data22 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(4).skip(3)
    data23 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(6).skip(5)

    dataset_equal(data1, data21 + data22 + data23, 0)

    data3 = ds.ImageFolderDataset("../data/dataset/testPK/data", sampler=1)
    dataset_equal(data3, data21, 0)

    def bad_pipeline(sampler, msg):
        with pytest.raises(Exception) as info:
            data1 = ds.ImageFolderDataset("../data/dataset/testPK/data", sampler=sampler)
            for _ in data1:
                pass
        assert msg in str(info.value)

    bad_pipeline(sampler=[1.5, 7],
                 msg="Type of indices element must be int, but got list[0]: 1.5, type: <class 'float'>")

    bad_pipeline(sampler=["a", "b"],
                 msg="Type of indices element must be int, but got list[0]: a, type: <class 'str'>.")
    bad_pipeline(sampler="a", msg="Unsupported sampler object of type (<class 'str'>)")
    bad_pipeline(sampler="", msg="Unsupported sampler object of type (<class 'str'>)")
    bad_pipeline(sampler=np.array([1, 2]),
                 msg="Type of indices element must be int, but got list[0]: 1, type: <class 'numpy.int64'>.")
Example #10
    # Inner helper excerpted from a larger test: `run_golden` and
    # GENERATE_GOLDEN come from the enclosing scope.
    def test_config(plot, file_name, op_list):
        data_dir = "../data/dataset/testImageNetData/train/"
        data1 = ds.ImageFolderDataset(dataset_dir=data_dir, shuffle=False)
        data1 = data1.map(operations=op_list, input_columns=["image"])
        data2 = ds.ImageFolderDataset(dataset_dir=data_dir, shuffle=False)
        data2 = data2.map(operations=c_vision.Decode(),
                          input_columns=["image"])
        original_images = []
        transformed_images = []

        for item in data1.create_dict_iterator(num_epochs=1,
                                               output_numpy=True):
            transformed_images.append(item["image"])
        for item in data2.create_dict_iterator(num_epochs=1,
                                               output_numpy=True):
            original_images.append(item["image"])

        if run_golden:
            # Compare with expected md5 from images
            save_and_check_md5(data1,
                               file_name,
                               generate_golden=GENERATE_GOLDEN)

        if plot:
            visualize_list(original_images, transformed_images)
Example #11
def test_concat_14():
    """
    Test concat: Testing concat on two different source datasets with different dataset operations.
    """
    logger.info("test_concat_14")
    DATA_DIR = "../data/dataset/testPK/data"
    DATA_DIR2 = "../data/dataset/testImageNetData/train/"

    data1 = ds.ImageFolderDataset(DATA_DIR, num_samples=3)
    data2 = ds.ImageFolderDataset(DATA_DIR2, num_samples=2)

    transforms1 = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)),
         F.ToTensor()])

    data1 = data1.map(operations=transforms1, input_columns=["image"])
    data2 = data2.map(operations=transforms1, input_columns=["image"])
    data3 = data1 + data2

    expected, output = [], []
    for d in data1.create_tuple_iterator(output_numpy=True):
        expected.append(d[0])
    for d in data2.create_tuple_iterator(output_numpy=True):
        expected.append(d[0])
    for d in data3.create_tuple_iterator(output_numpy=True):
        output.append(d[0])

    assert len(expected) == len(output)
    assert np.array_equal(np.array(output), np.array(expected))

    assert sum([1 for _ in data3]) == 5
    assert data3.get_dataset_size() == 5
def test_auto_contrast_py(plot=False):
    """
    Test AutoContrast
    """
    logger.info("Test AutoContrast Python Op")

    # Original Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
                                                                              F.Resize((224, 224)),
                                                                              F.ToTensor()])

    ds_original = data_set.map(operations=transforms_original, input_columns="image")

    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_original = np.append(images_original,
                                        np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                        axis=0)

    # AutoContrast Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_auto_contrast = \
        mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
                                                            F.Resize((224, 224)),
                                                            F.AutoContrast(cutoff=10.0, ignore=[10, 20]),
                                                            F.ToTensor()])

    ds_auto_contrast = data_set.map(operations=transforms_auto_contrast, input_columns="image")

    ds_auto_contrast = ds_auto_contrast.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast):
        if idx == 0:
            images_auto_contrast = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_auto_contrast = np.append(images_auto_contrast,
                                             np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                             axis=0)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_auto_contrast[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    # Compare with expected md5 from images
    filename = "autocontrast_01_result_py.npz"
    save_and_check_md5(ds_auto_contrast, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(images_original, images_auto_contrast)
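
The golden-file comparison goes through `save_and_check_md5` from the test utilities; a rough sketch of the mechanism (the golden-file location and hashing details are guesses, not the suite's actual implementation):

import hashlib

def save_and_check_md5(data, filename, generate_golden=False):
    # Hypothetical sketch: hash every column of every row, then either
    # store the digest as the golden value or compare against it.
    md5 = hashlib.md5()
    for row in data.create_tuple_iterator(output_numpy=True):
        for col in row:
            md5.update(np.ascontiguousarray(col).tobytes())
    golden = "../data/dataset/golden/" + filename  # assumed location
    if generate_golden:
        np.savez(golden, md5=md5.hexdigest())
    else:
        assert md5.hexdigest() == str(np.load(golden)["md5"])
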
Example #13
def test_equalize_py(plot=False):
    """
    Test Equalize py op
    """
    logger.info("Test Equalize")

    # Original Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)),
         F.ToTensor()])

    ds_original = data_set.map(operations=transforms_original,
                               input_columns="image")

    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_original = np.append(images_original,
                                        np.transpose(image.asnumpy(),
                                                     (0, 2, 3, 1)),
                                        axis=0)

    # Color Equalized Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_equalize = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(),
         F.Resize((224, 224)),
         F.Equalize(),
         F.ToTensor()])

    ds_equalize = data_set.map(operations=transforms_equalize,
                               input_columns="image")

    ds_equalize = ds_equalize.batch(512)

    for idx, (image, _) in enumerate(ds_equalize):
        if idx == 0:
            images_equalize = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_equalize = np.append(images_equalize,
                                        np.transpose(image.asnumpy(),
                                                     (0, 2, 3, 1)),
                                        axis=0)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_equalize[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_original, images_equalize)
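
`visualize_list` is the plotting helper behind every `plot=True` branch. A minimal matplotlib stand-in (the real signature, including `visualize_mode`, is assumed):

import matplotlib.pyplot as plt

def visualize_list(image_list_1, image_list_2, visualize_mode=1):
    # Hypothetical sketch: first list on the top row, second on the bottom.
    num = min(len(image_list_1), len(image_list_2), 4)
    _, axes = plt.subplots(2, num, squeeze=False)
    for i in range(num):
        axes[0][i].imshow(image_list_1[i])
        axes[0][i].axis("off")
        axes[1][i].imshow(image_list_2[i])
        axes[1][i].axis("off")
    plt.show()
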
def test_cpp_uniform_augment(plot=False, num_ops=2):
    """
    Test UniformAugment
    """
    logger.info("Test CPP UniformAugment")

    # Original Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = [C.Decode(), C.Resize(size=[224, 224]),
                           F.ToTensor()]

    ds_original = data_set.map(operations=transforms_original, input_columns="image")

    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_original = np.append(images_original,
                                        np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                        axis=0)

    # UniformAugment Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
                     C.RandomHorizontalFlip(),
                     C.RandomVerticalFlip(),
                     C.RandomColorAdjust(),
                     C.RandomRotation(degrees=45)]

    uni_aug = C.UniformAugment(transforms=transforms_ua, num_ops=num_ops)

    transforms_all = [C.Decode(), C.Resize(size=[224, 224]),
                      uni_aug,
                      F.ToTensor()]

    ds_ua = data_set.map(operations=transforms_all, input_columns="image", num_parallel_workers=1)

    ds_ua = ds_ua.batch(512)

    for idx, (image, _) in enumerate(ds_ua):
        if idx == 0:
            images_ua = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_ua = np.append(images_ua,
                                  np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                  axis=0)
    if plot:
        visualize_list(images_original, images_ua)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_ua[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
Example #15
def test_equalize_py_c(plot=False):
    """
    Test Equalize Cpp op and python op
    """
    logger.info("Test Equalize cpp and python op")

    # equalize Images in cpp
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(),
                                        C.Resize((224, 224))],
                            input_columns=["image"])

    ds_c_equalize = data_set.map(operations=C.Equalize(),
                                 input_columns="image")

    ds_c_equalize = ds_c_equalize.batch(512)

    for idx, (image, _) in enumerate(ds_c_equalize):
        if idx == 0:
            images_c_equalize = image.asnumpy()
        else:
            images_c_equalize = np.append(images_c_equalize,
                                          image.asnumpy(),
                                          axis=0)

    # Equalize images in python
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(),
                                        C.Resize((224, 224))],
                            input_columns=["image"])

    transforms_p_equalize = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: img.astype(np.uint8),
         F.ToPIL(),
         F.Equalize(), np.array])

    ds_p_equalize = data_set.map(operations=transforms_p_equalize,
                                 input_columns="image")

    ds_p_equalize = ds_p_equalize.batch(512)

    for idx, (image, _) in enumerate(ds_p_equalize):
        if idx == 0:
            images_p_equalize = image.asnumpy()
        else:
            images_p_equalize = np.append(images_p_equalize,
                                          image.asnumpy(),
                                          axis=0)

    num_samples = images_c_equalize.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_p_equalize[i], images_c_equalize[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_c_equalize, images_p_equalize, visualize_mode=2)
def create_dataset(dataset_path,
                   do_train,
                   repeat_num=1,
                   batch_size=32,
                   target="GPU",
                   dtype="fp16",
                   device_num=1):
    ds.config.set_numa_enable(True)
    if device_num == 1:
        data_set = ds.ImageFolderDataset(dataset_path,
                                         num_parallel_workers=4,
                                         shuffle=True)
    else:
        data_set = ds.ImageFolderDataset(dataset_path,
                                         num_parallel_workers=4,
                                         shuffle=True,
                                         num_shards=device_num,
                                         shard_id=get_rank())
    image_size = 224
    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # define map operations
    normalize_op = C.Normalize(mean=mean, std=std)
    if dtype == "fp16":
        if args_opt.eval:
            x_dtype = "float32"
        else:
            x_dtype = "float16"
        normalize_op = C.NormalizePad(mean=mean, std=std, dtype=x_dtype)
    if do_train:
        trans = [
            C.RandomCropDecodeResize(image_size,
                                     scale=(0.08, 1.0),
                                     ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            normalize_op,
        ]
    else:
        trans = [
            C.Decode(),
            C.Resize(256),
            C.CenterCrop(image_size),
            normalize_op,
        ]
    if dtype == "fp32":
        trans.append(C.HWC2CHW())
    data_set = data_set.map(operations=trans,
                            input_columns="image",
                            num_parallel_workers=8)
    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=True)
    # apply dataset repeat operation
    if repeat_num > 1:
        data_set = data_set.repeat(repeat_num)

    return data_set
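
A hypothetical call site for the GPU create_dataset above (the path is a placeholder, and the function also reads the module-level args_opt when dtype is "fp16"):

train_ds = create_dataset("/path/to/imagenet/train", do_train=True,
                          batch_size=32, target="GPU", dtype="fp16")
for batch in train_ds.create_dict_iterator(num_epochs=1):
    images, labels = batch["image"], batch["label"]
    break  # one batch is enough for a smoke test
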
def test_random_sharpness_c(degrees=(1.6, 1.6), plot=False):
    """
    Test RandomSharpness cpp op
    """
    logger.info("Test RandomSharpness cpp op with degrees={}".format(degrees))

    # Original Images
    data = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = [C.Decode(), C.Resize((224, 224))]

    ds_original = data.map(operations=transforms_original,
                           input_columns="image")

    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(
            ds_original.create_tuple_iterator(output_numpy=True)):
        if idx == 0:
            images_original = image
        else:
            images_original = np.append(images_original, image, axis=0)

    # Random Sharpness Adjusted Images
    data = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    c_op = C.RandomSharpness()
    if degrees is not None:
        c_op = C.RandomSharpness(degrees)

    transforms_random_sharpness = [C.Decode(), C.Resize((224, 224)), c_op]

    ds_random_sharpness = data.map(operations=transforms_random_sharpness,
                                   input_columns="image")

    ds_random_sharpness = ds_random_sharpness.batch(512)

    for idx, (image, _) in enumerate(
            ds_random_sharpness.create_tuple_iterator(output_numpy=True)):
        if idx == 0:
            images_random_sharpness = image
        else:
            images_random_sharpness = np.append(images_random_sharpness,
                                                image,
                                                axis=0)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_random_sharpness[i], images_original[i])

    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_original, images_random_sharpness)
Example #18
def create_dataset(dataset_path, config, do_train, repeat_num=1):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        config(dict): config of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1.

    Returns:
        dataset
    """
    rank = config.rank
    group_size = config.group_size
    if group_size == 1:
        data_set = ds.ImageFolderDataset(dataset_path,
                                         num_parallel_workers=config.work_nums,
                                         shuffle=True)
    else:
        data_set = ds.ImageFolderDataset(dataset_path,
                                         num_parallel_workers=config.work_nums,
                                         shuffle=True,
                                         num_shards=group_size,
                                         shard_id=rank)
    # define map operations
    if do_train:
        trans = [
            C.RandomCropDecodeResize(config.image_size),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, saturation=0.5)  # fast mode
        ]
    else:
        trans = [
            C.Decode(),
            C.Resize(int(config.image_size / 0.875)),
            C.CenterCrop(config.image_size)
        ]
    trans += [
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        C.HWC2CHW()
    ]
    type_cast_op = C2.TypeCast(mstype.int32)
    data_set = data_set.map(operations=trans,
                            input_columns="image",
                            num_parallel_workers=config.work_nums)
    data_set = data_set.map(operations=type_cast_op,
                            input_columns="label",
                            num_parallel_workers=config.work_nums)
    # apply batch operations
    data_set = data_set.batch(config.batch_size, drop_remainder=True)
    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)
    return data_set
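
The config-driven variant only reads a handful of attributes, so a hypothetical minimal call needs nothing more than:

from types import SimpleNamespace

# Hypothetical config carrying only the attributes create_dataset reads.
config = SimpleNamespace(rank=0, group_size=1, work_nums=8,
                         image_size=224, batch_size=32)
eval_ds = create_dataset("/path/to/imagenet/val", config, do_train=False)
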
def test_auto_contrast_one_channel_c(plot=False):
    """
    Test AutoContrast C op with one channel
    """
    logger.info("Test AutoContrast C Op With One Channel Images")

    # AutoContrast Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
    python_op = F.AutoContrast()
    c_op = C.AutoContrast()
    # not using F.ToTensor() since it converts to floats
    transforms_op = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: (np.array(img)[:, :, 0]).astype(np.uint8),
         F.ToPIL(),
         python_op,
         np.array])

    ds_auto_contrast_py = data_set.map(operations=transforms_op, input_columns="image")

    ds_auto_contrast_py = ds_auto_contrast_py.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_py):
        if idx == 0:
            images_auto_contrast_py = image.asnumpy()
        else:
            images_auto_contrast_py = np.append(images_auto_contrast_py,
                                                image.asnumpy(),
                                                axis=0)

    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224)), lambda img: np.array(img[:, :, 0])],
                            input_columns=["image"])

    ds_auto_contrast_c = data_set.map(operations=c_op, input_columns="image")

    ds_auto_contrast_c = ds_auto_contrast_c.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_c):
        if idx == 0:
            images_auto_contrast_c = image.asnumpy()
        else:
            images_auto_contrast_c = np.append(images_auto_contrast_c,
                                               image.asnumpy(),
                                               axis=0)

    num_samples = images_auto_contrast_c.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_auto_contrast_c[i], images_auto_contrast_py[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    np.testing.assert_equal(np.mean(mse), 0.0)

    if plot:
        visualize_list(images_auto_contrast_c, images_auto_contrast_py, visualize_mode=2)
def test_auto_contrast_c(plot=False):
    """
    Test AutoContrast C Op
    """
    logger.info("Test AutoContrast C Op")

    # AutoContrast Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])
    python_op = F.AutoContrast(cutoff=10.0, ignore=[10, 20])
    c_op = C.AutoContrast(cutoff=10.0, ignore=[10, 20])
    transforms_op = mindspore.dataset.transforms.py_transforms.Compose([lambda img: F.ToPIL()(img.astype(np.uint8)),
                                                                        python_op,
                                                                        np.array])

    ds_auto_contrast_py = data_set.map(operations=transforms_op, input_columns="image")

    ds_auto_contrast_py = ds_auto_contrast_py.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_py):
        if idx == 0:
            images_auto_contrast_py = image.asnumpy()
        else:
            images_auto_contrast_py = np.append(images_auto_contrast_py,
                                                image.asnumpy(),
                                                axis=0)

    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))], input_columns=["image"])

    ds_auto_contrast_c = data_set.map(operations=c_op, input_columns="image")

    ds_auto_contrast_c = ds_auto_contrast_c.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_c):
        if idx == 0:
            images_auto_contrast_c = image.asnumpy()
        else:
            images_auto_contrast_c = np.append(images_auto_contrast_c,
                                               image.asnumpy(),
                                               axis=0)

    num_samples = images_auto_contrast_c.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_auto_contrast_c[i], images_auto_contrast_py[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    np.testing.assert_equal(np.mean(mse), 0.0)

    # Compare with expected md5 from images
    filename = "autocontrast_01_result_c.npz"
    save_and_check_md5(ds_auto_contrast_c, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(images_auto_contrast_c, images_auto_contrast_py, visualize_mode=2)
Example #21
def test_cutmix_batch_success3(plot=False):
    """
    Test CutMixBatch op with default values for alpha and prob on a batch of HWC images on ImageFolderDataset
    """
    logger.info("test_cutmix_batch_success3")

    ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    decode_op = vision.Decode()
    ds_original = ds_original.map(operations=[decode_op],
                                  input_columns=["image"])
    resize_op = vision.Resize([224, 224])
    ds_original = ds_original.map(operations=[resize_op],
                                  input_columns=["image"])
    ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)

    images_original = None
    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = image.asnumpy()
        else:
            images_original = np.append(images_original,
                                        image.asnumpy(),
                                        axis=0)

    # CutMix Images
    data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)

    decode_op = vision.Decode()
    data1 = data1.map(operations=[decode_op], input_columns=["image"])

    resize_op = vision.Resize([224, 224])
    data1 = data1.map(operations=[resize_op], input_columns=["image"])

    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(operations=one_hot_op, input_columns=["label"])

    # `mode` presumably aliases mindspore.dataset.vision.utils in the original file
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
    data1 = data1.batch(4, pad_info={}, drop_remainder=True)
    data1 = data1.map(operations=cutmix_batch_op,
                      input_columns=["image", "label"])

    images_cutmix = None
    for idx, (image, _) in enumerate(data1):
        if idx == 0:
            images_cutmix = image.asnumpy()
        else:
            images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0)
    if plot:
        visualize_list(images_original, images_cutmix)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_cutmix[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
Example #23
def create_dataset(dataset_path,
                   do_train,
                   batch_size=16,
                   device_num=1,
                   rank=0):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        batch_size(int): the batch size of dataset. Default: 16.
        device_num (int): Number of shards that the dataset should be divided into (default=1).
        rank (int): The shard ID within num_shards (default=0).

    Returns:
        dataset
    """
    if device_num == 1:
        data_set = ds.ImageFolderDataset(dataset_path,
                                         num_parallel_workers=8,
                                         shuffle=True)
    else:
        data_set = ds.ImageFolderDataset(dataset_path,
                                         num_parallel_workers=8,
                                         shuffle=True,
                                         num_shards=device_num,
                                         shard_id=rank)
    # define map operations
    if do_train:
        trans = [
            C.RandomCropDecodeResize(299),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
        ]
    else:
        trans = [C.Decode(), C.Resize(320), C.CenterCrop(299)]
    trans += [
        C.Normalize(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5]),
        C.HWC2CHW(),
        C2.TypeCast(mstype.float32)
    ]

    type_cast_op = C2.TypeCast(mstype.int32)
    data_set = data_set.map(input_columns="image",
                            operations=trans,
                            num_parallel_workers=8)
    data_set = data_set.map(input_columns="label",
                            operations=type_cast_op,
                            num_parallel_workers=8)
    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=True)
    return data_set
Example #24
def test_imagenet_rawdata_dataset_size():
    ds_total = ds.ImageFolderDataset(IMAGENET_RAWDATA_DIR)
    assert ds_total.get_dataset_size() == 6

    ds_shard_1_0 = ds.ImageFolderDataset(IMAGENET_RAWDATA_DIR, num_shards=1, shard_id=0)
    assert ds_shard_1_0.get_dataset_size() == 6

    ds_shard_2_0 = ds.ImageFolderDataset(IMAGENET_RAWDATA_DIR, num_shards=2, shard_id=0)
    assert ds_shard_2_0.get_dataset_size() == 3

    ds_shard_3_0 = ds.ImageFolderDataset(IMAGENET_RAWDATA_DIR, num_shards=3, shard_id=0)
    assert ds_shard_3_0.get_dataset_size() == 2
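
The per-shard sizes asserted above follow ceiling division over the 6 source images, which a two-line check confirms:

import math

# get_dataset_size per shard is ceil(total / num_shards): 6, 3, 2 for 1, 2, 3.
for num_shards in (1, 2, 3):
    print(num_shards, math.ceil(6 / num_shards))
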
Example #25
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset.

    Args:
        dataset_path (str): The path of dataset.
        do_train (bool): Whether dataset is used for train or eval.
        repeat_num (int): The repeat times of dataset. Default: 1.
        batch_size (int): The batch size of dataset. Default: 32.

    Returns:
        Dataset.
    """

    # `device_num`, `device_id`, and `config` come from the enclosing module.
    do_shuffle = bool(do_train)

    if device_num == 1 or not do_train:
        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=config.work_nums, shuffle=do_shuffle)
    else:
        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=config.work_nums,
                                   shuffle=do_shuffle, num_shards=device_num, shard_id=device_id)

    image_length = 299
    if do_train:
        trans = [
            C.RandomCropDecodeResize(image_length, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
            ]
    else:
        trans = [
            C.Decode(),
            C.Resize(image_length),
            C.CenterCrop(image_length)
            ]
    trans += [
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=config.work_nums)
    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=config.work_nums)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)
    return ds
Example #26
def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        rank (int): The shard ID within num_shards (default=None).
        group_size (int): Number of shards that the dataset should be divided into (default=None).
        repeat_num(int): the repeat times of dataset. Default: 1.

    Returns:
        dataset
    """
    if group_size == 1:
        data_set = ds.ImageFolderDataset(dataset_path,
                                         num_parallel_workers=cfg.work_nums,
                                         shuffle=True)
    else:
        data_set = ds.ImageFolderDataset(dataset_path,
                                         num_parallel_workers=cfg.work_nums,
                                         shuffle=True,
                                         num_shards=group_size,
                                         shard_id=rank)
    # define map operations
    if do_train:
        trans = [
            C.RandomCropDecodeResize(224),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
        ]
    else:
        trans = [C.Decode(), C.Resize(256), C.CenterCrop(224)]
    trans += [
        toBGR(),  # custom transform defined in the enclosing module
        C.Rescale(1.0 / 255.0, 0.0),
        C.HWC2CHW(),
        C2.TypeCast(mstype.float32)
    ]

    type_cast_op = C2.TypeCast(mstype.int32)
    data_set = data_set.map(operations=trans,
                            input_columns="image",
                            num_parallel_workers=cfg.work_nums)
    data_set = data_set.map(operations=type_cast_op,
                            input_columns="label",
                            num_parallel_workers=cfg.work_nums)
    # apply batch operations
    data_set = data_set.batch(cfg.batch_size, drop_remainder=True)

    return data_set
Example #27
def create_dataset_imagenet(dataset_path, do_train, cfg, repeat_num=1):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        cfg (dict): the config for creating dataset.
        repeat_num(int): the repeat times of dataset. Default: 1.

    Returns:
        dataset
    """
    if cfg.group_size == 1:
        data_set = ds.ImageFolderDataset(dataset_path,
                                         num_parallel_workers=cfg.work_nums,
                                         shuffle=True)
    else:
        data_set = ds.ImageFolderDataset(dataset_path,
                                         num_parallel_workers=cfg.work_nums,
                                         shuffle=True,
                                         num_shards=cfg.group_size,
                                         shard_id=cfg.rank)
    # define map operations
    if do_train:
        trans = [
            C.RandomCropDecodeResize(299,
                                     scale=(0.08, 1.0),
                                     ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
        ]
    else:
        trans = [C.Decode(), C.Resize(299), C.CenterCrop(299)]
    trans += [
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        C.HWC2CHW()
    ]
    type_cast_op = C2.TypeCast(mstype.int32)
    data_set = data_set.map(operations=trans,
                            input_columns="image",
                            num_parallel_workers=cfg.work_nums)
    data_set = data_set.map(operations=type_cast_op,
                            input_columns="label",
                            num_parallel_workers=cfg.work_nums)
    # apply batch operations
    data_set = data_set.batch(cfg.batch_size, drop_remainder=True)
    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)
    return data_set
Example #28
def test_mixup_batch_success2(plot=False):
    """
    Test MixUpBatch op with specified alpha parameter on ImageFolderDataset
    """
    logger.info("test_mixup_batch_success2")

    # Original Images
    ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    decode_op = vision.Decode()
    ds_original = ds_original.map(operations=[decode_op],
                                  input_columns=["image"])
    ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)

    images_original = None
    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = image.asnumpy()
        else:
            images_original = np.append(images_original,
                                        image.asnumpy(),
                                        axis=0)

    # MixUp Images
    data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)

    decode_op = vision.Decode()
    data1 = data1.map(operations=[decode_op], input_columns=["image"])

    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(operations=one_hot_op, input_columns=["label"])

    mixup_batch_op = vision.MixUpBatch(2.0)
    data1 = data1.batch(4, pad_info={}, drop_remainder=True)
    data1 = data1.map(operations=mixup_batch_op,
                      input_columns=["image", "label"])

    images_mixup = None
    for idx, (image, _) in enumerate(data1):
        if idx == 0:
            images_mixup = image.asnumpy()
        else:
            images_mixup = np.append(images_mixup, image.asnumpy(), axis=0)
    if plot:
        visualize_list(images_original, images_mixup)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_mixup[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
Example #29
def test_equalize_c(plot=False):
    """
    Test Equalize Cpp op
    """
    logger.info("Test Equalize cpp op")

    # Original Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = [C.Decode(), C.Resize(size=[224, 224])]

    ds_original = data_set.map(operations=transforms_original,
                               input_columns="image")

    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = image.asnumpy()
        else:
            images_original = np.append(images_original,
                                        image.asnumpy(),
                                        axis=0)

    # Equalize Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transform_equalize = [C.Decode(), C.Resize(size=[224, 224]), C.Equalize()]

    ds_equalize = data_set.map(operations=transform_equalize,
                               input_columns="image")

    ds_equalize = ds_equalize.batch(512)

    for idx, (image, _) in enumerate(ds_equalize):
        if idx == 0:
            images_equalize = image.asnumpy()
        else:
            images_equalize = np.append(images_equalize,
                                        image.asnumpy(),
                                        axis=0)
    if plot:
        visualize_list(images_original, images_equalize)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_equalize[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
Example #30
def test_imagefolder_exception():
    logger.info("Test imagefolder exception")

    def exception_func(item):
        raise Exception("Error occur!")

    def exception_func2(image, label):
        raise Exception("Error occur!")

    try:
        data = ds.ImageFolderDataset(DATA_DIR)
        data = data.map(operations=exception_func,
                        input_columns=["image"],
                        num_parallel_workers=1)
        for _ in data.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(
            e)

    try:
        data = ds.ImageFolderDataset(DATA_DIR)
        data = data.map(operations=exception_func2,
                        input_columns=["image", "label"],
                        output_columns=["image", "label", "label1"],
                        column_order=["image", "label", "label1"],
                        num_parallel_workers=1)
        for _ in data.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(
            e)

    try:
        data = ds.ImageFolderDataset(DATA_DIR)
        data = data.map(operations=vision.Decode(),
                        input_columns=["image"],
                        num_parallel_workers=1)
        data = data.map(operations=exception_func,
                        input_columns=["image"],
                        num_parallel_workers=1)
        for _ in data.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(
            e)