Example 1
def test_auto_contrast_invalid_cutoff_param_c():
    """
    Test AutoContrast C Op with invalid cutoff parameter
    """
    logger.info("Test AutoContrast C Op with invalid cutoff parameter")
    try:
        ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        ds = ds.map(input_columns=["image"],
                    operations=[
                        C.Decode(),
                        C.Resize((224, 224)),
                        lambda img: np.array(img[:, :, 0])
                    ])
        # invalid cutoff
        ds = ds.map(input_columns="image",
                    operations=C.AutoContrast(cutoff=-10.0))
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(
            error)
    try:
        ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        ds = ds.map(input_columns=["image"],
                    operations=[
                        C.Decode(),
                        C.Resize((224, 224)),
                        lambda img: np.array(img[:, :, 0])
                    ])
        # invalid cutoff
        ds = ds.map(input_columns="image",
                    operations=C.AutoContrast(cutoff=120.0))
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(
            error)
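
These excerpts assume module-level imports and constants that the listing omits, and the aliases vary from example to example (de, ds, C, F, vision, CV, ...). Below is a minimal sketch of the preamble assumed by the first few examples, written against the pre-1.0 MindSpore dataset API; the DATA_DIR path is a placeholder:

# Assumed preamble (a sketch, not verified source; DATA_DIR is a placeholder)
import numpy as np
import mindspore.dataset as de
import mindspore.dataset.transforms.vision.c_transforms as C
import mindspore.dataset.transforms.vision.py_transforms as F
from mindspore import log as logger

DATA_DIR = "../data/dataset/testImageNetData/train/"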
Example 2
def test_random_sharpness_invalid_params():
    """
    Test RandomSharpness with invalid input parameters.
    """
    logger.info("Test RandomSharpness with invalid input parameters.")
    try:
        data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        data = data.map(input_columns=["image"],
                        operations=[C.Decode(),
                                    C.Resize((224, 224)),
                                    C.RandomSharpness(10)])
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "tuple" in str(error)

    try:
        data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        data = data.map(input_columns=["image"],
                        operations=[C.Decode(),
                                    C.Resize((224, 224)),
                                    C.RandomSharpness((-10, 10))])
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "interval" in str(error)

    try:
        data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        data = data.map(input_columns=["image"],
                        operations=[C.Decode(),
                                    C.Resize((224, 224)),
                                    C.RandomSharpness((10, 5))])
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "(min,max)" in str(error)
Example 3
def test_auto_contrast_invalid_ignore_param_c():
    """
    Test AutoContrast C Op with invalid ignore parameter
    """
    logger.info("Test AutoContrast C Op with invalid ignore parameter")
    try:
        ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        ds = ds.map(input_columns=["image"],
                    operations=[
                        C.Decode(),
                        C.Resize((224, 224)),
                        lambda img: np.array(img[:, :, 0])
                    ])
        # invalid ignore
        ds = ds.map(input_columns="image",
                    operations=C.AutoContrast(ignore=255.5))
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value 255.5 is not of type" in str(error)
    try:
        ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
        ds = ds.map(input_columns=["image"],
                    operations=[
                        C.Decode(),
                        C.Resize((224, 224)),
                        lambda img: np.array(img[:, :, 0])
                    ])
        # invalid ignore
        ds = ds.map(input_columns="image",
                    operations=C.AutoContrast(ignore=(10, 100)))
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value (10,100) is not of type" in str(
            error)
Example 4
def test_cpp_uniform_augment(plot=False, num_ops=2):
    """
    Test UniformAugment
    """
    logger.info("Test CPP UniformAugment")

    # Original Images
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = [C.Decode(), C.Resize(size=[224, 224]),
                           F.ToTensor()]

    ds_original = ds.map(input_columns="image",
                         operations=transforms_original)

    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = np.transpose(image, (0, 2, 3, 1))
        else:
            images_original = np.append(images_original,
                                        np.transpose(image, (0, 2, 3, 1)),
                                        axis=0)

    # UniformAugment Images
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
                     C.RandomHorizontalFlip(),
                     C.RandomVerticalFlip(),
                     C.RandomColorAdjust(),
                     C.RandomRotation(degrees=45)]

    uni_aug = C.UniformAugment(operations=transforms_ua, num_ops=num_ops)

    transforms_all = [C.Decode(), C.Resize(size=[224, 224]),
                      uni_aug,
                      F.ToTensor()]

    ds_ua = ds.map(input_columns="image",
                   operations=transforms_all, num_parallel_workers=1)

    ds_ua = ds_ua.batch(512)

    for idx, (image, _) in enumerate(ds_ua):
        if idx == 0:
            images_ua = np.transpose(image, (0, 2, 3, 1))
        else:
            images_ua = np.append(images_ua,
                                  np.transpose(image, (0, 2, 3, 1)),
                                  axis=0)
    if plot:
        visualize_list(images_original, images_ua)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_ua[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
Example 5
def test_random_sharpness_c_py(degrees=(1.0, 1.0), plot=False):
    """
    Test RandomSharpness C++ and Python ops
    """
    logger.info("Test RandomSharpness C++ and Python ops")

    # RandomSharpness Images
    data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    data = data.map(input_columns=["image"],
                    operations=[C.Decode(), C.Resize((200, 300))])

    python_op = F.RandomSharpness(degrees)
    c_op = C.RandomSharpness(degrees)

    transforms_op = F.ComposeOp(
        [lambda img: F.ToPIL()(img.astype(np.uint8)), python_op, np.array])()

    ds_random_sharpness_py = data.map(input_columns="image",
                                      operations=transforms_op)

    ds_random_sharpness_py = ds_random_sharpness_py.batch(512)

    for idx, (image, _) in enumerate(ds_random_sharpness_py):
        if idx == 0:
            images_random_sharpness_py = image

        else:
            images_random_sharpness_py = np.append(images_random_sharpness_py,
                                                   image,
                                                   axis=0)

    data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    data = data.map(input_columns=["image"],
                    operations=[C.Decode(), C.Resize((200, 300))])

    ds_images_random_sharpness_c = data.map(input_columns="image",
                                            operations=c_op)

    ds_images_random_sharpness_c = ds_images_random_sharpness_c.batch(512)

    for idx, (image, _) in enumerate(ds_images_random_sharpness_c):
        if idx == 0:
            images_random_sharpness_c = image

        else:
            images_random_sharpness_c = np.append(images_random_sharpness_c,
                                                  image,
                                                  axis=0)

    num_samples = images_random_sharpness_c.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_random_sharpness_c[i],
                          images_random_sharpness_py[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    if plot:
        visualize_list(images_random_sharpness_c,
                       images_random_sharpness_py,
                       visualize_mode=2)
Example 6
def test_random_sharpness_c(degrees=(1.6, 1.6), plot=False):
    """
    Test RandomSharpness cpp op
    """
    logger.info("Test RandomSharpness cpp op with degrees: {}".format(degrees))

    # Original Images
    data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = [C.Decode(),
                           C.Resize((224, 224))]

    ds_original = data.map(input_columns="image",
                           operations=transforms_original)

    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = image
        else:
            images_original = np.append(images_original,
                                        image,
                                        axis=0)

    # Random Sharpness Adjusted Images
    data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)

    c_op = C.RandomSharpness()
    if degrees is not None:
        c_op = C.RandomSharpness(degrees)

    transforms_random_sharpness = [C.Decode(),
                                   C.Resize((224, 224)),
                                   c_op]

    ds_random_sharpness = data.map(input_columns="image",
                                   operations=transforms_random_sharpness)

    ds_random_sharpness = ds_random_sharpness.batch(512)

    for idx, (image, _) in enumerate(ds_random_sharpness):
        if idx == 0:
            images_random_sharpness = image
        else:
            images_random_sharpness = np.append(images_random_sharpness,
                                                image,
                                                axis=0)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_random_sharpness[i], images_original[i])

    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_original, images_random_sharpness)
Example 7
def test_auto_contrast_c(plot=False):
    """
    Test AutoContrast C Op
    """
    logger.info("Test AutoContrast C Op")

    # AutoContrast Images
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    ds = ds.map(input_columns=["image"],
                operations=[C.Decode(), C.Resize((224, 224))])
    python_op = F.AutoContrast()
    c_op = C.AutoContrast()
    transforms_op = F.ComposeOp(
        [lambda img: F.ToPIL()(img.astype(np.uint8)), python_op, np.array])()

    ds_auto_contrast_py = ds.map(input_columns="image",
                                 operations=transforms_op)

    ds_auto_contrast_py = ds_auto_contrast_py.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_py):
        if idx == 0:
            images_auto_contrast_py = image
        else:
            images_auto_contrast_py = np.append(images_auto_contrast_py,
                                                image,
                                                axis=0)

    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    ds = ds.map(input_columns=["image"],
                operations=[C.Decode(), C.Resize((224, 224))])

    ds_auto_contrast_c = ds.map(input_columns="image", operations=c_op)

    ds_auto_contrast_c = ds_auto_contrast_c.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_c):
        if idx == 0:
            images_auto_contrast_c = image
        else:
            images_auto_contrast_c = np.append(images_auto_contrast_c,
                                               image,
                                               axis=0)

    num_samples = images_auto_contrast_c.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_auto_contrast_c[i],
                          images_auto_contrast_py[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    np.testing.assert_equal(np.mean(mse), 0.0)

    if plot:
        visualize_list(images_auto_contrast_c,
                       images_auto_contrast_py,
                       visualize_mode=2)
Example 8
def test_invert_py_c(plot=False):
    """
    Test Invert C++ and Python ops
    """
    logger.info("Test Invert C++ and Python ops")

    # Invert Images in cpp
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    ds = ds.map(input_columns=["image"],
                operations=[C.Decode(), C.Resize((224, 224))])

    ds_c_invert = ds.map(input_columns="image", operations=C.Invert())

    ds_c_invert = ds_c_invert.batch(512)

    for idx, (image, _) in enumerate(ds_c_invert):
        if idx == 0:
            images_c_invert = image
        else:
            images_c_invert = np.append(images_c_invert, image, axis=0)

    # invert images in python
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    ds = ds.map(input_columns=["image"],
                operations=[C.Decode(), C.Resize((224, 224))])

    transforms_p_invert = F.ComposeOp(
        [lambda img: img.astype(np.uint8),
         F.ToPIL(),
         F.Invert(), np.array])

    ds_p_invert = ds.map(input_columns="image",
                         operations=transforms_p_invert())

    ds_p_invert = ds_p_invert.batch(512)

    for idx, (image, _) in enumerate(ds_p_invert):
        if idx == 0:
            images_p_invert = image
        else:
            images_p_invert = np.append(images_p_invert, image, axis=0)

    num_samples = images_c_invert.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_p_invert[i], images_c_invert[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_c_invert, images_p_invert, visualize_mode=2)
Example 9
def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file):
    """tutorial for cv minddataset."""
    columns_list = ["data", "label"]
    num_readers = 4
    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
    decode_op = vision.Decode()
    data_set = data_set.map(input_columns=["data"],
                            operations=decode_op,
                            num_parallel_workers=2)
    resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR)
    data_set = data_set.map(input_columns="data",
                            operations=resize_op,
                            num_parallel_workers=2)
    data_set = data_set.batch(2)
    data_set = data_set.repeat(2)
    num_iter = 0
    labels = []
    for item in data_set.create_dict_iterator():
        logger.info(
            "-------------- get dataset size {} -----------------".format(
                num_iter))
        logger.info(
            "-------------- item[label]: {} ---------------------".format(
                item["label"]))
        logger.info(
            "-------------- item[data]: {} ----------------------".format(
                item["data"]))
        num_iter += 1
        labels.append(item["label"])
    assert num_iter == 10
    logger.info("repeat shuffle: {}".format(labels))
    assert len(labels) == 10
    # With reshuffle between epochs, the second epoch's order should differ
    assert not np.array_equal(labels[0:5], labels[5:10])
Example 10
def skip_test_exception():
    ds.config.set_num_parallel_workers(1)
    data = ds.TFRecordDataset(DATA_DIR, columns_list=["image"])
    data = data.map(input_columns=["image"], operations=vision.Resize(100, 100))
    with pytest.raises(RuntimeError) as info:
        data.create_tuple_iterator().get_next()
    assert "The shape size 1 of input tensor is invalid" in str(info.value)
Example 11
def create_mnist_dataset(mnist_dir, num_parallel_workers=1):
    ds = de.MnistDataset(mnist_dir)

    # apply map operations on images
    ds = ds.map(input_columns="label", operations=C.TypeCast(mstype.int32))
    ds = ds.map(input_columns="image",
                operations=VC.Resize(
                    (MNIST_CONFIG.image_height, MNIST_CONFIG.image_width),
                    interpolation=Inter.LINEAR),
                num_parallel_workers=num_parallel_workers)
    ds = ds.map(input_columns="image",
                operations=VC.Rescale(1 / 0.3081, -1 * 0.1307 / 0.3081),
                num_parallel_workers=num_parallel_workers)
    ds = ds.map(input_columns="image",
                operations=VC.Rescale(1.0 / 255.0, 0.0),
                num_parallel_workers=num_parallel_workers)
    ds = ds.map(input_columns="image",
                operations=VC.HWC2CHW(),
                num_parallel_workers=num_parallel_workers)

    # apply DatasetOps
    ds = ds.shuffle(
        buffer_size=MNIST_CONFIG.buffer_size)  # 10000 as in LeNet train script
    ds = ds.batch(MNIST_CONFIG.batch_size, drop_remainder=True)
    ds = ds.repeat(MNIST_CONFIG.repeat_size)

    return ds
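
A hypothetical call site for the function above; MNIST_CONFIG is assumed to be a module-level object exposing image_height, image_width, buffer_size, batch_size, and repeat_size:

# Hypothetical usage (path and worker count are placeholders)
train_ds = create_mnist_dataset("./MNIST_Data/train", num_parallel_workers=4)
for batch in train_ds.create_dict_iterator():
    print(batch["image"].shape, batch["label"].shape)
    break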
Example 12
def test_soft_dvpp_decode_resize_jpeg(plot=False):
    """
    Test SoftDvppDecodeResizeJpeg op
    """
    logger.info("test_random_decode_resize_op")

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = vision.Decode()
    resize_op = vision.Resize((256, 512))
    data1 = data1.map(input_columns=["image"], operations=[decode_op, resize_op])

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    soft_dvpp_decode_resize_op = vision.SoftDvppDecodeResizeJpeg((256, 512))
    data2 = data2.map(input_columns=["image"], operations=soft_dvpp_decode_resize_op)

    num_iter = 0
    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        if num_iter > 0:
            break
        image1 = item1["image"]
        image2 = item2["image"]
        mse = diff_mse(image1, image2)
        assert mse <= 0.02
        logger.info("random_crop_decode_resize_op_{}, mse: {}".format(num_iter + 1, mse))
        if plot:
            visualize_image(image1, image2, mse)
        num_iter += 1
Example 13
def test_resize_md5_parameters(test_name, size, filename, seed, plot):
    """
    Test Resize with md5 check
    """
    logger.info("Test Resize with md5 check: {0}".format(test_name))
    original_seed = config_get_set_seed(seed)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data1 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    decode_op = vision.Decode()
    resize_op = vision.Resize(size)
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data2 = data1.map(input_columns=["image"], operations=resize_op)
    image_original = []
    image_resized = []
    # Compare with expected md5 from images
    save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)

    for item1, item2 in zip(data1.create_dict_iterator(),
                            data2.create_dict_iterator()):
        image_1 = item1["image"]
        image_2 = item2["image"]
        image_original.append(image_1)
        image_resized.append(image_2)
    if plot:
        visualize_list(image_original, image_resized)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
Example 14
def test_resize_op_parameters(test_name, size, plot):
    """
    Test resize_op
    """
    logger.info("Test resize: {0}".format(test_name))
    data1 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)

    # define map operations
    decode_op = vision.Decode()
    resize_op = vision.Resize(size)

    # apply map operations on images
    data1 = data1.map(input_columns=["image"], operations=decode_op)

    data2 = data1.map(input_columns=["image"], operations=resize_op)
    image_original = []
    image_resized = []
    for item1, item2 in zip(data1.create_dict_iterator(),
                            data2.create_dict_iterator()):
        image_1 = item1["image"]
        image_2 = item2["image"]
        image_original.append(image_1)
        image_resized.append(image_2)
    if plot:
        visualize_list(image_original, image_resized)
Example 15
def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(
        add_and_remove_cv_file):
    """tutorial for cv minderdataset."""
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)

    resize_height = 32
    resize_width = 32

    # define map operations
    decode_op = vision.Decode()
    resize_op = vision.Resize((resize_height, resize_width),
                              ds.transforms.vision.Inter.LINEAR)

    data_set = data_set.map(input_columns=["data"],
                            operations=decode_op,
                            num_parallel_workers=4)
    data_set = data_set.map(input_columns=["data"],
                            operations=resize_op,
                            num_parallel_workers=4)

    data_set = data_set.batch(2)
    assert data_set.get_dataset_size() == 5
    for epoch in range(5):
        num_iter = 0
        for data in data_set:
            logger.info("data is {}".format(data))
            num_iter += 1
        assert num_iter == 5

        data_set.reset()
Example 16
def test_exception_02():
    """
    Test exceptions with invalid input, and test valid input
    """
    logger.info("test_exception_02")
    num_samples = -1
    with pytest.raises(ValueError) as info:
        ds.TFRecordDataset(DATA_DIR,
                           SCHEMA_DIR,
                           columns_list=["image"],
                           num_samples=num_samples)
    assert 'Input num_samples is not within the required interval of (0 to 2147483647).' in str(
        info.value)

    num_samples = 1
    data = ds.TFRecordDataset(DATA_DIR,
                              SCHEMA_DIR,
                              columns_list=["image"],
                              num_samples=num_samples)
    data = data.map(input_columns=["image"], operations=vision.Decode())
    data = data.map(input_columns=["image"],
                    operations=vision.Resize((100, 100)))
    # Confirm 1 sample in dataset
    assert sum([1 for _ in data]) == 1
    num_iters = 0
    for _ in data.create_dict_iterator():
        num_iters += 1
    assert num_iters == 1
Example 17
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
    """
    create a train or eval imagenet2012 dataset for resnet50

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32
        target(str): the device target. Default: Ascend
    Returns:
        dataset
    """

    if target == "Ascend":
        device_num, rank_id = _get_rank_info()
    else:
        init("nccl")
        rank_id = get_rank()
        device_num = get_group_size()

    if device_num == 1:
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                     num_shards=device_num, shard_id=rank_id)

    image_size = 224
    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # define map operations
    if do_train:
        trans = [
            C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            C.Normalize(mean=mean, std=std),
            C.HWC2CHW()
        ]
    else:
        trans = [
            C.Decode(),
            C.Resize(256),
            C.CenterCrop(image_size),
            C.Normalize(mean=mean, std=std),
            C.HWC2CHW()
        ]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)
    ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
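
A hypothetical single-device call; the path is a placeholder, and the Ascend branch assumes _get_rank_info() reports device_num == 1:

# Hypothetical usage for single-device Ascend training (placeholder path)
train_ds = create_dataset("/path/to/imagenet2012/train", do_train=True,
                          batch_size=32, target="Ascend")
print("batches per epoch:", train_ds.get_dataset_size())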
Example 18
def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file):
    """tutorial for cv minddataset."""
    columns_list = ["data", "label"]
    num_readers = 4
    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
    decode_op = vision.Decode()
    data_set = data_set.map(input_columns=["data"],
                            operations=decode_op,
                            num_parallel_workers=2)
    resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR)
    data_set = data_set.map(input_columns="data",
                            operations=resize_op,
                            num_parallel_workers=2)
    data_set = data_set.batch(32, drop_remainder=True)
    num_iter = 0
    for item in data_set.create_dict_iterator():
        logger.info(
            "-------------- get dataset size {} -----------------".format(
                num_iter))
        logger.info(
            "-------------- item[label]: {} ---------------------".format(
                item["label"]))
        logger.info(
            "-------------- item[data]: {} ----------------------".format(
                item["data"]))
        num_iter += 1
    assert num_iter == 0
Example 19
def test_me_de_train_dataset():
    data_list = ["{0}/train-00001-of-01024.data".format(data_path)]
    data_set_new = ds.TFRecordDataset(
        data_list,
        schema=SCHEMA_DIR,
        columns_list=["image/encoded", "image/class/label"])

    resize_height = 224
    resize_width = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations

    decode_op = vision.Decode()
    resize_op = vision.Resize((resize_height, resize_width),
                              Inter.LINEAR)  # Bilinear as default
    rescale_op = vision.Rescale(rescale, shift)

    # apply map operations on images
    data_set_new = data_set_new.map(input_columns="image/encoded",
                                    operations=decode_op)
    data_set_new = data_set_new.map(input_columns="image/encoded",
                                    operations=resize_op)
    data_set_new = data_set_new.map(input_columns="image/encoded",
                                    operations=rescale_op)
    hwc2chw_op = vision.HWC2CHW()
    data_set_new = data_set_new.map(input_columns="image/encoded",
                                    operations=hwc2chw_op)
    data_set_new = data_set_new.repeat(1)
    # apply batch operations
    batch_size_new = 32
    data_set_new = data_set_new.batch(batch_size_new, drop_remainder=True)
    return data_set_new
Example 20
def create_dataset(data_path, batch_size=32, repeat_size=1, mode="train"):
    """
    create dataset for train or test
    """
    cifar_ds = ds.Cifar10Dataset(data_path)
    rescale = 1.0 / 255.0
    shift = 0.0

    resize_op = CV.Resize((cfg.image_height, cfg.image_width))
    rescale_op = CV.Rescale(rescale, shift)
    normalize_op = CV.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    if mode == "train":
        random_crop_op = CV.RandomCrop([32, 32], [4, 4, 4, 4])
        random_horizontal_op = CV.RandomHorizontalFlip()
    channel_swap_op = CV.HWC2CHW()
    typecast_op = C.TypeCast(mstype.int32)
    cifar_ds = cifar_ds.map(input_columns="label", operations=typecast_op)
    if mode == "train":
        cifar_ds = cifar_ds.map(input_columns="image", operations=random_crop_op)
        cifar_ds = cifar_ds.map(input_columns="image", operations=random_horizontal_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=resize_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=rescale_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=normalize_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=channel_swap_op)

    cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
    cifar_ds = cifar_ds.repeat(repeat_size)
    return cifar_ds
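
A hypothetical invocation; cfg is assumed to provide image_height, image_width, and buffer_size, and the paths are placeholders:

# Hypothetical usage (placeholder paths; cfg attributes are assumptions)
train_ds = create_dataset("./cifar-10-batches-bin", batch_size=32, mode="train")
eval_ds = create_dataset("./cifar-10-verify-bin", batch_size=32, mode="eval")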
Example 21
def test_case_3():
    """
    Test Repeat & Batch
    """
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    # define data augmentation parameters
    resize_height, resize_width = 224, 224

    # define map operations
    decode_op = vision.Decode()
    resize_op = vision.Resize((resize_height, resize_width))

    # apply map operations on images
    data = data.map(input_columns=["image"], operations=decode_op)
    data = data.map(input_columns=["image"], operations=resize_op)

    data = data.repeat(2)

    batch_size = 2
    data = data.batch(batch_size, drop_remainder=True)

    data = data.device_que()
    data.send()
    time.sleep(0.1)
    data.stop_send()
Example 22
def test_exception_02():
    """
    Test multiple exceptions with invalid input
    """
    logger.info("test_exception_02")
    num_samples = 0
    with pytest.raises(ValueError) as info:
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
    assert "num_samples must be greater than 0" in str(info.value)

    num_samples = -1
    with pytest.raises(ValueError) as info:
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
    assert "num_samples must be greater than 0" in str(info.value)

    num_samples = 1
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], num_samples=num_samples)
    data = data.map(input_columns=["image"], operations=vision.Decode())
    data = data.map(input_columns=["image"], operations=vision.Resize((100, 100)))
    # Confirm 1 sample in dataset
    assert sum([1 for _ in data]) == 1
    num_iters = 0
    for _ in data.create_dict_iterator():
        num_iters += 1
    assert num_iters == 1
Example 23
def test_sample_exception():
    num_samples = 0
    with pytest.raises(ValueError) as info:
        data = ds.TFRecordDataset(DATA_DIR,
                                  SCHEMA_DIR,
                                  columns_list=["image"],
                                  num_samples=num_samples)
    assert "num_samples must be greater than 0" in str(info.value)
    num_samples = -1
    with pytest.raises(ValueError) as info:
        data = ds.TFRecordDataset(DATA_DIR,
                                  SCHEMA_DIR,
                                  columns_list=["image"],
                                  num_samples=num_samples)
    assert "num_samples must be greater than 0" in str(info.value)
    num_samples = 1
    data = ds.TFRecordDataset(DATA_DIR,
                              SCHEMA_DIR,
                              columns_list=["image"],
                              num_samples=num_samples)
    data = data.map(input_columns=["image"], operations=vision.Decode())
    data = data.map(input_columns=["image"],
                    operations=vision.Resize((100, 100)))
    num_iters = 0
    for item in data.create_dict_iterator():
        num_iters += 1
    assert num_iters == 1
Example 24
def create_dataset(data_path,
                   batch_size=32,
                   repeat_size=1,
                   num_parallel_workers=1):
    """
    create dataset for train or test
    """
    # define dataset
    mnist_ds = ds.MnistDataset(data_path)

    resize_height, resize_width = 32, 32
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    resize_op = CV.Resize((resize_height, resize_width))  # Bilinear mode
    rescale_op = CV.Rescale(rescale, shift)
    hwc2chw_op = CV.HWC2CHW()

    # apply map operations on images
    mnist_ds = mnist_ds.map(input_columns="image",
                            operations=resize_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image",
                            operations=rescale_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image",
                            operations=hwc2chw_op,
                            num_parallel_workers=num_parallel_workers)

    # apply DatasetOps
    mnist_ds = mnist_ds.batch(batch_size)
    mnist_ds = mnist_ds.repeat(repeat_size)

    return mnist_ds
Example 25
def create_dataset(data_path, batch_size=32, repeat_size=1,
                   num_parallel_workers=1):
    """
    create dataset for train or test
    """
    # define dataset
    mnist_ds = ds.MnistDataset(data_path)

    resize_height, resize_width = 32, 32
    rescale = 1.0 / 255.0
    shift = 0.0
    rescale_nml = 1 / 0.3081
    shift_nml = -1 * 0.1307 / 0.3081

    # define map operations
    resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)  # Bilinear mode
    rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)
    rescale_op = CV.Rescale(rescale, shift)
    hwc2chw_op = CV.HWC2CHW()
    type_cast_op = C.TypeCast(mstype.int32)

    # apply map operations on images
    mnist_ds = mnist_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=resize_op, num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_op, num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=rescale_nml_op, num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image", operations=hwc2chw_op, num_parallel_workers=num_parallel_workers)

    # apply DatasetOps
    buffer_size = 10000
    mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size)  # 10000 as in LeNet train script
    mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
    mnist_ds = mnist_ds.repeat(repeat_size)

    return mnist_ds
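
The two chained Rescale ops compose into the standard MNIST normalization (x / 255 - 0.1307) / 0.3081. A quick numeric check of that identity:

# Rescale(1/255, 0) followed by Rescale(1/0.3081, -0.1307/0.3081)
# is algebraically (x/255 - 0.1307) / 0.3081 for any pixel value x.
x = 128.0
step1 = x * (1.0 / 255.0) + 0.0
step2 = step1 * (1 / 0.3081) + (-1 * 0.1307 / 0.3081)
assert abs(step2 - ((x / 255.0 - 0.1307) / 0.3081)) < 1e-9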
Example 26
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset.

    Args:
        dataset_path (str): The path of dataset.
        do_train (bool): Whether dataset is used for train or eval.
        repeat_num (int): The repeat times of dataset. Default: 1.
        batch_size (int): The batch size of dataset. Default: 32.

    Returns:
        Dataset.
    """
    if do_train:
        dataset_path = os.path.join(dataset_path, 'train')
        do_shuffle = True
    else:
        dataset_path = os.path.join(dataset_path, 'eval')
        do_shuffle = False

    if device_num == 1 or not do_train:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=do_shuffle)
    else:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=do_shuffle,
                               num_shards=device_num, shard_id=device_id)

    resize_height = 224
    resize_width = 224
    buffer_size = 100
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = C.RandomCrop((32, 32), (4, 4, 4, 4))
    random_horizontal_flip_op = C.RandomHorizontalFlip(device_id / (device_id + 1))

    resize_op = C.Resize((resize_height, resize_width))
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])

    change_swap_op = C.HWC2CHW()

    trans = []
    if do_train:
        trans += [random_crop_op, random_horizontal_flip_op]

    trans += [resize_op, rescale_op, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
    ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
Example 27
def test_mix_up_multi():
    """
    Test multi batch mix up op
    """
    logger.info("Test several batch mix up op")

    resize_height = 224
    resize_width = 224

    # Create dataset and define map operations
    ds1 = ds.ImageFolderDatasetV2(DATA_DIR_2)

    num_classes = 3
    decode_op = c_vision.Decode()
    resize_op = c_vision.Resize((resize_height, resize_width),
                                c_vision.Inter.LINEAR)
    one_hot_encode = c.OneHot(num_classes)  # num_classes is input argument

    ds1 = ds1.map(input_columns=["image"], operations=decode_op)
    ds1 = ds1.map(input_columns=["image"], operations=resize_op)
    ds1 = ds1.map(input_columns=["label"], operations=one_hot_encode)

    # apply batch operations
    batch_size = 3
    ds1 = ds1.batch(batch_size, drop_remainder=True)

    ds2 = ds1
    alpha = 0.2
    transforms = [
        py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=False)
    ]
    ds1 = ds1.map(input_columns=["image", "label"], operations=transforms)
    num_iter = 0
    batch1_image1 = 0
    for data1, data2 in zip(ds1.create_dict_iterator(),
                            ds2.create_dict_iterator()):
        image1 = data1["image"]
        label1 = data1["label"]
        logger.info("label: {}".format(label1))

        image2 = data2["image"]
        label2 = data2["label"]
        logger.info("label2: {}".format(label2))

        if num_iter == 0:
            batch1_image1 = image1

        if num_iter == 1:
            lam = np.abs(label2 - label1)
            logger.info("lam value in multi: {}".format(lam))
            for index in range(batch_size):
                if np.square(lam[index]).mean() != 0:
                    lam_value = 1 - np.sum(lam[index]) / 2
                    img_golden = lam_value * image2[index] + (
                        1 - lam_value) * batch1_image1[index]
                    # compare the mixed image against the golden blend element-wise
                    np.testing.assert_allclose(image1[index], img_golden, rtol=1e-5, atol=1)
                    logger.info("====test several batch mixup ok====")
            break
        num_iter = num_iter + 1
Example 28
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32

    Returns:
        dataset
    """
    device_num = int(os.getenv("DEVICE_NUM"))
    rank_id = int(os.getenv("RANK_ID"))

    if device_num == 1:
        ds = de.Cifar10Dataset(dataset_path,
                               num_parallel_workers=8,
                               shuffle=True)
    else:
        ds = de.Cifar10Dataset(dataset_path,
                               num_parallel_workers=8,
                               shuffle=True,
                               num_shards=device_num,
                               shard_id=rank_id)

    # define map operations
    trans = []
    if do_train:
        trans += [
            C.RandomCrop((32, 32), (4, 4, 4, 4)),
            C.RandomHorizontalFlip(prob=0.5)
        ]

    trans += [
        C.Resize((config.image_height, config.image_width)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label",
                num_parallel_workers=8,
                operations=type_cast_op)
    ds = ds.map(input_columns="image",
                num_parallel_workers=8,
                operations=trans)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
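
A hypothetical single-device run; the function reads DEVICE_NUM and RANK_ID from the environment, and config is assumed to define image_height and image_width:

# Hypothetical usage: satisfy the expected env vars, then build the pipeline
import os
os.environ.setdefault("DEVICE_NUM", "1")
os.environ.setdefault("RANK_ID", "0")
train_ds = create_dataset("./cifar-10-batches-bin", do_train=True, batch_size=32)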
Example 29
def create_dataset(dataset_path, do_train, config, device_target, repeat_num=1, batch_size=32):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        config: config object providing image_height and image_width.
        device_target(string): the device target; only "GPU" is supported.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32

    Returns:
        dataset
    """
    if device_target == "GPU":
        if do_train:
            from mindspore.communication.management import get_rank, get_group_size
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                         num_shards=get_group_size(), shard_id=get_rank())
        else:
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        raise ValueError("Unsupported device_target.")

    resize_height = config.image_height
    resize_width = config.image_width
    buffer_size = 1000

    # define map operations
    decode_op = C.Decode()
    resize_crop_op = C.RandomCropDecodeResize(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)

    resize_op = C.Resize(256)
    center_crop = C.CenterCrop(resize_width)
    rescale_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
    normalize_op = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255], std=[0.229*255, 0.224*255, 0.225*255])
    change_swap_op = C.HWC2CHW()

    if do_train:
        trans = [resize_crop_op, horizontal_flip_op, rescale_op, normalize_op, change_swap_op]
    else:
        trans = [decode_op, resize_op, center_crop, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=8)
    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=buffer_size)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
Example 30
def create_dataset(data_path,
                   batch_size=32,
                   repeat_size=1,
                   num_parallel_workers=1):
    """ create dataset for train or test
    Args:
        data_path: Data path
        batch_size: The number of data records in each group
        repeat_size: The number of replicated data records
        num_parallel_workers: The number of parallel workers
    """
    # define dataset
    mnist_ds = ds.MnistDataset(data_path)

    # define operation parameters
    resize_height, resize_width = 32, 32
    rescale = 1.0 / 255.0
    shift = 0.0
    rescale_nml = 1 / 0.3081
    shift_nml = -1 * 0.1307 / 0.3081

    # define map operations
    resize_op = CV.Resize(
        (resize_height, resize_width),
        interpolation=Inter.LINEAR)  # Resize images to (32, 32)
    rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)  # normalize images
    rescale_op = CV.Rescale(rescale, shift)  # rescale images
    hwc2chw_op = CV.HWC2CHW(
    )  # change shape from (height, width, channel) to (channel, height, width) to fit network.
    type_cast_op = C.TypeCast(
        mstype.int32)  # change data type of label to int32 to fit network

    # apply map operations on images
    mnist_ds = mnist_ds.map(input_columns="label",
                            operations=type_cast_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image",
                            operations=resize_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image",
                            operations=rescale_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image",
                            operations=rescale_nml_op,
                            num_parallel_workers=num_parallel_workers)
    mnist_ds = mnist_ds.map(input_columns="image",
                            operations=hwc2chw_op,
                            num_parallel_workers=num_parallel_workers)

    # apply DatasetOps
    buffer_size = 10000
    mnist_ds = mnist_ds.shuffle(
        buffer_size=buffer_size)  # 10000 as in LeNet train script
    mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
    mnist_ds = mnist_ds.repeat(repeat_size)

    return mnist_ds
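
A hypothetical end-to-end call for the function above; the path is a placeholder, and with batch_size=32 the image column should come out as (32, 1, 32, 32) after HWC2CHW and batching:

# Hypothetical usage (placeholder path)
mnist_train = create_dataset("./MNIST_Data/train", batch_size=32)
for item in mnist_train.create_dict_iterator():
    print(item["image"].shape, item["label"].shape)  # (32, 1, 32, 32), (32,)
    break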