def create_dataset(data_path, repeat_num=1, batch_size=32):
    resize_height = 224
    resize_width = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # get rank_id and rank_size for this device's shard
    rank_id = get_rank()
    rank_size = get_group_size()
    data_set = ds.Cifar10Dataset(data_path,
                                 num_shards=rank_size,
                                 shard_id=rank_id)

    # define map operations
    random_crop_op = vision.RandomCrop((32, 32), (4, 4, 4, 4))
    random_horizontal_op = vision.RandomHorizontalFlip()
    resize_op = vision.Resize((resize_height, resize_width))
    rescale_op = vision.Rescale(rescale, shift)
    normalize_op = vision.Normalize((0.4465, 0.4822, 0.4914),
                                    (0.2010, 0.1994, 0.2023))
    changeswap_op = vision.HWC2CHW()
    type_cast_op = C.TypeCast(mstype.int32)

    c_trans = [random_crop_op, random_horizontal_op]
    c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]

    # apply map operations on images
    data_set = data_set.map(input_columns="label", operations=type_cast_op)
    data_set = data_set.map(input_columns="image", operations=c_trans)

    # apply shuffle operations
    data_set = data_set.shuffle(buffer_size=10)

    # apply batch operations
    data_set = data_set.batch(batch_size=batch_size, drop_remainder=True)

    # apply repeat operations
    data_set = data_set.repeat(repeat_num)

    return data_set
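A minimal usage sketch for the function above, assuming the same surrounding imports (mindspore.dataset as ds, get_rank/get_group_size from mindspore.communication.management) and an already-initialized communication group; the dataset path is hypothetical:

# Hypothetical call site: build this device's shard and pull one batch.
dataset = create_dataset("/path/to/cifar10", repeat_num=1, batch_size=32)
for batch in dataset.create_dict_iterator():
    images, labels = batch["image"], batch["label"]  # CHW images, int32 labels
    break  # one batch is enough for a smoke test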
Example 2
def test_deterministic_run_fail():
    """
    Test RandomCrop with seed, expected to fail
    """
    logger.info("test_deterministic_run_fail")

    # when the seed is set, all random operations within the dataset should be deterministic
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)
    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    # Each op draws its seed when it is constructed, so a single instance that is
    # re-used across two pipelines will not produce the same results in both. For
    # example, RandomCrop constructed with seed 0 outputs a deterministic series of
    # numbers, e.g. "a" = [1, 2, 3, 4, 5, 6] <- pretend these are random
    random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200])
    decode_op = vision.Decode()
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"], operations=random_crop_op)

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=decode_op)
    # The same op instance is re-used here, so the two pipelines share one random sequence
    data2 = data2.map(input_columns=["image"], operations=random_crop_op)

    try:
        for item1, item2 in zip(data1.create_dict_iterator(),
                                data2.create_dict_iterator()):
            np.testing.assert_equal(item1["image"], item2["image"])

    except BaseException as e:
        # the two datasets split the numbers of sequence "a" between them, so the crops differ
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Array" in str(e)
Example 3
def test_random_crop_comp(plot=False):
    """
    Test RandomCrop and compare between python and c image augmentation
    """
    logger.info("Test RandomCrop with c_transform and py_transform comparison")
    cropped_size = 512

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    random_crop_op = c_vision.RandomCrop(cropped_size)
    decode_op = c_vision.Decode()
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"], operations=random_crop_op)

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    transforms = [
        py_vision.Decode(),
        py_vision.RandomCrop(cropped_size),
        py_vision.ToTensor()
    ]
    transform = py_vision.ComposeOp(transforms)
    data2 = data2.map(input_columns=["image"], operations=transform())

    image_c_cropped = []
    image_py_cropped = []
    for item1, item2 in zip(data1.create_dict_iterator(),
                            data2.create_dict_iterator()):
        c_image = item1["image"]
        py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
        image_c_cropped.append(c_image)
        image_py_cropped.append(py_image)
    if plot:
        visualize_list(image_c_cropped, image_py_cropped, visualize_mode=2)
Example 4
def test_random_crop_02_c():
    """
    Test RandomCrop op with c_transforms: size is a list/tuple with length 2, expected to pass
    """
    logger.info("test_random_crop_02_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: If size is a sequence of length 2, it should be (height, width).
    random_crop_op = c_vision.RandomCrop([512, 375])
    decode_op = c_vision.Decode()
    data = data.map(input_columns=["image"], operations=decode_op)
    data = data.map(input_columns=["image"], operations=random_crop_op)

    filename = "random_crop_02_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
Example 5
def test_random_crop_01_c():
    """
    Test RandomCrop op with c_transforms: size is a single integer, expected to pass
    """
    logger.info("test_random_crop_01_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: If size is an int, a square crop of size (size, size) is returned.
    random_crop_op = c_vision.RandomCrop(512)
    decode_op = c_vision.Decode()
    data = data.map(input_columns=["image"], operations=decode_op)
    data = data.map(input_columns=["image"], operations=random_crop_op)

    filename = "random_crop_01_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
Example 6
def test_random_crop_03_c():
    """
    Test RandomCrop op with c_transforms: input image size == crop size, expected to pass
    """
    logger.info("test_random_crop_03_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: The test image is 4032x2268, so the (height, width) crop below equals the full image
    random_crop_op = c_vision.RandomCrop([2268, 4032])
    decode_op = c_vision.Decode()
    data = data.map(input_columns=["image"], operations=decode_op)
    data = data.map(input_columns=["image"], operations=random_crop_op)

    filename = "random_crop_03_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
Example 7
def test_random_crop_08_c():
    """
    Test RandomCrop op with c_transforms: padding_mode is Border.EDGE,
    expected to pass
    """
    logger.info("test_random_crop_08_c")
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR,
                              SCHEMA_DIR,
                              columns_list=["image"],
                              shuffle=False)
    # Note: The padding_mode is Border.EDGE.
    random_crop_op = c_vision.RandomCrop(512, [200, 200, 200, 200],
                                         padding_mode=mode.Border.EDGE)
    decode_op = c_vision.Decode()
    data = data.map(input_columns=["image"], operations=decode_op)
    data = data.map(input_columns=["image"], operations=random_crop_op)

    filename = "random_crop_08_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
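For reference, Border.EDGE is one of four padding strategies on the Border enum (imported here under the mode alias); a short sketch enumerating them, under the same imports as the test above:

# All four Border modes are accepted by RandomCrop's padding_mode argument.
for border in (mode.Border.CONSTANT,    # pad with fill_value
               mode.Border.EDGE,        # replicate the edge pixels
               mode.Border.REFLECT,     # mirror without the edge pixel
               mode.Border.SYMMETRIC):  # mirror including the edge pixel
    _ = c_vision.RandomCrop(512, [200, 200, 200, 200], padding_mode=border)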
Example 8
def test_cpp_uniform_augment_exception_pyops(num_ops=2):
    """
    Test UniformAugment with an invalid (py_transforms) op in the transforms list,
    expected to raise TypeError
    """
    logger.info("Test CPP UniformAugment invalid OP exception")

    transforms_ua = [
        C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
        C.RandomHorizontalFlip(),
        C.RandomVerticalFlip(),
        C.RandomColorAdjust(),
        C.RandomRotation(degrees=45),
        F.Invert()
    ]

    with pytest.raises(TypeError) as e:
        C.UniformAugment(transforms=transforms_ua, num_ops=num_ops)

    logger.info("Got an exception in DE: {}".format(str(e)))
    assert "Argument tensor_ops[5] with value" \
           " <mindspore.dataset.transforms.vision.py_transforms.Invert" in str(e.value)
    assert "is not of type (<class 'mindspore._c_dataengine.TensorOp'>,)" in str(
        e.value)
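For contrast, the constructor succeeds once every entry is a c_transforms op; a minimal sketch under the same imports, passing the list positionally since the keyword name (transforms vs. operations) differs across MindSpore versions:

# All-C list: dropping the py_transforms Invert() satisfies the TensorOp check.
transforms_ok = [
    C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
    C.RandomHorizontalFlip(),
    C.RandomVerticalFlip(),
]
uni_aug = C.UniformAugment(transforms_ok, num_ops)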
Example 9
def test_bounding_box_augment_with_crop_op(plot_vis=False):
    """
    Test BoundingBoxAugment op (passing crop op as transform)
    Prints images side by side with and without Aug applied + bboxes to compare and test
    """
    logger.info("test_bounding_box_augment_with_crop_op")

    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False)
    dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False)

    # Ratio is set to 0.9 to apply RandomCrop of size (50, 50) on 90% of the bounding boxes.
    test_op = c_vision.BoundingBoxAugment(c_vision.RandomCrop(50), 0.9)

    # map to apply ops
    dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"],
                            output_columns=["image", "annotation"],
                            columns_order=["image", "annotation"],
                            operations=[test_op])

    filename = "bounding_box_augment_crop_c_result.npz"
    save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

    unaugSamp, augSamp = [], []

    for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()):
        unaugSamp.append(unAug)
        augSamp.append(Aug)

    if plot_vis:
        visualize_with_bounding_boxes(unaugSamp, augSamp)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
Example 10
def create_dataset(data_path, batch_size=32, repeat_size=1, status="train"):
    """
    create dataset for train or test
    """

    rank_id = int(os.getenv('DEVICE_ID'))
    device_num = int(os.getenv('RANK_SIZE'))
    
    if device_num == 1:
        cifar_ds = ds.Cifar10Dataset(data_path, num_parallel_workers=8, shuffle=True)
    else:
        cifar_ds = ds.Cifar10Dataset(data_path, num_parallel_workers=8, shuffle=True,
                                     num_shards=device_num, shard_id=rank_id)
    rescale = 1.0 / 255.0
    shift = 0.0

    resize_op = CV.Resize((cfg.image_height, cfg.image_width))
    rescale_op = CV.Rescale(rescale, shift)
    normalize_op = CV.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    if status == "train":
        random_crop_op = CV.RandomCrop([32, 32], [4, 4, 4, 4])
        random_horizontal_op = CV.RandomHorizontalFlip()
    channel_swap_op = CV.HWC2CHW()
    typecast_op = C.TypeCast(mstype.int32)
    cifar_ds = cifar_ds.map(input_columns="label", operations=typecast_op)
    if status == "train":
        cifar_ds = cifar_ds.map(input_columns="image", operations=random_crop_op)
        cifar_ds = cifar_ds.map(input_columns="image", operations=random_horizontal_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=resize_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=rescale_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=normalize_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=channel_swap_op)

    cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
    cifar_ds = cifar_ds.repeat(repeat_size)
    return cifar_ds
Example 11
def test_random_crop_05_c():
    """
    Test RandomCrop op with c_transforms:
    input image size < crop size but pad_if_needed is enabled,
    expected to pass
    """
    logger.info("test_random_crop_05_c")
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR,
                              SCHEMA_DIR,
                              columns_list=["image"],
                              shuffle=False)
    # Note: The test image is 4032x2268; the crop width (4033) exceeds it, so pad_if_needed kicks in
    random_crop_op = c_vision.RandomCrop([2268, 4033], [200, 200, 200, 200],
                                         pad_if_needed=True)
    decode_op = c_vision.Decode()
    data = data.map(input_columns=["image"], operations=decode_op)
    data = data.map(input_columns=["image"], operations=random_crop_op)

    filename = "random_crop_05_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
Example 12
def test_random_crop_07_c():
    """
    Test RandomCrop op with c_transforms:
    padding_mode is Border.CONSTANT and fill_value is 255 (White),
    expected to pass
    """
    logger.info("test_random_crop_07_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: padding_mode defaults to Border.CONSTANT; fill_value paints the padding white
    random_crop_op = c_vision.RandomCrop(512, [200, 200, 200, 200], fill_value=(255, 255, 255))
    decode_op = c_vision.Decode()
    data = data.map(input_columns=["image"], operations=decode_op)
    data = data.map(input_columns=["image"], operations=random_crop_op)

    filename = "random_crop_07_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
Example 13
def test_random_crop_04_c():
    """
    Test RandomCrop op with c_transforms: input image size < crop size, expected to fail
    """
    logger.info("test_random_crop_04_c")
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)
    try:
        # Generate dataset
        data = ds.TFRecordDataset(DATA_DIR,
                                  SCHEMA_DIR,
                                  columns_list=["image"],
                                  shuffle=False)
        # Note: The test image is 4032x2268; the crop width (4033) exceeds it and
        # pad_if_needed is off, so the op should fail
        random_crop_op = c_vision.RandomCrop([2268, 4033])
        decode_op = c_vision.Decode()
        data = data.map(input_columns=["image"], operations=decode_op)
        data = data.map(input_columns=["image"], operations=random_crop_op)
        image_list = []
        for item in data.create_dict_iterator():
            image = item["image"]
            image_list.append(image.shape)
    except Exception as e:
        logger.info("Got an exception in DE: {}".format(str(e)))
Example 14
def create_dataset(dataset_path, do_train, config, repeat_num=1):
    """
    Create a train or eval dataset.

    Args:
        dataset_path (string): The path of dataset.
        do_train (bool): Whether dataset is used for train or eval.
        config: configuration
        repeat_num (int): The repeat times of dataset. Default: 1.
    Returns:
        Dataset.
    """
    if do_train:
        dataset_path = os.path.join(dataset_path, 'train')
        do_shuffle = True
    else:
        dataset_path = os.path.join(dataset_path, 'eval')
        do_shuffle = False

    device_id = 0
    device_num = 1
    if config.platform == "GPU":
        if config.run_distribute:
            from mindspore.communication.management import get_rank, get_group_size
            device_id = get_rank()
            device_num = get_group_size()
    elif config.platform == "Ascend":
        device_id = int(os.getenv('DEVICE_ID'))
        device_num = int(os.getenv('RANK_SIZE'))

    if device_num == 1 or not do_train:
        ds = de.Cifar10Dataset(dataset_path,
                               num_parallel_workers=4,
                               shuffle=do_shuffle)
    else:
        ds = de.Cifar10Dataset(dataset_path,
                               num_parallel_workers=4,
                               shuffle=do_shuffle,
                               num_shards=device_num,
                               shard_id=device_id)

    resize_height = config.image_height
    resize_width = config.image_width
    buffer_size = 100
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = C.RandomCrop((32, 32), (4, 4, 4, 4))
    # Note: the flip probability is device_id / (device_id + 1), so it varies per
    # device (0 on device 0, i.e. no flips on that shard)
    random_horizontal_flip_op = C.RandomHorizontalFlip(device_id /
                                                       (device_id + 1))

    resize_op = C.Resize((resize_height, resize_width))
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize([0.4914, 0.4822, 0.4465],
                               [0.2023, 0.1994, 0.2010])

    change_swap_op = C.HWC2CHW()

    trans = []
    if do_train:
        trans += [random_crop_op, random_horizontal_flip_op]

    trans += [resize_op, rescale_op, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label", operations=type_cast_op)
    ds = ds.map(input_columns="image", operations=trans)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=buffer_size)

    # apply batch operations
    ds = ds.batch(config.batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
Example 15
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset.

    Args:
        dataset_path (str): The path of dataset.
        do_train (bool): Whether dataset is used for train or eval.
        repeat_num (int): The repeat times of dataset. Default: 1.
        batch_size (int): The batch size of dataset. Default: 32.

    Returns:
        Dataset.
    """
    if do_train:
        dataset_path = os.path.join(dataset_path, 'train')
        do_shuffle = True
    else:
        dataset_path = os.path.join(dataset_path, 'eval')
        do_shuffle = False

    # device_id and device_num are undefined in the original snippet; they are read
    # here from the environment, mirroring the sibling examples (assumed convention)
    device_id = int(os.getenv('DEVICE_ID', '0'))
    device_num = int(os.getenv('RANK_SIZE', '1'))

    if device_num == 1 or not do_train:
        ds = de.Cifar10Dataset(dataset_path,
                               num_parallel_workers=4,
                               shuffle=do_shuffle)
    else:
        ds = de.Cifar10Dataset(dataset_path,
                               num_parallel_workers=4,
                               shuffle=do_shuffle,
                               num_shards=device_num,
                               shard_id=device_id)

    resize_height = 224
    resize_width = 224
    buffer_size = 100
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = C.RandomCrop((32, 32), (4, 4, 4, 4))
    random_horizontal_flip_op = C.RandomHorizontalFlip(device_id /
                                                       (device_id + 1))

    resize_op = C.Resize((resize_height, resize_width))
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize([0.4914, 0.4822, 0.4465],
                               [0.2023, 0.1994, 0.2010])

    change_swap_op = C.HWC2CHW()

    trans = []
    if do_train:
        trans += [random_crop_op, random_horizontal_flip_op]

    trans += [resize_op, rescale_op, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label", operations=type_cast_op)
    ds = ds.map(input_columns="image", operations=trans)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=buffer_size)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
Example 16
def create_dataset(dataset_path,
                   do_train,
                   repeat_num=1,
                   batch_size=32,
                   target="Ascend"):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32
        target(str): the device target. Default: Ascend

    Returns:
        dataset
    """
    if target == "Ascend":
        device_num = int(os.getenv("DEVICE_NUM"))
        rank_id = int(os.getenv("RANK_ID"))
    else:
        init("nccl")
        rank_id = get_rank()
        device_num = get_group_size()

    if device_num == 1:
        ds = de.Cifar10Dataset(dataset_path,
                               num_parallel_workers=8,
                               shuffle=True)
    else:
        ds = de.Cifar10Dataset(dataset_path,
                               num_parallel_workers=8,
                               shuffle=True,
                               num_shards=device_num,
                               shard_id=rank_id)

    # define map operations
    trans = []
    if do_train:
        trans += [
            C.RandomCrop((32, 32), (4, 4, 4, 4)),
            C.RandomHorizontalFlip(prob=0.5)
        ]

    trans += [
        C.Resize((config.image_height, config.image_width)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label",
                num_parallel_workers=8,
                operations=type_cast_op)
    ds = ds.map(input_columns="image",
                num_parallel_workers=8,
                operations=trans)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
Example 17
def test_cpp_uniform_augment(plot=False, num_ops=2):
    """
    Test UniformAugment
    """
    logger.info("Test CPP UniformAugment")

    # Original Images
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = [C.Decode(), C.Resize(size=[224, 224]), F.ToTensor()]

    ds_original = ds.map(input_columns="image", operations=transforms_original)

    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = np.transpose(image, (0, 2, 3, 1))
        else:
            images_original = np.append(images_original,
                                        np.transpose(image, (0, 2, 3, 1)),
                                        axis=0)

    # UniformAugment Images
    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
    transforms_ua = [
        C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
        C.RandomHorizontalFlip(),
        C.RandomVerticalFlip(),
        C.RandomColorAdjust(),
        C.RandomRotation(degrees=45)
    ]

    uni_aug = C.UniformAugment(operations=transforms_ua, num_ops=num_ops)

    transforms_all = [
        C.Decode(),
        C.Resize(size=[224, 224]), uni_aug,
        F.ToTensor()
    ]

    ds_ua = ds.map(input_columns="image",
                   operations=transforms_all,
                   num_parallel_workers=1)

    ds_ua = ds_ua.batch(512)

    for idx, (image, _) in enumerate(ds_ua):
        if idx == 0:
            images_ua = np.transpose(image, (0, 2, 3, 1))
        else:
            images_ua = np.append(images_ua,
                                  np.transpose(image, (0, 2, 3, 1)),
                                  axis=0)
    if plot:
        visualize_list(images_original, images_ua)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_ua[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
Example 18
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32

    Returns:
        dataset
    """
    device_num = int(os.getenv("DEVICE_NUM"))
    rank_id = int(os.getenv("RANK_ID"))

    if device_num == 1:
        ds = de.Cifar10Dataset(dataset_path,
                               num_parallel_workers=4,
                               shuffle=True)
    else:
        ds = de.Cifar10Dataset(dataset_path,
                               num_parallel_workers=4,
                               shuffle=True,
                               num_shards=device_num,
                               shard_id=rank_id)

    resize_height = config.image_height
    resize_width = config.image_width
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = C.RandomCrop((32, 32), (4, 4, 4, 4))
    random_horizontal_flip_op = C.RandomHorizontalFlip(rank_id / (rank_id + 1))

    resize_op = C.Resize((resize_height, resize_width))
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize([0.4914, 0.4822, 0.4465],
                               [0.2023, 0.1994, 0.2010])

    change_swap_op = C.HWC2CHW()

    trans = []
    if do_train:
        trans += [random_crop_op, random_horizontal_flip_op]

    trans += [resize_op, rescale_op, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label", operations=type_cast_op)
    ds = ds.map(input_columns="image", operations=trans)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=config.buffer_size)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds