Example #1
def test_mixup_batch_fail5():
    """
    Test MixUpBatch Fail 5
    We expect this to fail because labels are not one-hot encoded
    """
    logger.info("test_mixup_batch_fail5")

    # Original Images
    ds_original = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    ds_original = ds_original.batch(5)

    images_original = np.array([])
    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = image
        else:
            images_original = np.append(images_original, image, axis=0)

    # MixUp Images
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

    mixup_batch_op = vision.MixUpBatch()
    data1 = data1.batch(5, drop_remainder=True)
    data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op)

    with pytest.raises(RuntimeError) as error:
        images_mixup = np.array([])
        for idx, (image, _) in enumerate(data1):
            if idx == 0:
                images_mixup = image
            else:
                images_mixup = np.append(images_mixup, image, axis=0)
    error_message = "MixUpBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC"
    assert error_message in str(error.value)
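For contrast, a minimal sketch of a pipeline MixUpBatch accepts: one-hot encode the labels, batch the data, and only then map the op over both columns. The imports, the alpha value, and the data path are assumptions (MindSpore 1.x module layout; the test data directory used elsewhere on this page), not part of the original test.

import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as data_trans
import mindspore.dataset.vision.c_transforms as vision

DATA_DIR = "../data/dataset/testCifar10Data"  # assumed test data path

data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
data = data.map(operations=data_trans.OneHot(num_classes=10), input_columns=["label"])
data = data.batch(5, drop_remainder=True)  # batch first...
data = data.map(operations=vision.MixUpBatch(alpha=1.0),  # ...then mix the whole batch
                input_columns=["image", "label"])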
Example #2
def test_mixup_batch_fail1():
    """
    Test MixUpBatch Fail 1
    We expect this to fail because the images and labels are not batched
    """
    logger.info("test_mixup_batch_fail1")

    # Original Images
    ds_original = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    ds_original = ds_original.batch(5)

    images_original = np.array([])
    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = image
        else:
            images_original = np.append(images_original, image, axis=0)

    # MixUp Images
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
    mixup_batch_op = vision.MixUpBatch(0.1)
    with pytest.raises(RuntimeError) as error:
        data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op)
        for idx, (image, _) in enumerate(data1):
            if idx == 0:
                images_mixup = image
            else:
                images_mixup = np.append(images_mixup, image, axis=0)
        error_message = "You must make sure images are HWC or CHW and batch"
        assert error_message in str(error.value)
Example #3
def create_dataset_cifar10(data_path,
                           batch_size=32,
                           repeat_size=1,
                           status="train",
                           target="Ascend"):
    """
    create dataset for train or test
    """

    if target == "Ascend":
        device_num, rank_id = _get_rank_info()

    if target != "Ascend" or device_num == 1:
        cifar_ds = ds.Cifar10Dataset(data_path)
    else:
        cifar_ds = ds.Cifar10Dataset(data_path,
                                     num_parallel_workers=8,
                                     shuffle=True,
                                     num_shards=device_num,
                                     shard_id=rank_id)
    rescale = 1.0 / 255.0
    shift = 0.0
    cfg = alexnet_cifar10_cfg

    resize_op = CV.Resize((cfg.image_height, cfg.image_width))
    rescale_op = CV.Rescale(rescale, shift)
    normalize_op = CV.Normalize((0.4914, 0.4822, 0.4465),
                                (0.2023, 0.1994, 0.2010))
    if status == "train":
        random_crop_op = CV.RandomCrop([32, 32], [4, 4, 4, 4])
        random_horizontal_op = CV.RandomHorizontalFlip()
    channel_swap_op = CV.HWC2CHW()
    typecast_op = C.TypeCast(mstype.int32)
    cifar_ds = cifar_ds.map(input_columns="label",
                            operations=typecast_op,
                            num_parallel_workers=8)
    if status == "train":
        cifar_ds = cifar_ds.map(input_columns="image",
                                operations=random_crop_op,
                                num_parallel_workers=8)
        cifar_ds = cifar_ds.map(input_columns="image",
                                operations=random_horizontal_op,
                                num_parallel_workers=8)
    cifar_ds = cifar_ds.map(input_columns="image",
                            operations=resize_op,
                            num_parallel_workers=8)
    cifar_ds = cifar_ds.map(input_columns="image",
                            operations=rescale_op,
                            num_parallel_workers=8)
    cifar_ds = cifar_ds.map(input_columns="image",
                            operations=normalize_op,
                            num_parallel_workers=8)
    cifar_ds = cifar_ds.map(input_columns="image",
                            operations=channel_swap_op,
                            num_parallel_workers=8)

    cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
    cifar_ds = cifar_ds.repeat(repeat_size)
    return cifar_ds
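A hypothetical call site for this helper, shown only to illustrate the parameters; the path is a placeholder, and _get_rank_info plus alexnet_cifar10_cfg must already be in scope.

train_ds = create_dataset_cifar10("/path/to/cifar-10-batches-bin",  # placeholder path
                                  batch_size=32,
                                  repeat_size=1,
                                  status="train",
                                  target="Ascend")
print("batches per epoch:", train_ds.get_dataset_size())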
Example #4
def test_mixup_batch_fail4():
    """
    Test MixUpBatch Fail 4
    We expect this to fail because alpha is zero
    """
    logger.info("test_mixup_batch_fail4")

    # Original Images
    ds_original = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    ds_original = ds_original.batch(5)

    images_original = np.array([])
    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = image
        else:
            images_original = np.append(images_original, image, axis=0)

    # MixUp Images
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
    with pytest.raises(ValueError) as error:
        vision.MixUpBatch(0.0)
        error_message = "Input is not within the required interval"
        assert error_message in str(error.value)
Example #5
def test_mixup_batch_fail3():
    """
    Test MixUpBatch op
    We expect this to fail because the label column is not passed to mixup_batch
    """
    logger.info("test_mixup_batch_fail3")
    # Original Images
    ds_original = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    ds_original = ds_original.batch(5, drop_remainder=True)

    images_original = None
    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = image
        else:
            images_original = np.append(images_original, image, axis=0)

    # MixUp Images
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
    mixup_batch_op = vision.MixUpBatch()
    data1 = data1.batch(5, drop_remainder=True)
    data1 = data1.map(input_columns=["image"], operations=mixup_batch_op)

    with pytest.raises(RuntimeError) as error:
        images_mixup = np.array([])
        for idx, (image, _) in enumerate(data1):
            if idx == 0:
                images_mixup = image
            else:
                images_mixup = np.append(images_mixup, image, axis=0)
    error_message = "Both images and labels columns are required"
    assert error_message in str(error.value)
Example #6
def create_dataset(data_home, repeat_num=1, training=True):
    """Data operations."""
    ds.config.set_seed(1)

    # default to single-device values when the env vars are unset
    device_num = int(os.getenv("RANK_SIZE", "1"))
    rank_id = int(os.getenv("DEVICE_ID", "0"))

    if device_num == 1:
        data_set = ds.Cifar10Dataset(data_home,
                                     num_parallel_workers=8,
                                     shuffle=True)
    else:
        data_set = ds.Cifar10Dataset(data_home,
                                     num_parallel_workers=8,
                                     shuffle=True,
                                     num_shards=device_num,
                                     shard_id=rank_id)

    resize_height = cfg.image_height
    resize_width = cfg.image_width
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = vision.RandomCrop(
        (32, 32), (4, 4, 4, 4))  # padding_mode default CONSTANT
    random_horizontal_op = vision.RandomHorizontalFlip()
    resize_op = vision.Resize(
        (resize_height, resize_width))  # interpolation default BILINEAR
    rescale_op = vision.Rescale(rescale, shift)
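    # Note: the mean/std values below match the common CIFAR-10 stats used in
    # the other examples on this page, but listed in reverse channel order.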
    normalize_op = vision.Normalize((0.4465, 0.4822, 0.4914),
                                    (0.2010, 0.1994, 0.2023))
    changeswap_op = vision.HWC2CHW()
    type_cast_op = C.TypeCast(mstype.int32)

    c_trans = []
    if training:
        c_trans = [random_crop_op, random_horizontal_op]
    c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]

    # apply map operations on labels and images
    data_set = data_set.map(input_columns="label", operations=type_cast_op)
    data_set = data_set.map(input_columns="image", operations=c_trans)

    # apply repeat operations
    data_set = data_set.repeat(repeat_num)

    # apply shuffle operations
    data_set = data_set.shuffle(buffer_size=10)

    # apply batch operations
    data_set = data_set.batch(batch_size=cfg.batch_size, drop_remainder=True)

    return data_set
Example #7
def create_dataset(repeat_num=1,
                   training=True,
                   batch_size=32,
                   rank_id=0,
                   rank_size=1,
                   enable_hccl=False):
    data_dir = data_home + "/cifar-10-batches-bin"
    if not training:
        data_dir = data_home + "/cifar-10-verify-bin"
    data_set = ds.Cifar10Dataset(data_dir)

    if enable_hccl:
        data_set = ds.Cifar10Dataset(data_dir,
                                     num_shards=rank_size,
                                     shard_id=rank_id)

    resize_height = 224
    resize_width = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = vision.RandomCrop(
        (32, 32), (4, 4, 4, 4))  # padding_mode default CONSTANT
    random_horizontal_op = vision.RandomHorizontalFlip()
    # interpolation default BILINEAR
    resize_op = vision.Resize((resize_height, resize_width))
    rescale_op = vision.Rescale(rescale, shift)
    normalize_op = vision.Normalize((0.4465, 0.4822, 0.4914),
                                    (0.2010, 0.1994, 0.2023))
    changeswap_op = vision.HWC2CHW()
    type_cast_op = C.TypeCast(mstype.int32)

    c_trans = []
    if training:
        c_trans = [random_crop_op, random_horizontal_op]
    c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]

    # apply map operations on labels and images
    data_set = data_set.map(input_columns="label", operations=type_cast_op)
    data_set = data_set.map(input_columns="image", operations=c_trans)

    # apply shuffle operations
    data_set = data_set.shuffle(buffer_size=1000)

    # apply batch operations
    data_set = data_set.batch(batch_size=batch_size, drop_remainder=True)

    # apply repeat operations
    data_set = data_set.repeat(repeat_num)

    return data_set
Example #8
def create_dataset_cifar10(data_home,
                           repeat_num=1,
                           training=True,
                           cifar_cfg=None):
    """Data operations."""
    data_dir = os.path.join(data_home, "cifar-10-batches-bin")
    if not training:
        data_dir = os.path.join(data_home, "cifar-10-verify-bin")

    rank_size, rank_id = _get_rank_info()
    if training:
        data_set = ds.Cifar10Dataset(data_dir,
                                     num_shards=rank_size,
                                     shard_id=rank_id,
                                     shuffle=True)
    else:
        data_set = ds.Cifar10Dataset(data_dir,
                                     num_shards=rank_size,
                                     shard_id=rank_id,
                                     shuffle=False)

    resize_height = cifar_cfg.image_height
    resize_width = cifar_cfg.image_width

    # define map operations
    random_crop_op = vision.RandomCrop(
        (32, 32), (4, 4, 4, 4))  # padding_mode default CONSTANT
    random_horizontal_op = vision.RandomHorizontalFlip()
    resize_op = vision.Resize(
        (resize_height, resize_width))  # interpolation default BILINEAR
    rescale_op = vision.Rescale(1.0 / 255.0, 0.0)
    # Alternative stats: vision.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    normalize_op = vision.Normalize((0.4914, 0.4822, 0.4465),
                                    (0.24703233, 0.24348505, 0.26158768))
    changeswap_op = vision.HWC2CHW()
    type_cast_op = c_transforms.TypeCast(mstype.int32)

    c_trans = []
    if training:
        c_trans = [random_crop_op, random_horizontal_op]
    c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]

    # apply map operations on labels and images
    data_set = data_set.map(operations=type_cast_op, input_columns="label")
    data_set = data_set.map(operations=c_trans, input_columns="image")

    # apply batch operations
    data_set = data_set.batch(batch_size=cifar_cfg.batch_size,
                              drop_remainder=True)

    # apply repeat operations
    data_set = data_set.repeat(repeat_num)

    return data_set
Example #9
def test_cifar_exception_file_path():
    def exception_func(item):
        raise Exception("Error occurred!")

    try:
        data = ds.Cifar10Dataset(DATA_DIR_10)
        data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        num_rows = 0
        for _ in data.create_dict_iterator():
            num_rows += 1
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data = ds.Cifar10Dataset(DATA_DIR_10)
        data = data.map(operations=exception_func, input_columns=["label"], num_parallel_workers=1)
        num_rows = 0
        for _ in data.create_dict_iterator():
            num_rows += 1
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data = ds.Cifar100Dataset(DATA_DIR_100)
        data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        num_rows = 0
        for _ in data.create_dict_iterator():
            num_rows += 1
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data = ds.Cifar100Dataset(DATA_DIR_100)
        data = data.map(operations=exception_func, input_columns=["coarse_label"], num_parallel_workers=1)
        num_rows = 0
        for _ in data.create_dict_iterator():
            num_rows += 1
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data = ds.Cifar100Dataset(DATA_DIR_100)
        data = data.map(operations=exception_func, input_columns=["fine_label"], num_parallel_workers=1)
        num_rows = 0
        for _ in data.create_dict_iterator():
            num_rows += 1
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)
Example #10
def create_dataset_cifar10(dataset_path, do_train, cfg, repeat_num=1):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        cfg (dict): the config for creating dataset.
        repeat_num(int): the repeat times of dataset. Default: 1.

    Returns:
        dataset
    """
    dataset_path = os.path.join(
        dataset_path,
        "cifar-10-batches-bin" if do_train else "cifar-10-verify-bin")
    if cfg.group_size == 1:
        data_set = ds.Cifar10Dataset(dataset_path,
                                     num_parallel_workers=cfg.work_nums,
                                     shuffle=True)
    else:
        data_set = ds.Cifar10Dataset(dataset_path,
                                     num_parallel_workers=cfg.work_nums,
                                     shuffle=True,
                                     num_shards=cfg.group_size,
                                     shard_id=cfg.rank)

    # define map operations
    trans = []
    if do_train:
        trans.append(C.RandomCrop((32, 32), (4, 4, 4, 4)))
        trans.append(C.RandomHorizontalFlip(prob=0.5))

    trans.append(C.Resize((299, 299)))
    trans.append(C.Rescale(1.0 / 255.0, 0.0))
    trans.append(
        C.Normalize(mean=[0.4914, 0.4822, 0.4465],
                    std=[0.2023, 0.1994, 0.2010]))
    trans.append(C.HWC2CHW())

    type_cast_op = C2.TypeCast(mstype.int32)
    data_set = data_set.map(operations=trans,
                            input_columns="image",
                            num_parallel_workers=cfg.work_nums)
    data_set = data_set.map(operations=type_cast_op,
                            input_columns="label",
                            num_parallel_workers=cfg.work_nums)
    # apply batch operations
    data_set = data_set.batch(cfg.batch_size, drop_remainder=do_train)
    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)
    return data_set
Example #11
def test_cifar_usage():
    """
    Test the usage flag of Cifar10Dataset and Cifar100Dataset
    """
    logger.info("Test Cifar10Dataset and Cifar100Dataset usage flag")

    # flag, if True, test cifar10 else test cifar100
    def test_config(usage, flag=True, cifar_path=None):
        if cifar_path is None:
            cifar_path = DATA_DIR_10 if flag else DATA_DIR_100
        try:
            data = ds.Cifar10Dataset(cifar_path, usage=usage) if flag else ds.Cifar100Dataset(cifar_path, usage=usage)
            num_rows = 0
            for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
                num_rows += 1
        except (ValueError, TypeError, RuntimeError) as e:
            return str(e)
        return num_rows

    # test the usage flag of Cifar10Dataset (flag defaults to True)
    assert test_config("train") == 10000
    assert test_config("all") == 10000
    assert "usage is not within the valid set of ['train', 'test', 'all']" in test_config("invalid")
    assert "Argument usage with value ['list'] is not of type (<class 'str'>,)" in test_config(["list"])
    assert "no valid data matching the dataset API Cifar10Dataset" in test_config("test")

    # test the usage flag of Cifar100Dataset
    assert test_config("test", False) == 10000
    assert test_config("all", False) == 10000
    assert "no valid data matching the dataset API Cifar100Dataset" in test_config("train", False)
    assert "usage is not within the valid set of ['train', 'test', 'all']" in test_config("invalid", False)

    # change this directory to the folder that contains all cifar10 files
    all_cifar10 = None
    if all_cifar10 is not None:
        assert test_config("train", True, all_cifar10) == 50000
        assert test_config("test", True, all_cifar10) == 10000
        assert test_config("all", True, all_cifar10) == 60000
        assert ds.Cifar10Dataset(all_cifar10, usage="train").get_dataset_size() == 50000
        assert ds.Cifar10Dataset(all_cifar10, usage="test").get_dataset_size() == 10000
        assert ds.Cifar10Dataset(all_cifar10, usage="all").get_dataset_size() == 60000

    # change this directory to the folder that contains all cifar100 files
    all_cifar100 = None
    if all_cifar100 is not None:
        assert test_config("train", False, all_cifar100) == 50000
        assert test_config("test", False, all_cifar100) == 10000
        assert test_config("all", False, all_cifar100) == 60000
        assert ds.Cifar100Dataset(all_cifar100, usage="train").get_dataset_size() == 50000
        assert ds.Cifar100Dataset(all_cifar100, usage="test").get_dataset_size() == 10000
        assert ds.Cifar100Dataset(all_cifar100, usage="all").get_dataset_size() == 60000
Example #12
def test_cifar10_exception():
    """
    Test error cases for Cifar10Dataset
    """
    logger.info("Test error cases for Cifar10Dataset")
    error_msg_1 = "sampler and shuffle cannot be specified at the same time"
    with pytest.raises(RuntimeError, match=error_msg_1):
        ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, sampler=ds.PKSampler(3))

    error_msg_2 = "sampler and sharding cannot be specified at the same time"
    with pytest.raises(RuntimeError, match=error_msg_2):
        ds.Cifar10Dataset(DATA_DIR_10, sampler=ds.PKSampler(3), num_shards=2, shard_id=0)

    error_msg_3 = "num_shards is specified and currently requires shard_id as well"
    with pytest.raises(RuntimeError, match=error_msg_3):
        ds.Cifar10Dataset(DATA_DIR_10, num_shards=10)

    error_msg_4 = "shard_id is specified but num_shards is not"
    with pytest.raises(RuntimeError, match=error_msg_4):
        ds.Cifar10Dataset(DATA_DIR_10, shard_id=0)

    error_msg_5 = "Input shard_id is not within the required interval"
    with pytest.raises(ValueError, match=error_msg_5):
        ds.Cifar10Dataset(DATA_DIR_10, num_shards=2, shard_id=-1)
    with pytest.raises(ValueError, match=error_msg_5):
        ds.Cifar10Dataset(DATA_DIR_10, num_shards=2, shard_id=5)

    error_msg_6 = "num_parallel_workers exceeds"
    with pytest.raises(ValueError, match=error_msg_6):
        ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_parallel_workers=0)
    with pytest.raises(ValueError, match=error_msg_6):
        ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_parallel_workers=88)
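For reference, a parameterization that satisfies every constraint this test probes; a sketch assuming the same imports and DATA_DIR_10 as above.

# Valid: shard_id lies in [0, num_shards) and num_parallel_workers is at
# least 1 and no larger than the machine's CPU count.
data = ds.Cifar10Dataset(DATA_DIR_10, num_shards=2, shard_id=0,
                         num_parallel_workers=2)
assert data.get_dataset_size() > 0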
Example #13
def test_cifar10_sequential_sampler():
    """
    Test Cifar10Dataset with SequentialSampler
    """
    logger.info("Test Cifar10Dataset Op with SequentialSampler")
    num_samples = 30
    sampler = ds.SequentialSampler(num_samples=num_samples)
    data1 = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler)
    data2 = ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_samples=num_samples)
    num_iter = 0
    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        np.testing.assert_equal(item1["label"], item2["label"])
        num_iter += 1
    assert num_iter == num_samples
Example #14
def create_dataset(repeat_num=1, training=True):
    """
    Create a dataset for training or inference.
    """
    cifar_ds = ds.Cifar10Dataset(data_home)

    if args_opt.run_distribute:
        rank_id = int(os.getenv('RANK_ID'))
        rank_size = int(os.getenv('RANK_SIZE'))
        cifar_ds = ds.Cifar10Dataset(data_home,
                                     num_shards=rank_size,
                                     shard_id=rank_id)

    resize_height = 224
    resize_width = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = C.RandomCrop(
        (32, 32), (4, 4, 4, 4))  # padding_mode default CONSTANT
    random_horizontal_op = C.RandomHorizontalFlip()
    resize_op = C.Resize(
        (resize_height, resize_width))  # interpolation default BILINEAR
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize((0.4914, 0.4822, 0.4465),
                               (0.2023, 0.1994, 0.2010))
    changeswap_op = C.HWC2CHW()
    type_cast_op = C2.TypeCast(mstype.int32)

    c_trans = []
    if training:
        c_trans = [random_crop_op, random_horizontal_op]
    c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]

    # apply map operations on labels and images
    cifar_ds = cifar_ds.map(operations=type_cast_op, input_columns="label")
    cifar_ds = cifar_ds.map(operations=c_trans, input_columns="image")

    # apply shuffle operations
    cifar_ds = cifar_ds.shuffle(buffer_size=10)

    # apply batch operations
    cifar_ds = cifar_ds.batch(batch_size=args_opt.batch_size,
                              drop_remainder=True)

    # apply repeat operations
    cifar_ds = cifar_ds.repeat(repeat_num)

    return cifar_ds
Example #15
def test_cutmix_batch_fail5():
    """
    Test CutMixBatch op
    We expect this to fail because the label column is not passed to cutmix_batch
    """
    logger.info("test_cutmix_batch_fail5")

    # CutMixBatch Images
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
    data1 = data1.batch(5, drop_remainder=True)
    data1 = data1.map(operations=cutmix_batch_op, input_columns=["image"])

    with pytest.raises(RuntimeError) as error:
        images_cutmix = np.array([])
        for idx, (image, _) in enumerate(data1):
            if idx == 0:
                images_cutmix = image.asnumpy()
            else:
                images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0)
    error_message = "both image and label columns are required"
    assert error_message in str(error.value)
Example #16
def test_cutmix_batch_fail1():
    """
    Test CutMixBatch Fail 1
    We expect this to fail because the images and labels are not batched
    """
    logger.info("test_cutmix_batch_fail1")

    # CutMixBatch Images
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
    with pytest.raises(RuntimeError) as error:
        data1 = data1.map(operations=cutmix_batch_op,
                          input_columns=["image", "label"])
        for idx, (image, _) in enumerate(data1):
            if idx == 0:
                images_cutmix = image.asnumpy()
            else:
                images_cutmix = np.append(images_cutmix,
                                          image.asnumpy(),
                                          axis=0)
        error_message = "You must make sure images are HWC or CHW and batch "
        assert error_message in str(error.value)
Example #17
    def get_dataset(self):
        print("get data from dir:", self.data_dir)
        if self.device_num == 1:
            ds_ = ds.Cifar10Dataset(
                self.data_dir, num_parallel_workers=self.num_parallel_workers)
        else:
            ds_ = ds.Cifar10Dataset(
                self.data_dir,
                num_parallel_workers=self.num_parallel_workers,
                num_shards=self.device_num,
                shard_id=self.device_id)

        ds_ = ds_.map(input_columns=["image"], operations=self.trsfm)
        typecast_op = C.TypeCast(mstype.int32)
        ds_ = ds_.map(input_columns=["label"], operations=typecast_op)
        return ds_
Example #18
def test_cpp_uniform_augment_random_crop_badinput(num_ops=1):
    """
    Test UniformAugment with a crop size larger than the image size
    """
    logger.info("Test CPP UniformAugment with random_crop bad input")
    batch_size = 2
    cifar10_dir = "../data/dataset/testCifar10Data"
    ds1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False)  # shape = [32,32,3]

    transforms_ua = [
        # Note: crop size [224, 224] > image size [32, 32]
        C.RandomCrop(size=[224, 224]),
        C.RandomHorizontalFlip()
    ]
    uni_aug = C.UniformAugment(transforms=transforms_ua, num_ops=num_ops)
    ds1 = ds1.map(operations=uni_aug, input_columns="image")

    # apply DatasetOps
    ds1 = ds1.batch(batch_size, drop_remainder=True, num_parallel_workers=1)
    num_batches = 0
    try:
        for _ in ds1.create_dict_iterator(num_epochs=1, output_numpy=True):
            num_batches += 1
    except Exception as e:
        assert "Crop size" in str(e)
Example #19
def test_cutmix_batch_fail6():
    """
    Test CutMixBatch op
    We expect this to fail because image_batch_format passed to CutMixBatch doesn't match the format of the images
    """
    logger.info("test_cutmix_batch_fail6")

    # CutMixBatch Images
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW)
    data1 = data1.batch(5, drop_remainder=True)
    data1 = data1.map(input_columns=["image", "label"],
                      operations=cutmix_batch_op)

    with pytest.raises(RuntimeError) as error:
        images_cutmix = np.array([])
        for idx, (image, _) in enumerate(data1):
            if idx == 0:
                images_cutmix = image
            else:
                images_cutmix = np.append(images_cutmix, image, axis=0)
    error_message = "CutMixBatch: Image doesn't match the given image format."
    assert error_message in str(error.value)
Example #20
def test_cutmix_batch_nchw_md5():
    """
    Test CutMixBatch on a batch of NCHW images, validated with an MD5 checksum
    """
    logger.info("test_cutmix_batch_nchw_md5")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # CutMixBatch Images
    data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    hwc2chw_op = vision.HWC2CHW()
    data = data.map(input_columns=["image"], operations=hwc2chw_op)
    one_hot_op = data_trans.OneHot(num_classes=10)
    data = data.map(input_columns=["label"], operations=one_hot_op)
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW)
    data = data.batch(5, drop_remainder=True)
    data = data.map(input_columns=["image", "label"],
                    operations=cutmix_batch_op)

    filename = "cutmix_batch_c_nchw_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
Example #21
def test_cutmix_batch_fail7():
    """
    Test CutMixBatch op
    We expect this to fail because labels are not in one-hot format
    """
    logger.info("test_cutmix_batch_fail7")

    # CutMixBatch Images
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
    data1 = data1.batch(5, drop_remainder=True)
    data1 = data1.map(operations=cutmix_batch_op,
                      input_columns=["image", "label"])

    with pytest.raises(RuntimeError) as error:
        images_cutmix = np.array([])
        for idx, (image, _) in enumerate(data1):
            if idx == 0:
                images_cutmix = image.asnumpy()
            else:
                images_cutmix = np.append(images_cutmix,
                                          image.asnumpy(),
                                          axis=0)
    error_message = "CutMixBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC"
    assert error_message in str(error.value)
Example #22
def test_filter_case_dataset_cifar10():
    DATA_DIR_10 = "../data/dataset/testCifar10Data"
    dataset_c = ds.Cifar10Dataset(dataset_dir=DATA_DIR_10, num_samples=100000, shuffle=False)
    dataset_f1 = dataset_c.filter(input_columns=["image", "label"], predicate=filter_func_cifar, num_parallel_workers=1)
    for item in dataset_f1.create_dict_iterator(num_epochs=1, output_numpy=True):
        # in this example, each dictionary has keys "image" and "label"
        assert item["label"] % 3 == 0
Example #23
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="CPU"):
    """Create a train or eval CIFAR-10 dataset."""
    data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True)

    # define map operations
    trans = []
    if do_train:
        trans += [
            C.RandomCrop((32, 32), (4, 4, 4, 4)),
            C.RandomHorizontalFlip(prob=0.5)
        ]

    trans += [
        C.Resize((48, 48)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=8)

    # apply shuffle and batch operations
    data_set = data_set.shuffle(buffer_size=10)
    data_set = data_set.batch(batch_size, drop_remainder=False)
    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)

    return data_set
Example #24
def test_cifar_sampler_chain():
    """
    Test Cifar sampler chain
    """
    logger.info("test_cifar_sampler_chain")

    sampler = ds.DistributedSampler(num_shards=2,
                                    shard_id=0,
                                    shuffle=False,
                                    num_samples=5)
    child_sampler = ds.RandomSampler(replacement=True, num_samples=4)
    child_sampler2 = ds.SequentialSampler(start_index=0, num_samples=2)
    child_sampler.add_child(child_sampler2)
    sampler.add_child(child_sampler)
    data1 = ds.Cifar10Dataset(CIFAR10_DATA_DIR, sampler=sampler)
    # Verify dataset size
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 1

    # Verify number of rows
    assert sum([1 for _ in data1]) == 1

    # Verify dataset contents
    res = []
    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
        logger.info("item: {}".format(item))
        res.append(item)
    logger.info("dataset: {}".format(res))
Example #25
def create_dataset(data_home, repeat_num=1, batch_size=32, do_train=True, device_target="CPU"):
    """
    Create a dataset for training or inference.
    """

    cifar_ds = ds.Cifar10Dataset(data_home, num_parallel_workers=8, shuffle=True)

    c_trans = []
    if do_train:
        c_trans += [
            C.RandomCrop((32, 32), (4, 4, 4, 4)),
            C.RandomHorizontalFlip(prob=0.5)
        ]

    c_trans += [
        C.Resize((224, 224)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    cifar_ds = cifar_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
    cifar_ds = cifar_ds.map(operations=c_trans, input_columns="image", num_parallel_workers=8)

    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
    cifar_ds = cifar_ds.repeat(repeat_num)

    return cifar_ds
Example #26
def test_get_column_name_zip():
    data1 = ds.Cifar10Dataset(CIFAR10_DIR)
    assert data1.get_col_names() == ["image", "label"]
    data2 = ds.CSVDataset(CSV_DIR)
    assert data2.get_col_names() == ["1", "2", "3", "4"]
    data = ds.zip((data1, data2))
    assert data.get_col_names() == ["image", "label", "1", "2", "3", "4"]
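zip requires the column names of its inputs to be disjoint, which is why the merged list has six distinct names. If two inputs shared a name, the columns could be renamed first; a sketch using Dataset.rename, rebuilding fresh pipelines from the same CIFAR10_DIR and CSV_DIR as above.

data1 = ds.Cifar10Dataset(CIFAR10_DIR)
data2 = ds.CSVDataset(CSV_DIR)
data2 = data2.rename(input_columns=["1", "2", "3", "4"],
                     output_columns=["c1", "c2", "c3", "c4"])
data = ds.zip((data1, data2))
assert data.get_col_names() == ["image", "label", "c1", "c2", "c3", "c4"]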
Example #27
def test_cifar10():
    """
    dataset parameter
    """
    logger.info("Test dataset parameter")
    data_dir_10 = "../data/dataset/testCifar10Data"
    num_repeat = 2
    batch_size = 32
    limit_dataset = 100
    # apply dataset operations
    data1 = ds.Cifar10Dataset(data_dir_10, num_samples=limit_dataset)
    data1 = data1.repeat(num_repeat)
    data1 = data1.batch(batch_size, True)
    num_epoch = 5
    # iter1 always assumes there is a next epoch and never shuts down.
    iter1 = data1.create_tuple_iterator()
    epoch_count = 0
    sample_count = 0
    for _ in range(num_epoch):
        row_count = 0
        for _ in iter1:
            # each tuple holds a batch of images and a batch of labels
            row_count += 1
        assert row_count == int(limit_dataset * num_repeat / batch_size)
        logger.debug("row_count: ", row_count)
        epoch_count += 1
        sample_count += row_count
    assert epoch_count == num_epoch
    logger.debug("total epochs: ", epoch_count)
    assert sample_count == int(
        limit_dataset * num_repeat / batch_size) * num_epoch
    logger.debug("total sample: ", sample_count)
Example #28
def create_dataset(data_path, batch_size=32, repeat_size=1, mode="train"):
    """
    create dataset for train or test
    """
    cifar_ds = ds.Cifar10Dataset(data_path)
    rescale = 1.0 / 255.0
    shift = 0.0

    resize_op = CV.Resize((cfg.image_height, cfg.image_width))
    rescale_op = CV.Rescale(rescale, shift)
    normalize_op = CV.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    if mode == "train":
        random_crop_op = CV.RandomCrop([32, 32], [4, 4, 4, 4])
        random_horizontal_op = CV.RandomHorizontalFlip()
    channel_swap_op = CV.HWC2CHW()
    typecast_op = C.TypeCast(mstype.int32)
    cifar_ds = cifar_ds.map(input_columns="label", operations=typecast_op)
    if mode == "train":
        cifar_ds = cifar_ds.map(input_columns="image", operations=random_crop_op)
        cifar_ds = cifar_ds.map(input_columns="image", operations=random_horizontal_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=resize_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=rescale_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=normalize_op)
    cifar_ds = cifar_ds.map(input_columns="image", operations=channel_swap_op)

    cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
    cifar_ds = cifar_ds.repeat(repeat_size)
    return cifar_ds
Example #29
def pad_batch_config():
    data2 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000)  # shape = [32,32,3]
    data2 = data2.map(operations=(lambda x: x.reshape(-1)), input_columns="image")  # reshape to 1d (3072 values)
    data2 = data2.batch(batch_size=25, drop_remainder=True,
                        pad_info={"image": ([3888], 0)})  # zero-pad each row to length 3888
    res = []
    for data in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
        res.append(data["image"])
    return res
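A quick shape check of the padded batches, as a sketch; cifar10_dir is assumed to point at the test data directory used elsewhere on this page.

batches = pad_batch_config()
for batch in batches:
    # 25 rows per batch, each padded from 32*32*3 = 3072 values to 3888
    assert batch.shape == (25, 3888)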
Example #30
def test_cifar10_dataset_size():
    ds_total = ds.Cifar10Dataset(CIFAR10_DATA_DIR)
    assert ds_total.get_dataset_size() == 10000

    # test get_dataset_size with usage flag
    train_size = ds.Cifar100Dataset(CIFAR100_DATA_DIR,
                                    usage="train").get_dataset_size()
    assert train_size == 0
    train_size = ds.Cifar10Dataset(CIFAR10_DATA_DIR,
                                   usage="train").get_dataset_size()
    assert train_size == 10000

    all_size = ds.Cifar10Dataset(CIFAR10_DATA_DIR,
                                 usage="all").get_dataset_size()
    assert all_size == 10000

    ds_shard_1_0 = ds.Cifar10Dataset(CIFAR10_DATA_DIR,
                                     num_shards=1,
                                     shard_id=0)
    assert ds_shard_1_0.get_dataset_size() == 10000

    ds_shard_2_0 = ds.Cifar10Dataset(CIFAR10_DATA_DIR,
                                     num_shards=2,
                                     shard_id=0)
    assert ds_shard_2_0.get_dataset_size() == 5000

    ds_shard_3_0 = ds.Cifar10Dataset(CIFAR10_DATA_DIR,
                                     num_shards=3,
                                     shard_id=0)
    assert ds_shard_3_0.get_dataset_size() == 3334

    ds_shard_7_0 = ds.Cifar10Dataset(CIFAR10_DATA_DIR,
                                     num_shards=7,
                                     shard_id=0)
    assert ds_shard_7_0.get_dataset_size() == 1429
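The per-shard sizes above follow from ceiling division of the 10000 rows by num_shards; a small sketch of the arithmetic behind the assertions.

total_rows = 10000
for num_shards, expected in [(1, 10000), (2, 5000), (3, 3334), (7, 1429)]:
    # shard size is ceil(total_rows / num_shards): every shard must cover the
    # dataset even when it does not divide evenly
    assert -(-total_rows // num_shards) == expected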