Example #1
import os

import mindspore.common.dtype as mstype
import mindspore.dataset.engine as de
import mindspore.dataset.transforms.c_transforms as C2
import mindspore.dataset.vision.c_transforms as C


def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset.

    Args:
        dataset_path (str): The path of dataset.
        do_train (bool): Whether dataset is used for train or eval.
        repeat_num (int): The repeat times of dataset. Default: 1.
        batch_size (int): The batch size of dataset. Default: 32.

    Returns:
        Dataset.
    """
    # distributed context from the launch environment; default to a
    # single-device run when DEVICE_ID / RANK_SIZE are not set
    device_id = int(os.getenv('DEVICE_ID', '0'))
    device_num = int(os.getenv('RANK_SIZE', '1'))

    if do_train:
        dataset_path = os.path.join(dataset_path, 'train')
        do_shuffle = True
    else:
        dataset_path = os.path.join(dataset_path, 'eval')
        do_shuffle = False

    if device_num == 1 or not do_train:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=do_shuffle)
    else:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=do_shuffle,
                               num_shards=device_num, shard_id=device_id)

    resize_height = 224
    resize_width = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = C.RandomCrop((32, 32), (4, 4, 4, 4))
    # flip with probability 0.5 (the original device_id / (device_id + 1)
    # expression made the probability device-dependent: 0.0 on device 0)
    random_horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)

    resize_op = C.Resize((resize_height, resize_width))
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])

    change_swap_op = C.HWC2CHW()

    trans = []
    if do_train:
        trans += [random_crop_op, random_horizontal_flip_op]

    trans += [resize_op, rescale_op, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
    ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
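
A minimal usage sketch for the function above (the ./cifar10 path is hypothetical; it assumes train/ and eval/ subfolders holding the CIFAR-10 binaries, per the os.path.join logic in the function):

train_ds = create_dataset("./cifar10", do_train=True, batch_size=32)
print("batches per epoch:", train_ds.get_dataset_size())
for batch in train_ds.create_dict_iterator():
    # with the 224x224 resize and HWC2CHW conversion above:
    # image (32, 3, 224, 224), label (32,)
    print(batch["image"].shape, batch["label"].shape)
    break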
Example #2
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32

    Returns:
        dataset
    """
    # default to a single-device run when the launcher has not set these
    device_num = int(os.getenv("DEVICE_NUM", "1"))
    rank_id = int(os.getenv("RANK_ID", "0"))

    if device_num == 1:
        ds = de.Cifar10Dataset(dataset_path,
                               num_parallel_workers=8,
                               shuffle=True)
    else:
        ds = de.Cifar10Dataset(dataset_path,
                               num_parallel_workers=8,
                               shuffle=True,
                               num_shards=device_num,
                               shard_id=rank_id)

    # define map operations
    trans = []
    if do_train:
        trans += [
            C.RandomCrop((32, 32), (4, 4, 4, 4)),
            C.RandomHorizontalFlip(prob=0.5)
        ]

    trans += [
        C.Resize((config.image_height, config.image_width)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label",
                num_parallel_workers=8,
                operations=type_cast_op)
    ds = ds.map(input_columns="image",
                num_parallel_workers=8,
                operations=trans)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
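
The Rescale and Normalize steps used throughout these examples compose to out = (pixel / 255 - mean) / std. A small standalone numpy check, showing that the widely used CIFAR-10 constants map the per-channel dataset mean approximately to zero:

import numpy as np

mean = np.array([0.4914, 0.4822, 0.4465])
std = np.array([0.2023, 0.1994, 0.2010])
pixel = mean * 255.0                               # an "average" RGB pixel in [0, 255]
out = (pixel * (1.0 / 255.0) + 0.0 - mean) / std   # Rescale, then Normalize
print(out)                                         # ~[0. 0. 0.]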
Example #3
def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
    """
    create a train or evaluate cifar10 dataset for resnet50
    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32
        target(str): the device target. Default: Ascend

    Returns:
        dataset
    """
    if target == "Ascend":
        device_num, rank_id = _get_rank_info()
    else:
        init()
        rank_id = get_rank()
        device_num = get_group_size()

    if device_num == 1:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True,
                               num_shards=device_num, shard_id=rank_id)

    # define map operations
    trans = []
    if do_train:
        trans += [
            C.RandomCrop((32, 32), (4, 4, 4, 4)),
            C.RandomHorizontalFlip(prob=0.5)
        ]

    trans += [
        C.Resize((224, 224)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)
    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
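
Example #3 calls _get_rank_info(), a repo-local helper not shown on this page. A minimal sketch of what it plausibly does, following the RANK_SIZE / RANK_ID environment convention the other examples use (the real helper may differ):

import os

def _get_rank_info():
    # sketch: derive (device_num, rank_id) from the Ascend launch environment
    rank_size = int(os.getenv("RANK_SIZE", "1"))
    rank_id = int(os.getenv("RANK_ID", "0")) if rank_size > 1 else 0
    return rank_size, rank_id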
Example #4
def test_cpp_uniform_augment_random_crop_badinput(num_ops=1):
    """
    Test C++ UniformAugment with a crop size larger than the input image
    """
    logger.info("Test CPP UniformAugment with random_crop bad input")
    batch_size = 2
    cifar10_dir = "../data/dataset/testCifar10Data"
    ds1 = de.Cifar10Dataset(cifar10_dir, shuffle=False)  # shape = [32,32,3]

    transforms_ua = [
        # Note: crop size [224, 224] > image size [32, 32]
        C.RandomCrop(size=[224, 224]),
        C.RandomHorizontalFlip()
    ]
    uni_aug = C.UniformAugment(operations=transforms_ua, num_ops=num_ops)
    ds1 = ds1.map(input_columns="image", operations=uni_aug)

    # apply DatasetOps
    ds1 = ds1.batch(batch_size, drop_remainder=True, num_parallel_workers=1)
    num_batches = 0
    try:
        for _ in ds1.create_dict_iterator():
            num_batches += 1
    except Exception as e:
        assert "Crop size" in str(e)
Example #5
def create_dataset(data_path, batch_size):
    ds = de.Cifar10Dataset(
        data_path,
        num_parallel_workers=8,
        shuffle=False,
    )

    # define map operations (eval-style pipeline: no random augmentation)
    trans = [
        C.Resize((224, 224)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(operations=type_cast_op,
                input_columns="label",
                num_parallel_workers=8)
    ds = ds.map(operations=trans,
                input_columns="image",
                num_parallel_workers=8)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    return ds
Example #6
def create_dataset(args, data_path, batch_size):
    if args.mode == 'train' and args.use_kungfu:
        rank = kfops.kungfu_current_rank()
        size = kfops.kungfu_current_cluster_size()
        ds = de.Cifar10Dataset(
            data_path,
            num_parallel_workers=8,
            shuffle=False,
            num_shards=size,
            shard_id=rank,
        )
        print('using shard %d of %d' % (rank, size))
    else:
        ds = de.Cifar10Dataset(
            data_path,
            num_parallel_workers=8,
            shuffle=False,
        )

    # define map operations (no random augmentation; note that unlike the
    # other examples this pipeline does not resize, so images stay 32x32
    # until HWC2CHW)
    trans = [
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(operations=type_cast_op,
                input_columns="label",
                num_parallel_workers=8)
    ds = ds.map(operations=trans,
                input_columns="image",
                num_parallel_workers=8)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    return ds
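
A call sketch for the function above; argparse.Namespace stands in for the script's parsed arguments (hypothetical values; mode and use_kungfu are the only fields the function reads):

from argparse import Namespace

args = Namespace(mode='eval', use_kungfu=False)   # hypothetical stand-in
eval_ds = create_dataset(args, './cifar10/eval', batch_size=32)
print("eval batches:", eval_ds.get_dataset_size())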
Example #7
def create_dataset(data_path, is_train=True, batch_size=32):
    # import
    import mindspore.common.dtype as mstype
    import mindspore.dataset.engine as de
    import mindspore.dataset.transforms.c_transforms as C2
    import mindspore.dataset.vision.c_transforms as C
    from mindspore.common import set_seed

    set_seed(1)

    # shard
    num_shards = shard_id = None
    rank_size = os.getenv("RANK_SIZE")
    rank_id = os.getenv("RANK_ID")
    if rank_size is not None and rank_id is not None:
        num_shards = int(rank_size)
        shard_id = int(rank_id)

    # define dataset
    data_path = os.path.join(
        data_path,
        "cifar-10-batches-bin" if is_train else "cifar-10-verify-bin")
    ds = de.Cifar10Dataset(data_path,
                           shuffle=True,
                           num_shards=num_shards,
                           shard_id=shard_id,
                           num_parallel_workers=8,
                           num_samples=None)

    # define ops
    comps_ops = list()

    # train or val
    if is_train:
        comps_ops.append(C.RandomCrop((32, 32), (4, 4, 4, 4)))
        comps_ops.append(C.RandomHorizontalFlip(prob=0.5))

    comps_ops.append(C.Resize((224, 224)))
    comps_ops.append(C.Rescale(1 / 255.0, 0.))
    comps_ops.append(
        C.Normalize(mean=[0.4914, 0.4822, 0.4465],
                    std=[0.2023, 0.1994, 0.2010]))
    comps_ops.append(C.HWC2CHW())

    # map ops
    ds = ds.map(input_columns=["image"],
                operations=comps_ops,
                num_parallel_workers=8)
    ds = ds.map(input_columns=["label"],
                operations=C2.TypeCast(mstype.int32),
                num_parallel_workers=8)

    # batch & repeat
    ds = ds.batch(batch_size=batch_size, drop_remainder=is_train)
    ds = ds.repeat(count=1)

    return ds
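
Usage sketch for Example #7 (assuming ./data contains the standard cifar-10-batches-bin/ and cifar-10-verify-bin/ subfolders, as the os.path.join logic expects):

train_ds = create_dataset("./data", is_train=True, batch_size=32)
eval_ds = create_dataset("./data", is_train=False, batch_size=32)
print(train_ds.get_dataset_size(), eval_ds.get_dataset_size())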
Example #8
def create_dataset(dataset_path, do_train, config, repeat_num=1):
    """
    Create a train or eval dataset.

    Args:
        dataset_path (str): The path of dataset.
        do_train (bool): Whether dataset is used for train or eval.
        config: Config object providing platform, run_distribute,
            image_height, image_width and batch_size.
        repeat_num (int): The repeat times of dataset. Default: 1.

    Returns:
        Dataset.
    """
    if do_train:
        dataset_path = os.path.join(dataset_path, 'train')
        do_shuffle = True
    else:
        dataset_path = os.path.join(dataset_path, 'eval')
        do_shuffle = False

    device_id = 0
    device_num = 1
    if config.platform == "GPU":
        if config.run_distribute:
            from mindspore.communication.management import get_rank, get_group_size
            device_id = get_rank()
            device_num = get_group_size()
    elif config.platform == "Ascend":
        device_id = int(os.getenv('DEVICE_ID', '0'))
        device_num = int(os.getenv('RANK_SIZE', '1'))

    if device_num == 1 or not do_train:
        ds = de.Cifar10Dataset(dataset_path,
                               num_parallel_workers=4,
                               shuffle=do_shuffle)
    else:
        ds = de.Cifar10Dataset(dataset_path,
                               num_parallel_workers=4,
                               shuffle=do_shuffle,
                               num_shards=device_num,
                               shard_id=device_id)

    resize_height = config.image_height
    resize_width = config.image_width
    buffer_size = 100
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = C.RandomCrop((32, 32), (4, 4, 4, 4))
    # flip with probability 0.5 (the original device_id / (device_id + 1)
    # expression made the probability device-dependent: 0.0 on device 0)
    random_horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)

    resize_op = C.Resize((resize_height, resize_width))
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize([0.4914, 0.4822, 0.4465],
                               [0.2023, 0.1994, 0.2010])

    change_swap_op = C.HWC2CHW()

    trans = []
    if do_train:
        trans += [random_crop_op, random_horizontal_flip_op]

    trans += [resize_op, rescale_op, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label", operations=type_cast_op)
    ds = ds.map(input_columns="image", operations=trans)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=buffer_size)

    # apply batch operations
    ds = ds.batch(config.batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
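
Example #8 reads platform, run_distribute, image_height, image_width and batch_size from its config argument; a minimal stand-in for experimentation (hypothetical values mirroring the defaults used elsewhere on this page):

from argparse import Namespace

config = Namespace(platform="GPU", run_distribute=False,
                   image_height=224, image_width=224, batch_size=32)
ds = create_dataset("./cifar10", do_train=True, config=config)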
Example #9
import numpy as np
import matplotlib.pyplot as plt

import mindspore.dataset.engine as de
from mindspore import context
from mindspore.train.model import Model
from mindspore.nn.loss.loss import _Loss
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.common.tensor import Tensor
from mindspore.train.callback import Callback
from mindspore.nn.optim.momentum import Momentum
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor

context.set_context(mode=context.GRAPH_MODE, enable_auto_mixed_precision=False, device_target="Ascend")

train_path = "./datasets/cifar10/train"
ds = de.Cifar10Dataset(train_path, num_parallel_workers=8, shuffle=True)
print("the cifar dataset size is :", ds.get_dataset_size())
dict1 = ds.create_dict_iterator()
datas = dict1.get_next()
image = datas["image"].asnumpy()
print("the tensor of image is:", image.shape)
plt.imshow(np.array(image))
plt.show()


def create_dataset(dataset_path, do_train, repeat_num=10, batch_size=32):
    ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True)

    # define map operations
    trans = []
    if do_train: