# Example 1
def test_to_type_02():
    """
    Test ToType Op with md5 comparison: valid input (str)
    Expect to pass
    """
    logger.info("test_to_type_02")
    # Build the source dataset from the TFRecord files
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Decode -> ToTensor -> cast; output_type is given as the string name 'int'
    ops = [
        py_vision.Decode(),
        py_vision.ToTensor(),
        py_vision.ToType('int'),
    ]
    composed = mindspore.dataset.transforms.py_transforms.Compose(ops)
    data = data.map(operations=composed, input_columns=["image"])

    # Validate the pipeline output against the stored golden md5
    filename = "to_type_02_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
# Example 2
def test_to_type_03():
    """
    Test ToType Op: invalid input image type (PIL image, not numpy array).
    Expect an error whose message mentions "Numpy".
    """
    logger.info("test_to_type_03")

    try:
        # Generate dataset
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        transforms = [
            py_vision.Decode(),
            # Note: If the object is not numpy, e.g. PIL image, TypeError will raise
            py_vision.ToType(np.int32)
        ]
        transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
        data = data.map(operations=transform, input_columns=["image"])
        # map() is lazy, so the invalid transform may only fail once data
        # actually flows through the pipeline — force execution here.
        for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
        # No error was raised: fail explicitly instead of passing vacuously.
        raise AssertionError("ToType on a non-numpy input unexpectedly succeeded")
    except AssertionError:
        raise
    except Exception as e:
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Numpy" in str(e)
# Example 3
def test_to_type_05():
    """
    Test ToType Op: invalid output_type argument.
    Expect an error whose message mentions "data type".
    """
    logger.info("test_to_type_05")

    try:
        # Generate dataset
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        transforms = [
            py_vision.Decode(),
            py_vision.ToTensor(),
            # Note: 'invalid' is not a recognized numpy data type name
            py_vision.ToType('invalid')
        ]
        transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
        data = data.map(operations=transform, input_columns=["image"])
        # map() is lazy, so the invalid output_type may only fail once data
        # actually flows through the pipeline — force execution here.
        for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
        # No error was raised: fail explicitly instead of passing vacuously.
        raise AssertionError("ToType with invalid output_type unexpectedly succeeded")
    except AssertionError:
        raise
    except Exception as e:
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "data type" in str(e)
# Example 4
def get_de_dataset(args):
    """
    Build the distributed training dataset pipeline.

    Args:
        args: namespace providing data_dir, is_distributed, world_size,
            local_rank, per_batch_size and a logger.

    Returns:
        tuple: (de_dataset, num_iter_per_npu, num_classes)
    """
    # Labels are cast to int32 for downstream loss computation
    lbl_transforms = [F.ToType(np.int32)]
    transform_label = F2.Compose(lbl_transforms)

    drop_remainder = False

    # Image pipeline: PIL conversion -> random flip -> tensor -> normalize
    transforms = [
        F.ToPIL(),
        F.RandomHorizontalFlip(),
        F.ToTensor(),
        F.Normalize(mean=[0.5], std=[0.5])
    ]
    transform = F2.Compose(transforms)
    cache_path = os.path.join('cache', os.path.basename(args.data_dir),
                              'data_cache.pkl')
    print(cache_path)
    # exist_ok avoids the race between an existence check and the creation
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)
    dataset = CustomDataset(args.data_dir, cache_path, args.is_distributed)
    args.logger.info("dataset len:{}".format(len(dataset)))
    sampler = DistributedCustomSampler(dataset,
                                       num_replicas=args.world_size,
                                       rank=args.local_rank,
                                       is_distributed=args.is_distributed)
    de_dataset = de.GeneratorDataset(dataset, ["image", "label"],
                                     sampler=sampler)
    args.logger.info("after sampler de_dataset datasize :{}".format(
        de_dataset.get_dataset_size()))
    de_dataset = de_dataset.map(input_columns="image", operations=transform)
    de_dataset = de_dataset.map(input_columns="label",
                                operations=transform_label)
    de_dataset = de_dataset.project(columns=["image", "label"])
    de_dataset = de_dataset.batch(args.per_batch_size,
                                  drop_remainder=drop_remainder)
    # Iterations per device: total samples split across the world, per batch
    num_iter_per_npu = math.ceil(
        len(dataset) * 1.0 / args.world_size / args.per_batch_size)
    num_classes = len(dataset.classes)

    return de_dataset, num_iter_per_npu, num_classes
# Example 5
def test_to_type_op():
    """
    Test ToType Op
    """
    logger.info("test_to_type_op")

    def _build_pipeline(extra_ops):
        # Decode + ToTensor pipeline with optional trailing ops appended
        source = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        ops = [py_vision.Decode(), py_vision.ToTensor()] + extra_ops
        composed = mindspore.dataset.transforms.py_transforms.Compose(ops)
        return source.map(operations=composed, input_columns=["image"])

    # First dataset: cast the float32 output of ToTensor down to int16
    data1 = _build_pipeline([py_vision.ToType(np.int16)])
    # Second dataset: reference pipeline without the cast
    data2 = _build_pipeline([])

    iter1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    iter2 = data2.create_dict_iterator(num_epochs=1, output_numpy=True)
    for item1, item2 in zip(iter1, iter2):
        image1 = item1["image"]
        image2 = item2["image"]

        assert isinstance(image1, np.ndarray)
        assert isinstance(image2, np.ndarray)
        # Only the dtype should differ between the two pipelines;
        # the image shape must be preserved by the cast.
        assert image1.dtype == np.int16
        assert image2.dtype == np.float32
        assert image1.shape == image2.shape