def test_to_type_02():
    """
    Test ToType Op with md5 comparison: valid input (str)
    Expect to pass
    """
    logger.info("test_to_type_02")

    # Build the source dataset.
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

    # Note: Convert to type int
    ops = [py_vision.Decode(),
           py_vision.ToTensor(),
           py_vision.ToType('int')]
    composed = mindspore.dataset.transforms.py_transforms.Compose(ops)
    data = data.map(operations=composed, input_columns=["image"])

    # Compare with expected md5 from images
    filename = "to_type_02_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
def test_to_type_03():
    """
    Test ToType Op: invalid input image type
    Expect to raise error
    """
    logger.info("test_to_type_03")

    try:
        # Generate dataset
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        transforms = [
            py_vision.Decode(),
            # Note: If the object is not numpy, e.g. PIL image, TypeError will raise
            py_vision.ToType(np.int32)
        ]
        transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
        data = data.map(operations=transform, input_columns=["image"])
        # Fix: map() only registers the transform lazily; it executes (and can
        # raise) only when the pipeline is consumed, so iterate it here.
        for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
    except Exception as e:
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Numpy" in str(e)
    else:
        # Fix: previously the test silently passed when no exception was raised,
        # contradicting "Expect to raise error".
        raise AssertionError("ToType on a non-numpy (PIL) image should have raised an error")
def test_to_type_05():
    """
    Test ToType Op: invalid output_type
    Expect to raise error
    """
    logger.info("test_to_type_05")

    try:
        # Generate dataset
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        transforms = [
            py_vision.Decode(),
            py_vision.ToTensor(),
            # Note: if output_type is not explicitly given
            py_vision.ToType('invalid')
        ]
        transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
        data = data.map(operations=transform, input_columns=["image"])
        # Fix: map() only registers the transform lazily; it executes (and can
        # raise) only when the pipeline is consumed, so iterate it here.
        for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass
    except Exception as e:
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "data type" in str(e)
    else:
        # Fix: previously the test silently passed when no exception was raised,
        # contradicting "Expect to raise error".
        raise AssertionError("invalid output_type should have raised an error")
def get_de_dataset(args):
    """Build the batched image/label training pipeline.

    Args:
        args: config object; reads ``data_dir``, ``is_distributed``,
            ``world_size``, ``local_rank``, ``per_batch_size`` and uses
            ``args.logger`` for progress logging.

    Returns:
        tuple: (de_dataset, num_iter_per_npu, num_classes) where
        num_iter_per_npu is the per-device iteration count for one epoch.
    """
    # Label pipeline: cast labels to int32.
    lbl_transforms = [F.ToType(np.int32)]
    transform_label = F2.Compose(lbl_transforms)

    drop_remainder = False

    # Image pipeline: to PIL, random horizontal flip, to tensor, then
    # normalize with mean/std 0.5 (presumably mapping [0, 1] to [-1, 1] —
    # depends on ToTensor's output range).
    transforms = [F.ToPIL(),
                  F.RandomHorizontalFlip(),
                  F.ToTensor(),
                  F.Normalize(mean=[0.5], std=[0.5])]
    transform = F2.Compose(transforms)

    cache_path = os.path.join('cache', os.path.basename(args.data_dir), 'data_cache.pkl')
    # Fix: route through the configured logger instead of a bare print,
    # consistent with the rest of this function.
    args.logger.info(cache_path)
    if not os.path.exists(os.path.dirname(cache_path)):
        os.makedirs(os.path.dirname(cache_path))

    dataset = CustomDataset(args.data_dir, cache_path, args.is_distributed)
    # Fix: len(dataset) instead of calling the dunder __len__ directly.
    args.logger.info("dataset len:{}".format(len(dataset)))
    sampler = DistributedCustomSampler(dataset, num_replicas=args.world_size,
                                       rank=args.local_rank, is_distributed=args.is_distributed)
    de_dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=sampler)
    args.logger.info("after sampler de_dataset datasize :{}".format(
        de_dataset.get_dataset_size()))

    de_dataset = de_dataset.map(input_columns="image", operations=transform)
    de_dataset = de_dataset.map(input_columns="label", operations=transform_label)
    de_dataset = de_dataset.project(columns=["image", "label"])
    de_dataset = de_dataset.batch(args.per_batch_size, drop_remainder=drop_remainder)

    num_iter_per_npu = math.ceil(
        len(dataset) * 1.0 / args.world_size / args.per_batch_size)
    num_classes = len(dataset.classes)
    return de_dataset, num_iter_per_npu, num_classes
def test_to_type_op():
    """
    Test ToType Op
    """
    logger.info("test_to_type_op")

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: Convert the datatype from float32 to int16
    cast_ops = [py_vision.Decode(),
                py_vision.ToTensor(),
                py_vision.ToType(np.int16)]
    data1 = data1.map(operations=mindspore.dataset.transforms.py_transforms.Compose(cast_ops),
                      input_columns=["image"])

    # Second dataset: identical pipeline without the cast, as a reference.
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    plain_ops = [py_vision.Decode(),
                 py_vision.ToTensor()]
    data2 = data2.map(operations=mindspore.dataset.transforms.py_transforms.Compose(plain_ops),
                      input_columns=["image"])

    it1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True)
    it2 = data2.create_dict_iterator(num_epochs=1, output_numpy=True)
    for row1, row2 in zip(it1, it2):
        img1 = row1["image"]
        img2 = row2["image"]
        assert isinstance(img1, np.ndarray)
        assert isinstance(img2, np.ndarray)
        # Only the dtype should differ between the two pipelines.
        assert img1.dtype == np.int16
        assert img2.dtype == np.float32
        assert img1.shape == img2.shape