def test_cifar100_exception():
    """
    Test error cases for Cifar100Dataset
    """
    logger.info("Test error cases for Cifar100Dataset")
    error_msg_1 = "sampler and shuffle cannot be specified at the same time"
    with pytest.raises(RuntimeError, match=error_msg_1):
        ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, sampler=ds.PKSampler(3))

    error_msg_2 = "sampler and sharding cannot be specified at the same time"
    with pytest.raises(RuntimeError, match=error_msg_2):
        ds.Cifar100Dataset(DATA_DIR_100, sampler=ds.PKSampler(3), num_shards=2, shard_id=0)

    error_msg_3 = "num_shards is specified and currently requires shard_id as well"
    with pytest.raises(RuntimeError, match=error_msg_3):
        ds.Cifar100Dataset(DATA_DIR_100, num_shards=10)

    error_msg_4 = "shard_id is specified but num_shards is not"
    with pytest.raises(RuntimeError, match=error_msg_4):
        ds.Cifar100Dataset(DATA_DIR_100, shard_id=0)

    error_msg_5 = "Input shard_id is not within the required interval"
    with pytest.raises(ValueError, match=error_msg_5):
        ds.Cifar100Dataset(DATA_DIR_100, num_shards=2, shard_id=-1)
    # Fixed: this case previously constructed Cifar10Dataset, but this test targets Cifar100Dataset
    with pytest.raises(ValueError, match=error_msg_5):
        ds.Cifar100Dataset(DATA_DIR_100, num_shards=2, shard_id=5)

    error_msg_6 = "num_parallel_workers exceeds"
    with pytest.raises(ValueError, match=error_msg_6):
        ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, num_parallel_workers=0)
    with pytest.raises(ValueError, match=error_msg_6):
        ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, num_parallel_workers=88)
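# A minimal sketch, not part of the original suite: the "required interval"
# that error_msg_5 refers to is [0, num_shards). The boundary values the
# test passes (-1 and 5 with num_shards=2) both fall outside it.
def sketch_shard_id_interval():
    num_shards = 2
    for shard_id in (-1, 5):
        assert not 0 <= shard_id < num_shards  # outside [0, num_shards)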
def test_chained_sampler_06():
    logger.info("Test Case Chained Sampler - WeightedRandom and PKSampler")

    # Create chained sampler, WeightedRandom and PKSampler
    weights = [1.0, 0.1, 0.02, 0.3, 0.4, 0.05, 1.2, 0.13, 0.14, 0.015, 0.16, 0.5]
    sampler = ds.WeightedRandomSampler(weights=weights, num_samples=12)
    child_sampler = ds.PKSampler(num_val=3)  # Number of elements per class is 3 (and there are 4 classes)
    sampler.add_child(child_sampler)

    # Create ImageFolderDataset with sampler
    data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)

    # Verify dataset size
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 12

    # Verify number of iterations
    num_iter = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
        num_iter += 1

    logger.info("Number of data in data1: {}".format(num_iter))
    # Note: WeightedRandomSampler produces 12 samples
    # Note: Child PKSampler produces 12 samples
    assert num_iter == 12
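# A hedged sketch of the chain arithmetic above, assuming (per the test's
# comments) 4 classes with 3 elements each: the child sampler runs first and
# the parent samples from the child's output, so PKSampler(num_val=3) yields
# 4 * 3 = 12 candidate indices and WeightedRandomSampler then draws
# num_samples=12 of them, giving the expected 12 rows.
def sketch_chained_sampler_06_size():
    num_classes, num_val = 4, 3      # assumptions taken from the test comments
    child_output = num_classes * num_val
    parent_num_samples = 12          # WeightedRandomSampler(num_samples=12)
    assert child_output == 12
    assert parent_num_samples == 12  # final row count is set by the parent sampler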
def test_cv_minddataset_pk_sample_error_class_column():
    create_cv_mindrecord(1)
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    # 'no_exsit_column' is a deliberately nonexistent class column, so the launch must fail
    sampler = ds.PKSampler(5, None, True, 'no_exsit_column')
    with pytest.raises(Exception, match="MindRecordOp launch failed"):
        data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, sampler=sampler)
        num_iter = 0
        for _ in data_set.create_dict_iterator():
            num_iter += 1
    os.remove(CV_FILE_NAME)
    os.remove("{}.db".format(CV_FILE_NAME))
def test_cifar10_pk_sampler_get_dataset_size():
    """
    Test Cifar10Dataset with PKSampler and get_dataset_size
    """
    sampler = ds.PKSampler(3)
    data = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler)
    num_iter = 0
    ds_sz = data.get_dataset_size()
    for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        num_iter += 1
    assert ds_sz == num_iter == 30
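# A minimal sketch, assuming (as the test above implies) that the CIFAR-10
# test data under DATA_DIR_10 has 10 classes with at least 3 samples each:
# PKSampler(num_val=k) yields k samples per class, so the size is 10 * 3.
def sketch_pk_sampler_dataset_size():
    num_classes, num_val = 10, 3
    assert num_classes * num_val == 30  # matches the assertion in the test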
def test_cv_minddataset_pk_sample_exclusive_shuffle():
    create_cv_mindrecord(1)
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    sampler = ds.PKSampler(2)
    with pytest.raises(Exception, match="sampler and shuffle cannot be specified at the same time."):
        data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers,
                                  sampler=sampler, shuffle=False)
        num_iter = 0
        for _ in data_set.create_dict_iterator():
            num_iter += 1
    os.remove(CV_FILE_NAME)
    os.remove("{}.db".format(CV_FILE_NAME))
def test_cifar10_with_chained_sampler_get_dataset_size():
    """
    Test Cifar10Dataset with a SequentialSampler whose child is a PKSampler, and get_dataset_size
    """
    sampler = ds.SequentialSampler(start_index=0, num_samples=5)
    child_sampler = ds.PKSampler(4)
    sampler.add_child(child_sampler)
    data = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler)
    num_iter = 0
    ds_sz = data.get_dataset_size()
    for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        num_iter += 1
    assert ds_sz == num_iter == 5
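# A hedged sketch of why the chained size above is 5: the child PKSampler(4)
# first yields 4 samples per class (assuming 10 CIFAR-10 classes, 40 indices),
# then the parent SequentialSampler keeps only its first num_samples=5.
def sketch_sequential_over_pk_size():
    child_output = 10 * 4   # assumed classes x num_val
    num_samples = 5         # SequentialSampler(start_index=0, num_samples=5)
    assert min(child_output, num_samples) == 5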
def test_cifar100_pk_sampler():
    """
    Test Cifar100Dataset with PKSampler
    """
    logger.info("Test Cifar100Dataset with PKSampler")
    golden = [i for i in range(20)]
    sampler = ds.PKSampler(1)
    data = ds.Cifar100Dataset(DATA_DIR_100, sampler=sampler)
    num_iter = 0
    label_list = []
    for item in data.create_dict_iterator():
        label_list.append(item["coarse_label"])
        num_iter += 1
    np.testing.assert_array_equal(golden, label_list)
    assert num_iter == 20
def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file):
    """tutorial for cv minddataset."""
    num_readers = 4
    sampler = ds.PKSampler(2)
    data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, sampler=sampler)
    assert data_set.get_dataset_size() == 6
    num_iter = 0
    for item in data_set.create_dict_iterator():
        logger.info("-------------- cv reader basic: {} ------------------------".format(num_iter))
        logger.info("-------------- item[file_name]: {} ------------------------".format(
            "".join([chr(x) for x in item["file_name"]])))
        logger.info("-------------- item[label]: {} ----------------------------".format(item["label"]))
        num_iter += 1
def test_cifar10_pk_sampler():
    """
    Test Cifar10Dataset with PKSampler
    """
    logger.info("Test Cifar10Dataset Op with PKSampler")
    golden = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4,
              5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9]
    sampler = ds.PKSampler(3)
    data = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler)
    num_iter = 0
    label_list = []
    for item in data.create_dict_iterator():
        label_list.append(item["label"])
        num_iter += 1
    np.testing.assert_array_equal(golden, label_list)
    assert num_iter == 30
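# A minimal sketch, not part of the original suite: PKSampler(num_val=k)
# visits classes in order and takes k samples from each, so the golden label
# sequence above can be derived rather than hard-coded.
def sketch_pk_sampler_golden_labels():
    num_classes, num_val = 10, 3
    golden = [label for label in range(num_classes) for _ in range(num_val)]
    assert golden == [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4,
                      5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9]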
def test_mnist_pk_sampler():
    """
    Test MnistDataset with PKSampler
    """
    logger.info("Test MnistDataset Op with PKSampler")
    golden = [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4,
              5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9]
    sampler = ds.PKSampler(3)
    data = ds.MnistDataset(DATA_DIR, sampler=sampler)
    num_iter = 0
    label_list = []
    for item in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        label_list.append(item["label"])
        num_iter += 1
    np.testing.assert_array_equal(golden, label_list)
    assert num_iter == 30
def test_cv_minddataset_pk_sample_out_of_range(add_and_remove_cv_file):
    """tutorial for cv minddataset."""
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    sampler = ds.PKSampler(5, None, True)
    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, sampler=sampler)
    assert data_set.get_dataset_size() == 15
    num_iter = 0
    for item in data_set.create_dict_iterator():
        logger.info("-------------- cv reader basic: {} ------------------------".format(num_iter))
        logger.info("-------------- item[file_name]: {} ------------------------".format(to_str(item["file_name"])))
        logger.info("-------------- item[label]: {} ----------------------------".format(item["label"]))
        num_iter += 1
def test_pk_sampler():
    logger.info("Test Case PKSampler")
    # define parameters
    repeat_count = 1

    # apply dataset operations
    sampler = ds.PKSampler(3)
    data1 = ds.ImageFolderDatasetV2(DATA_DIR, sampler=sampler)
    data1 = data1.repeat(repeat_count)

    num_iter = 0
    for item in data1.create_dict_iterator():  # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
        num_iter += 1

    logger.info("Number of data in data1: {}".format(num_iter))
    assert num_iter == 12
def test_imagefolder_sampler_chain():
    """
    Test ImageFolderDataset sampler chain
    """
    logger.info("test_imagefolder_sampler_chain")

    sampler = ds.SequentialSampler(start_index=1, num_samples=3)
    child_sampler = ds.PKSampler(2)
    sampler.add_child(child_sampler)
    data1 = ds.ImageFolderDataset(IMAGENET_RAWDATA_DIR, sampler=sampler)

    # Verify dataset size
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 3

    # Verify number of rows
    assert sum([1 for _ in data1]) == 3

    # Verify dataset contents
    res = []
    for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True):
        logger.info("item: {}".format(item))
        res.append(item)
    logger.info("dataset: {}".format(res))
def test_mnist_exception():
    """
    Test error cases for MnistDataset
    """
    logger.info("Test error cases for MnistDataset")
    error_msg_1 = "sampler and shuffle cannot be specified at the same time"
    with pytest.raises(RuntimeError, match=error_msg_1):
        ds.MnistDataset(DATA_DIR, shuffle=False, sampler=ds.PKSampler(3))

    error_msg_2 = "sampler and sharding cannot be specified at the same time"
    with pytest.raises(RuntimeError, match=error_msg_2):
        ds.MnistDataset(DATA_DIR, sampler=ds.PKSampler(3), num_shards=2, shard_id=0)

    error_msg_3 = "num_shards is specified and currently requires shard_id as well"
    with pytest.raises(RuntimeError, match=error_msg_3):
        ds.MnistDataset(DATA_DIR, num_shards=10)

    error_msg_4 = "shard_id is specified but num_shards is not"
    with pytest.raises(RuntimeError, match=error_msg_4):
        ds.MnistDataset(DATA_DIR, shard_id=0)

    error_msg_5 = "Input shard_id is not within the required interval"
    with pytest.raises(ValueError, match=error_msg_5):
        ds.MnistDataset(DATA_DIR, num_shards=5, shard_id=-1)
    with pytest.raises(ValueError, match=error_msg_5):
        ds.MnistDataset(DATA_DIR, num_shards=5, shard_id=5)
    with pytest.raises(ValueError, match=error_msg_5):
        ds.MnistDataset(DATA_DIR, num_shards=2, shard_id=5)

    error_msg_6 = "num_parallel_workers exceeds"
    with pytest.raises(ValueError, match=error_msg_6):
        ds.MnistDataset(DATA_DIR, shuffle=False, num_parallel_workers=0)
    with pytest.raises(ValueError, match=error_msg_6):
        ds.MnistDataset(DATA_DIR, shuffle=False, num_parallel_workers=256)
    with pytest.raises(ValueError, match=error_msg_6):
        ds.MnistDataset(DATA_DIR, shuffle=False, num_parallel_workers=-2)

    error_msg_7 = "Argument shard_id"
    with pytest.raises(TypeError, match=error_msg_7):
        ds.MnistDataset(DATA_DIR, num_shards=2, shard_id="0")

    def exception_func(item):
        raise Exception("Error occur!")

    error_msg_8 = "The corresponding data files"
    with pytest.raises(RuntimeError, match=error_msg_8):
        data = ds.MnistDataset(DATA_DIR)
        data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        for _ in data.__iter__():
            pass
    with pytest.raises(RuntimeError, match=error_msg_8):
        data = ds.MnistDataset(DATA_DIR)
        data = data.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1)
        data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        for _ in data.__iter__():
            pass
    with pytest.raises(RuntimeError, match=error_msg_8):
        data = ds.MnistDataset(DATA_DIR)
        data = data.map(operations=exception_func, input_columns=["label"], num_parallel_workers=1)
        for _ in data.__iter__():
            pass
def test_coco_case_exception():
    try:
        data1 = ds.CocoDataset("path_not_exist/", annotation_file=ANNOTATION_FILE, task="Detection")
        for _ in data1.__iter__():
            pass
        assert False
    except ValueError as e:
        assert "does not exist or permission denied" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file="./file_not_exist", task="Detection")
        for _ in data1.__iter__():
            pass
        assert False
    except ValueError as e:
        assert "does not exist or permission denied" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Invalid task")
        for _ in data1.__iter__():
            pass
        assert False
    except ValueError as e:
        assert "Invalid task type" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=LACKOFIMAGE_FILE, task="Detection")
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "invalid node found in json" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=INVALID_CATEGORY_ID_FILE, task="Detection")
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "category_id can't find in categories" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=INVALID_FILE, task="Detection")
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "json.exception.parse_error" in str(e)

    try:
        sampler = ds.PKSampler(3)
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=INVALID_FILE, task="Detection", sampler=sampler)
        for _ in data1.__iter__():
            pass
        assert False
    except ValueError as e:
        assert "CocoDataset doesn't support PKSampler" in str(e)

    def exception_func(item):
        raise Exception("Error occur!")

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection")
        data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection")
        data1 = data1.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1)
        data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection")
        data1 = data1.map(operations=exception_func, input_columns=["bbox"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Detection")
        data1 = data1.map(operations=exception_func, input_columns=["category_id"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff")
        data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff")
        data1 = data1.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1)
        data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff")
        data1 = data1.map(operations=exception_func, input_columns=["segmentation"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=ANNOTATION_FILE, task="Stuff")
        data1 = data1.map(operations=exception_func, input_columns=["iscrowd"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint")
        data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint")
        data1 = data1.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1)
        data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint")
        data1 = data1.map(operations=exception_func, input_columns=["keypoints"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=KEYPOINT_FILE, task="Keypoint")
        data1 = data1.map(operations=exception_func, input_columns=["num_keypoints"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic")
        data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic")
        data1 = data1.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1)
        data1 = data1.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic")
        data1 = data1.map(operations=exception_func, input_columns=["bbox"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic")
        data1 = data1.map(operations=exception_func, input_columns=["category_id"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)

    try:
        data1 = ds.CocoDataset(DATA_DIR, annotation_file=PANOPTIC_FILE, task="Panoptic")
        data1 = data1.map(operations=exception_func, input_columns=["area"], num_parallel_workers=1)
        for _ in data1.__iter__():
            pass
        assert False
    except RuntimeError as e:
        assert "map operation: [PyFunc] failed. The corresponding data files" in str(e)