import json

import mindspore.dataset as ds
from mindspore import log as logger

# CV_FILE_NAME, CV_DIR_NAME, DATA_DIR, get_data() and the
# add_and_remove_cv_file fixture are assumed to be defined earlier in this
# test module; they generate and clean up the MindRecord test data.


def skip_test_minddataset(add_and_remove_cv_file=True):
    """tutorial for cv minddataset."""
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    indices = [1, 2, 3, 5, 7]
    sampler = ds.SubsetRandomSampler(indices)
    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
                              sampler=sampler)

    # Serializing into python dictionary
    ds1_dict = ds.serialize(data_set)
    # Serializing into json object
    ds1_json = json.dumps(ds1_dict, sort_keys=True)

    # Reconstruct dataset pipeline from its serialized form
    data_set = ds.deserialize(input_dict=ds1_dict)
    ds2_dict = ds.serialize(data_set)
    # Serializing into json object
    ds2_json = json.dumps(ds2_dict, sort_keys=True)

    assert ds1_json == ds2_json

    _ = get_data(CV_DIR_NAME)
    assert data_set.get_dataset_size() == 5
    num_iter = 0
    for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
        num_iter += 1
    assert num_iter == 5
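# A minimal sketch of the file-based variant of the round trip above: besides
# returning a python dictionary, ds.serialize() can also write the pipeline
# to a JSON file via its json_filepath argument, and ds.deserialize() can
# rebuild the pipeline from that file. The pipeline_json path and helper name
# below are illustrative only, not fixtures of this test suite.
def sketch_serialize_roundtrip_via_file(data_set, pipeline_json="pipeline.json"):
    ds.serialize(data_set, json_filepath=pipeline_json)  # dump pipeline to disk
    return ds.deserialize(json_filepath=pipeline_json)   # rebuild it from the file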
def test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file):
    """tutorial for cv minddataset."""
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    indices = [1, 2, 4, -1, -2]
    sampler = ds.SubsetRandomSampler(indices)
    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
                              sampler=sampler)

    assert data_set.get_dataset_size() == 5
    num_iter = 0
    for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
        logger.info(
            "-------------- cv reader basic: {} ------------------------".
            format(num_iter))
        logger.info(
            "-------------- item[data]: {} -----------------------------".
            format(item["data"]))
        logger.info(
            "-------------- item[file_name]: {} ------------------------".
            format(item["file_name"]))
        logger.info(
            "-------------- item[label]: {} ----------------------------".
            format(item["label"]))
        num_iter += 1
    assert num_iter == 5
def skip_test_chained_sampler_08():
    logger.info("Test Case Chained Sampler - SubsetRandom and Distributed, 4 shards")

    # Create chained sampler, subset random and distributed
    indices = [0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 11]
    sampler = ds.SubsetRandomSampler(indices, num_samples=12)
    child_sampler = ds.DistributedSampler(num_shards=4, shard_id=1)
    sampler.add_child(child_sampler)
    # Create ImageFolderDataset with sampler
    data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)

    # Verify dataset size
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 3

    # Verify number of iterations
    num_iter = 0
    for item in data1.create_dict_iterator(num_epochs=1):
        # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
        num_iter += 1

    logger.info("Number of data in data1: {}".format(num_iter))
    # Note: SubsetRandomSampler returns 12 samples
    # Note: Each of 4 shards has 3 samples
    assert num_iter == 3
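# The assertions above rest on simple shard arithmetic: the parent
# SubsetRandomSampler emits len(indices) == 12 sample ids, and the chained
# DistributedSampler splits that stream across num_shards, leaving each shard
# roughly ceil(12 / 4) == 3 samples. A rough sketch of that count (the helper
# name is illustrative only, not part of this test suite):
def sketch_samples_per_shard(num_parent_samples=12, num_shards=4):
    import math
    return math.ceil(num_parent_samples / num_shards)  # -> 3 for this test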
def test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file):
    """tutorial for cv minddataset."""
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    indices = [1, 2, 4, 11, 13]
    sampler = ds.SubsetRandomSampler(indices)
    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
                              sampler=sampler)

    assert data_set.get_dataset_size() == 5
    num_iter = 0
    for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
        logger.info(
            "-------------- cv reader basic: {} ------------------------".
            format(num_iter))
        logger.info(
            "-------------- item[data]: {} -----------------------------".
            format(item["data"]))
        logger.info(
            "-------------- item[file_name]: {} ------------------------".
            format(item["file_name"]))
        logger.info(
            "-------------- item[label]: {} ----------------------------".
            format(item["label"]))
        num_iter += 1
    assert num_iter == 5
def test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file):
    """tutorial for cv minddataset."""
    columns_list = ["data", "file_name", "label"]
    num_readers = 4
    indices = []
    sampler = ds.SubsetRandomSampler(indices)
    data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers,
                              sampler=sampler)

    _ = get_data(CV_DIR_NAME)
    assert data_set.get_dataset_size() == 10
    num_iter = 0
    for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True):
        logger.info(
            "-------------- cv reader basic: {} ------------------------".
            format(num_iter))
        logger.info(
            "-------------- item[data]: {} -----------------------------".
            format(item["data"]))
        logger.info(
            "-------------- item[file_name]: {} ------------------------".
            format(item["file_name"]))
        logger.info(
            "-------------- item[label]: {} ----------------------------".
            format(item["label"]))
        num_iter += 1
    assert num_iter == 0
def test_subset_random_sampler():
    logger.info("Test Case SubsetRandomSampler")

    # define parameters
    repeat_count = 1

    # apply dataset operations
    indices = [0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 11]
    sampler = ds.SubsetRandomSampler(indices)
    data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)
    data1 = data1.repeat(repeat_count)

    num_iter = 0
    for item in data1.create_dict_iterator(num_epochs=1):
        # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
        num_iter += 1

    logger.info("Number of data in data1: {}".format(num_iter))
    assert num_iter == 12
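# SubsetRandomSampler visits every id in `indices` exactly once per epoch, in
# a shuffled order, which is why the loop above iterates len(indices) == 12
# times. A rough pure-python model of that behaviour (illustrative only, not
# the sampler's actual implementation):
def sketch_subset_random_order(indices):
    import random
    order = list(indices)
    random.shuffle(order)  # random permutation of the requested subset
    return order           # each id appears exactly once per epoch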