def test_folder_class_data():
    """Loader over class-folder data yields exactly the jpgs one level below the root."""
    root = get_test_dataset_path(DATASET_PATHS.FOLDER_CLASS_DATA)
    expected = {str(p) for p in Path(root).glob("*/*.jpg")}
    dataset = loaders.from_folder_class_data(root)
    loaded = {sample[0] for sample in dataset}
    assert loaded == expected
def test_string_image_conversion():
    """Calling .image() on a path-string dataset yields PIL Image instances."""
    root = get_test_dataset_path(DATASET_PATHS.FOLDER_DATA)
    string_ds = loaders.from_folder_data(root)
    image_ds = string_ds.image()
    for sample in list(image_ds):
        first = sample[0]
        assert isinstance(first, Image.Image)
def test_folder_data():
    """from_folder_data enumerates the six frame_000N.jpg files in the folder."""
    root = get_test_dataset_path(DATASET_PATHS.FOLDER_DATA)
    expected = {
        str(Path(root) / "frame_000{}.jpg".format(n)) for n in range(1, 7)
    }
    dataset = loaders.from_folder_data(root)
    loaded = {sample[0] for sample in dataset}
    assert loaded == expected
def test_readme_example_1():
    """The first README pipeline example builds and splits without raising."""
    data_root = (
        Path(get_test_dataset_path(DATASET_PATHS.FOLDER_DATASET_CLASS_DATA))
        / "amazon"
    )
    # Same fluent pipeline as the README, split into a named intermediate.
    pipeline = (
        do.from_folder_class_data(data_root)
        .named("data", "label")
        .image_resize((240, 240))
        .one_hot("label")
        .shuffle(seed=42)
    )
    train, val, test = pipeline.split([0.6, 0.2, 0.2])
def test_domain_adaptation():
    """Office31 domain-adaptation splits convert to tensorflow and yield a batch."""
    root = Path(get_test_dataset_path(DATASET_PATHS.FOLDER_DATASET_CLASS_DATA))
    train, val, test = domain_adaptation_office31(
        source_data_path=root / "amazon",
        target_data_path=root / "dslr",
        seed=1,
    )
    # Prepare each split for tensorflow consumption.
    tf_splits = [
        split.to_tensorflow().batch(16).prefetch(2)
        for split in (train, val, test)
    ]
    # Take one item from each split; simply not raising is the assertion.
    for split in tf_splits:
        next(iter(split))
def test_folder_dataset_class_data():
    """Each loaded dataset matches exactly one sub-dataset folder's jpg set."""
    root = get_test_dataset_path(DATASET_PATHS.FOLDER_DATASET_CLASS_DATA)
    # "[!._]*" skips hidden / underscore-prefixed entries when listing sub-datasets.
    expected_sets = [
        {str(p) for p in Path(sub).glob("*/*.jpg")}
        for sub in Path(root).glob("[!._]*")
    ]
    datasets = loaders.from_folder_dataset_class_data(root)
    found_sets = [{sample[0] for sample in ds} for ds in datasets]
    for expected in expected_sets:
        assert any(expected == found for found in found_sets)
def test_image_resize():
    """image_resize handles int arrays, float arrays, path strings, and prior
    images; invalid argument counts or sizes raise."""
    ds = from_dummy_numpy_data().reshape(DUMMY_NUMPY_DATA_SHAPE_2D)
    for sample in ds:
        assert sample[0].shape == DUMMY_NUMPY_DATA_SHAPE_2D

    target_size = (5, 5)

    # Resizing integer numpy arrays yields grayscale int ('L') images.
    resized = ds.image_resize(target_size)
    for sample in resized:
        assert sample[0].size == target_size
        assert sample[0].mode == 'L'

    # Float arrays resize to grayscale float ('F') images.
    resized_float = ds.transform([custom(np.float32)]).image_resize(target_size)
    for sample in resized_float:
        assert sample[0].size == target_size
        assert sample[0].mode == 'F'

    # Path strings are loaded and resized as well.
    str_ds = loaders.from_folder_data(
        get_test_dataset_path(DATASET_PATHS.FOLDER_DATA)
    )
    for sample in str_ds.image_resize(target_size):
        assert sample[0].size == target_size

    # An already-resized image dataset can be resized again (scaling back up).
    rescaled = resized.image_resize(DUMMY_NUMPY_DATA_SHAPE_2D)
    for sample in rescaled:
        assert sample[0].size == DUMMY_NUMPY_DATA_SHAPE_2D

    # Error scenarios.
    with pytest.raises(ValueError):
        ds.image_resize()  # No args
    with pytest.raises(ValueError):
        ds.image_resize(target_size, target_size, target_size)  # Too many args
    with pytest.raises(AssertionError):
        ds.image_resize((4, 4, 4))  # Invalid size
def test_mat_single_with_multi_data():
    """The .mat loader yields a 'src' (2000 items) and a 'tar' (1800 items)
    dataset whose samples are 256-dim vectors with labels in 0..9."""
    path = get_test_dataset_path(DATASET_PATHS.MAT_SINGLE_WITH_MULTI_DATA)
    datasets = loaders.from_mat_single_mult_data(path)
    # Seeded RNG keeps the spot-check reproducible across runs (was unseeded).
    rng = random.Random(42)
    for ds in datasets:
        # check dataset sizes and names
        if ds.name == "src":
            assert len(ds) == 2000
        elif ds.name == "tar":
            assert len(ds) == 1800
        else:
            # was a bare `assert False`: no diagnostic, and stripped under -O
            pytest.fail("Unexpected dataset name: {}".format(ds.name))
        # randomly check some samples for their dimension
        ids = rng.sample(range(len(ds)), 42)
        for i in ids:
            data, label = ds[i]
            assert data.shape == (256, )
            assert int(label) in range(10)
def test_reshape():
    """reshape supports positional / keyword / wildcard / transform syntaxes,
    leaves data intact, and rejects string data and malformed arguments."""
    ds = from_dummy_numpy_data().named('data', 'label')
    items = list(ds)
    # (removed an unused local `s = ds.shape` that shadowed the next assertion)
    assert ds.shape == (DUMMY_NUMPY_DATA_SHAPE_1D, _DEFAULT_SHAPE)
    assert ds[0][0].shape == DUMMY_NUMPY_DATA_SHAPE_1D

    # reshape adding extra dim
    ds_r = ds.reshape(DUMMY_NUMPY_DATA_SHAPE_2D)
    ds_r_alt = ds.reshape(data=DUMMY_NUMPY_DATA_SHAPE_2D)
    items_r = list(ds_r)
    items_r_alt = list(ds_r_alt)
    assert ds_r.shape == (DUMMY_NUMPY_DATA_SHAPE_2D, _DEFAULT_SHAPE)
    assert ds_r[0][0].shape == DUMMY_NUMPY_DATA_SHAPE_2D
    for (old_data, l), (new_data, ln), (new_data_alt, lna) in zip(
        items, items_r, items_r_alt
    ):
        # same values, new shape, labels untouched
        assert set(old_data) == set(new_data.flatten()) == set(new_data_alt.flatten())
        assert old_data.shape != new_data.shape == new_data_alt.shape
        assert l == ln == lna

    # use wildcard
    ds_wild = ds.reshape((-1, DUMMY_NUMPY_DATA_SHAPE_2D[1]))
    items_wild = list(ds_wild)
    for (old_data, _), (new_data, _) in zip(items_r, items_wild):
        assert np.array_equal(old_data, new_data)

    # reshape back, alternative syntax
    ds_back = ds_r.reshape(DUMMY_NUMPY_DATA_SHAPE_1D, None)
    items_back = list(ds_back)
    for (old_data, _), (new_data, _) in zip(items, items_back):
        assert np.array_equal(old_data, new_data)

    # yet another syntax
    ds_trans = ds.transform([reshape(DUMMY_NUMPY_DATA_SHAPE_2D)])
    items_trans = list(ds_trans)
    for (old_data, _), (new_data, _) in zip(items_r, items_trans):
        assert np.array_equal(old_data, new_data)

    # doing nothing also works
    ds_dummy = ds.reshape(None, None)
    items_dummy = list(ds_dummy)
    for (old_data, _), (new_data, _) in zip(items, items_dummy):
        assert np.array_equal(old_data, new_data)

    # TODO test reshape on string data
    ds_str = loaders.from_folder_data(
        get_test_dataset_path(DATASET_PATHS.FOLDER_DATA)
    )
    with pytest.raises(ValueError):  # string has no shape
        ds_str.reshape((1, 2))
    with pytest.raises(ValueError):  # No input
        ds.reshape()
    with pytest.raises(TypeError):  # bad input
        ds.reshape('whazzagh')
    with pytest.raises(ValueError):  # Too many inputs
        ds.reshape(None, None, None)
    with pytest.raises(ValueError):  # Dimensions don't match
        ds.reshape((13, 13))