def test_folder_class_data():
    """Loading a class-per-folder tree yields exactly the */*.jpg files under it."""
    root = get_test_dataset_path(DATASET_PATHS.FOLDER_CLASS_DATA)

    expected = {str(p) for p in Path(root).glob("*/*.jpg")}

    ds = loaders.from_folder_class_data(root)
    found = {item[0] for item in ds}

    assert expected == found
def test_string_image_conversion():
    """Calling .image() on a dataset of path strings must yield PIL images.

    Fix: use the idiomatic ``isinstance(data, Image.Image)`` instead of
    ``issubclass(type(data), Image.Image)`` — they are equivalent checks, but
    isinstance is the canonical form; also drop the parenthesized assert.
    """
    path = get_test_dataset_path(DATASET_PATHS.FOLDER_DATA)
    ds_str = loaders.from_folder_data(path)

    ds_img = ds_str.image()
    items_img = list(ds_img)

    for data in items_img:
        data = data[0]
        assert isinstance(data, Image.Image)
def test_folder_data():
    """A flat folder load should discover frame_0001.jpg through frame_0006.jpg."""
    root = get_test_dataset_path(DATASET_PATHS.FOLDER_DATA)

    expected = {str(Path(root) / f"frame_000{i}.jpg") for i in range(1, 7)}

    ds = loaders.from_folder_data(root)
    found = {item[0] for item in ds}

    assert expected == found
# Beispiel #4
# 0
def test_readme_example_1():
    """Smoke-test the first README example: load, label, resize, one-hot, split."""
    path = (
        Path(get_test_dataset_path(DATASET_PATHS.FOLDER_DATASET_CLASS_DATA)) / "amazon"
    )

    pipeline = (
        do.from_folder_class_data(path)
        .named("data", "label")
        .image_resize((240, 240))
        .one_hot("label")
        .shuffle(seed=42)
    )
    train, val, test = pipeline.split([0.6, 0.2, 0.2])
# Beispiel #5
# 0
def test_domain_adaptation():
    """End-to-end smoke test of the Office31 domain-adaptation loader."""
    root = Path(get_test_dataset_path(DATASET_PATHS.FOLDER_DATASET_CLASS_DATA))
    train, val, test = domain_adaptation_office31(
        source_data_path=root / "amazon", target_data_path=root / "dslr", seed=1
    )

    # prepare for tensorflow
    train, val, test = (
        split.to_tensorflow().batch(16).prefetch(2) for split in (train, val, test)
    )

    # take an item from each and make sure it doesn't raise
    for split in (train, val, test):
        next(iter(split))
def test_folder_dataset_class_data():
    """Every expected per-dataset file group must match one loaded dataset.

    Fixes: ``any([...])`` materialized a throwaway list — pass the generator
    directly so it can short-circuit; ``set([... for ...])`` wrappers are
    replaced by set comprehensions.
    """
    path = get_test_dataset_path(DATASET_PATHS.FOLDER_DATASET_CLASS_DATA)
    # glob pattern skips hidden/metadata entries (names starting with '.' or '_')
    sets = Path(path).glob("[!._]*")

    sets_of_expected_items = [
        {str(p) for p in Path(s).glob("*/*.jpg")} for s in sets
    ]

    datasets = loaders.from_folder_dataset_class_data(path)
    sets_of_found_items = [{i[0] for i in ds} for ds in datasets]

    for expected_items_set in sets_of_expected_items:
        assert any(
            expected_items_set == found_items
            for found_items in sets_of_found_items
        )
def test_image_resize():
    """image_resize should accept numpy ints/floats, path strings, and images,
    and reject missing, surplus, or malformed size arguments."""
    target = (5, 5)

    ds = from_dummy_numpy_data().reshape(DUMMY_NUMPY_DATA_SHAPE_2D)
    for data, *_ in ds:
        assert data.shape == DUMMY_NUMPY_DATA_SHAPE_2D

    # works directly on numpy arrays (ints)
    ds_resized = ds.image_resize(target)
    for img, *_ in ds_resized:
        assert img.size == target
        assert img.mode == 'L'  # grayscale int

    # also if they are floats
    ds_resized_float = ds.transform([custom(np.float32)]).image_resize(target)
    for img, *_ in ds_resized_float:
        assert img.size == target
        assert img.mode == 'F'  # grayscale float

    # works directly on strings
    ds_str = loaders.from_folder_data(get_test_dataset_path(DATASET_PATHS.FOLDER_DATA))
    for img, *_ in ds_str.image_resize(target):
        assert img.size == target

    # works on other images (scaling down)
    for img, *_ in ds_resized.image_resize(DUMMY_NUMPY_DATA_SHAPE_2D):
        assert img.size == DUMMY_NUMPY_DATA_SHAPE_2D

    # Test error scenarios
    with pytest.raises(ValueError):
        ds.image_resize()  # No args

    with pytest.raises(ValueError):
        ds.image_resize(target, target, target)  # Too many args

    with pytest.raises(AssertionError):
        ds.image_resize((4, 4, 4))  # Invalid size
def test_mat_single_with_multi_data():
    """A single .mat file with multiple datasets exposes correct names, sizes,
    and per-sample shapes/labels."""
    path = get_test_dataset_path(DATASET_PATHS.MAT_SINGLE_WITH_MULTI_DATA)

    expected_sizes = {"src": 2000, "tar": 1800}

    for ds in loaders.from_mat_single_mult_data(path):
        # check dataset sizes and names; any other name is a failure
        assert ds.name in expected_sizes
        assert len(ds) == expected_sizes[ds.name]

        # randomly check some samples for their dimension
        for idx in random.sample(range(len(ds)), 42):
            data, label = ds[idx]

            assert data.shape == (256,)
            assert int(label) in range(10)
def test_reshape():
    """reshape should support positional, keyword, wildcard, transform, and
    no-op forms, and raise on string data or malformed arguments."""
    ds = from_dummy_numpy_data().named('data', 'label')
    original_items = list(ds)

    assert ds.shape == (DUMMY_NUMPY_DATA_SHAPE_1D, _DEFAULT_SHAPE)
    assert ds[0][0].shape == DUMMY_NUMPY_DATA_SHAPE_1D

    # reshape adding extra dim — positional and keyword forms must agree
    ds_2d = ds.reshape(DUMMY_NUMPY_DATA_SHAPE_2D)
    ds_2d_kw = ds.reshape(data=DUMMY_NUMPY_DATA_SHAPE_2D)
    items_2d = list(ds_2d)
    items_2d_kw = list(ds_2d_kw)

    assert ds_2d.shape == (DUMMY_NUMPY_DATA_SHAPE_2D, _DEFAULT_SHAPE)
    assert ds_2d[0][0].shape == DUMMY_NUMPY_DATA_SHAPE_2D

    zipped = zip(original_items, items_2d, items_2d_kw)
    for (old, lbl), (new, lbl_pos), (new_kw, lbl_kw) in zipped:
        assert set(old) == set(new.flatten()) == set(new_kw.flatten())
        assert old.shape != new.shape == new_kw.shape
        assert lbl == lbl_pos == lbl_kw

    # use wildcard
    ds_wild = ds.reshape((-1, DUMMY_NUMPY_DATA_SHAPE_2D[1]))
    for (expected, _), (actual, _) in zip(items_2d, list(ds_wild)):
        assert np.array_equal(expected, actual)

    # reshape back, alternative syntax
    ds_back = ds_2d.reshape(DUMMY_NUMPY_DATA_SHAPE_1D, None)
    for (expected, _), (actual, _) in zip(original_items, list(ds_back)):
        assert np.array_equal(expected, actual)

    # yet another syntax
    ds_trans = ds.transform([reshape(DUMMY_NUMPY_DATA_SHAPE_2D)])
    for (expected, _), (actual, _) in zip(items_2d, list(ds_trans)):
        assert np.array_equal(expected, actual)

    # doing nothing also works
    ds_noop = ds.reshape(None, None)
    for (expected, _), (actual, _) in zip(original_items, list(ds_noop)):
        assert np.array_equal(expected, actual)

    # TODO test reshape on string data
    ds_str = loaders.from_folder_data(get_test_dataset_path(DATASET_PATHS.FOLDER_DATA))

    with pytest.raises(ValueError):
        ds_str.reshape((1, 2))  # string has no shape

    with pytest.raises(ValueError):
        ds.reshape()  # No input

    with pytest.raises(TypeError):
        ds.reshape('whazzagh')  # bad input

    with pytest.raises(ValueError):
        ds.reshape(None, None, None)  # Too many inputs

    with pytest.raises(ValueError):
        ds.reshape((13, 13))  # Dimensions don't match