def test_split_fixed_oversample_unbalanced():
    """Check that ``fixed`` raises ValueError for a (9, 1) split with oversampling.

    The ``imgs`` fixture directory next to this file is the input.
    NOTE(review): presumably at least one class in ``imgs`` is too small
    for a 9 + 1 fixed split -- confirm against the fixture data.
    """
    here = os.path.dirname(__file__)
    input_dir = os.path.join(here, "imgs")
    output_dir = os.path.join(here, "output")

    # Remove leftovers from earlier runs; a missing directory is not an error.
    shutil.rmtree(output_dir, ignore_errors=True)

    with pytest.raises(ValueError):
        fixed(input_dir, output_dir, fixed=(9, 1), oversample=True)
def test_wrong_input():
    """Check that ``fixed`` raises ValueError for each of two bad input paths.

    The inputs tried are ``imgsxx`` next to this file and the bare relative
    name ``peterpan``.
    NOTE(review): both are presumably non-existent directories -- confirm.
    """
    here = os.path.dirname(__file__)
    output_dir = os.path.join(here, "output")

    # Same order as before: the path beside this file first, then the bare name.
    bad_inputs = (os.path.join(here, "imgsxx"), "peterpan")
    for bad_input in bad_inputs:
        with pytest.raises(ValueError):
            fixed(bad_input, output_dir)
def test_split_fixed_oversample():
    """Check that a (2, 2) fixed split with oversampling changes the .jpg count.

    Runs ``fixed`` on the ``imgs`` fixture directory next to this file and
    compares the number of ``.jpg`` files found recursively under the input
    and output directories.
    """
    here = os.path.dirname(__file__)
    input_dir = os.path.join(here, "imgs")
    output_dir = os.path.join(here, "output")

    # Remove leftovers from earlier runs; a missing directory is not an error.
    shutil.rmtree(output_dir, ignore_errors=True)

    fixed(input_dir, output_dir, fixed=(2, 2), oversample=True)

    # With oversampling the number of pics must NOT be the same: the output
    # tree is expected to hold a different number of .jpg files than the input.
    n_input = len(list(pathlib.Path(input_dir).glob("**/*.jpg")))
    n_output = len(list(pathlib.Path(output_dir).glob("**/*.jpg")))
    assert n_input != n_output, (
        "expected oversampling to change the number of .jpg files, "
        f"but input and output both contain {n_input}"
    )
def test_split_fixed_prefix():
    """Check that a (1, 1) fixed split with ``group_prefix=2`` keeps the .jpg count.

    Runs ``fixed`` without oversampling on the ``imgs_texts`` fixture
    directory next to this file, then compares the recursive ``.jpg`` counts
    of the input and output directories.
    """
    here = os.path.dirname(__file__)
    input_dir = os.path.join(here, "imgs_texts")
    output_dir = os.path.join(here, "output")

    # Remove leftovers from earlier runs; a missing directory is not an error.
    shutil.rmtree(output_dir, ignore_errors=True)

    fixed(input_dir, output_dir, fixed=(1, 1), oversample=False, group_prefix=2)

    # The output tree must contain exactly as many .jpg files as the input tree.
    jpg_counts = [
        len(list(pathlib.Path(root).glob("**/*.jpg")))
        for root in (input_dir, output_dir)
    ]
    assert jpg_counts[0] == jpg_counts[1]
# https://youtu.be/C6wbr1jJvVs
""" pip install split-folders """
import splitfolders  # or import split_folders

input_folder = 'cell_images/'

# --- Split by ratio -------------------------------------------------------
# Three ratio entries give train / val / test.
# To split into training and validation only, pass a 2-tuple to `ratio`,
# e.g. `(.8, .2)`.
# NOTE(review): the original comment labelled this call "default values", but
# per the split-folders README the defaults are seed=1337 and
# ratio=(.8, .1, .1); the seed and ratio below look customised -- confirm.
splitfolders.ratio(
    input_folder,
    output="cell_images2",
    seed=42,
    ratio=(.7, .2, .1),
    group_prefix=None,
)

# --- Split by fixed counts ------------------------------------------------
# Puts a fixed number of items into the val / test sets (e.g. 100 each).
# To split into training and validation only, pass a single number to
# `fixed`, e.g. `10`.
# Oversampling of imbalanced datasets is available only with `fixed`; it is
# switched off here.
# NOTE(review): this call writes to the same output folder as the ratio split
# above -- presumably only one of the two calls is meant to be run; confirm.
splitfolders.fixed(
    input_folder,
    output="cell_images2",
    seed=42,
    fixed=(35, 20),
    oversample=False,
    group_prefix=None,
)