def split_classify(img_path: str, n_splits: int, group_regex: str = None):
    """Build the fold split for a folder-per-class image dataset.

    Every sub-directory of ``img_path`` is treated as one class: the
    directory name is the label and its entries are the images. The image
    paths (``<label>/<image>``, relative to ``img_path``) and their labels
    are handed to ``file_group_kfold`` together with the output path
    ``<current_folder>/fold.csv``.

    Args:
        img_path: Root folder that contains one sub-directory per label.
        n_splits: Number of folds, forwarded to ``file_group_kfold``.
        group_regex: Optional regular expression. Its first capture group
            is used as the group key of a name; a name the pattern does
            not match (from its start) gets a fresh random UUID instead.
            When empty or ``None``, no grouping callback is passed.

    Raises:
        OSError: If ``img_path`` or one of its sub-directories cannot be
            listed.
        re.error: If ``group_regex`` is not a valid regular expression.
    """
    output = join(current_folder, 'fold.csv')

    get_group = None
    if group_regex:
        pattern = re.compile(group_regex)

        # NOTE(review): presumably called by file_group_kfold with a file
        # name/path -- confirm against that helper.
        def get_group(x):
            match = pattern.match(x)
            if not match:
                # A unique key: the item never shares a group with another.
                return str(uuid4())
            return match.group(1)

    images_labels = []
    for sub_folder in os.listdir(img_path):
        label_dir = join(img_path, sub_folder)
        # Only directories are class folders. A stray file in the dataset
        # root (README, .DS_Store, a CSV, ...) used to make the nested
        # os.listdir() call fail with NotADirectoryError.
        if not os.path.isdir(label_dir):
            continue
        for img in os.listdir(label_dir):
            images_labels.append((img, sub_folder))

    file_group_kfold(
        n_splits,
        output,
        get_group=get_group,
        image=[join(label, img) for img, label in images_labels],
        label=[label for img, label in images_labels]
    )
def split_segment(
    img_path: str, mask_path: str, n_splits: int, group_regex: str = None
):
    """Build the fold split for an image/mask segmentation dataset.

    The entries of ``img_path`` and ``mask_path`` are listed and handed to
    ``file_group_kfold`` as ``image`` and ``mask``, with ``sort=True``,
    ``must_equal=['image', 'mask']`` and the output path
    ``<current_folder>/fold.csv``.

    Args:
        img_path: Folder whose entries are the images.
        mask_path: Folder whose entries are the masks.
        n_splits: Number of folds, forwarded to ``file_group_kfold``.
        group_regex: Optional regular expression. Its first capture group
            is used as the group key of a name; a name the pattern does
            not match (from its start) gets a fresh random UUID instead.
            When empty or ``None``, no grouping callback is passed.
    """
    output = join(current_folder, 'fold.csv')

    if group_regex:
        compiled = re.compile(group_regex)

        # NOTE(review): presumably called by file_group_kfold with a file
        # name -- confirm against that helper.
        def get_group(x):
            found = compiled.match(x)
            # No match -> a unique key, so the item stays in its own group.
            return found.group(1) if found else str(uuid4())
    else:
        get_group = None

    images = os.listdir(img_path)
    masks = os.listdir(mask_path)

    file_group_kfold(
        n_splits,
        output,
        get_group=get_group,
        image=images,
        mask=masks,
        sort=True,
        must_equal=['image', 'mask'],
    )
def split_pandas(path: str, n_splits: int):
    """Add a ``fold`` column to a CSV table and save it as ``fold.csv``.

    The table at ``path`` is loaded with pandas, its first column is passed
    to ``file_group_kfold`` as the ``image`` values, and the ``fold`` entry
    of the returned object is stored as a new ``fold`` column. The result
    is written to ``<current_folder>/fold.csv``.

    Args:
        path: CSV file to read.
        n_splits: Number of folds, forwarded to ``file_group_kfold``.
    """
    output = join(current_folder, 'fold.csv')
    table = pd.read_csv(path)

    first_column = table.columns[0]
    # No output path is passed here; the returned object is used directly.
    assignment = file_group_kfold(n_splits, image=table[first_column])

    # NOTE(review): if `assignment` is a pandas object this assignment
    # aligns on the index -- confirm that file_group_kfold keeps the input
    # row order, otherwise folds may be attached to the wrong rows.
    table['fold'] = assignment['fold']

    # Default to_csv() arguments: the row index is written as well.
    table.to_csv(output)