def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]:
    """
    Helper function to avoid boilerplate.

    Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense

    Args:
        pp: a single ``str``/``Path``, or an iterable of values accepted by ``Path()``.
        glob: pattern applied (via ``Path.glob``) to every source that is a directory.
        sort: when true, the collected paths are sorted before being returned.

    Returns:
        All collected paths as a tuple.

    Raises:
        AssertionError: if a source is not a directory, does not contain ``*``
            and is not an existing file.
    """
    # TODO FIXME mm, some wrapper to assert iterator isn't empty?
    sources: List[Path] = []
    if isinstance(pp, (str, Path)):
        sources.append(Path(pp))
    else:
        sources.extend(map(Path, pp))

    paths: List[Path] = []
    for src in sources:
        if src.is_dir():
            gp: Iterable[Path] = src.glob(glob)
            paths.extend(gp)
            continue

        ss = str(src)
        if '*' in ss:
            # the source itself is a glob pattern, so the `glob` argument can't be applied to it
            if glob != DEFAULT_GLOB:
                warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!")
            paths.extend(map(Path, do_glob(ss)))
            continue

        if not src.is_file():
            # Raised explicitly instead of using a bare `assert`: assert statements are
            # stripped under `python -O`, which would let a missing path through silently.
            # The exception type is kept as AssertionError so existing callers are unaffected.
            raise AssertionError(src)
        # todo assert matches glob??
        paths.append(src)

    if sort:
        paths.sort()
    return tuple(paths)
def get_files(pp: Paths, glob: str = DEFAULT_GLOB, sort: bool = True) -> Tuple[Path, ...]:
    """
    Helper function to avoid boilerplate.

    Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense

    Args:
        pp: a single ``Path``, a single ``str``, or an iterable of values accepted by ``Path()``.
            An empty string is a special case: an empty tuple is returned without any warnings.
        glob: pattern applied (via ``Path.glob``) to every source that is a directory.
        sort: when true, the collected paths are sorted before being returned.

    Returns:
        All collected paths as a tuple. If nothing was collected, a warning is emitted and
        the current stack is printed, but the (empty) tuple is still returned.

    Raises:
        RuntimeError: if a source is not a directory, does not contain ``*``
            and is not an existing file.
    """
    import traceback

    # TODO FIXME mm, some wrapper to assert iterator isn't empty?
    sources: List[Path]
    if isinstance(pp, Path):
        sources = [pp]
    elif isinstance(pp, str):
        if pp == '':
            # special case -- makes sense for optional data sources, etc
            return ()  # early return to prevent warnings etc
        sources = [Path(pp)]
    else:
        sources = [Path(p) for p in pp]

    def caller() -> str:
        # TODO ugh. very flaky... -3 because [<this function>, get_files(), <actual caller>]
        # NOTE: must only be called directly from the body of get_files, otherwise the index is off
        return traceback.extract_stack()[-3].filename

    paths: List[Path] = []
    for src in sources:
        # `parts` is empty for Path('.') / Path(''), so check it before indexing
        if src.parts and src.parts[0] == '~':
            src = src.expanduser()

        if src.is_dir():
            gp: Iterable[Path] = src.glob(glob)
            paths.extend(gp)
            continue

        ss = str(src)
        if '*' in ss:
            # the source itself is a glob pattern, so the `glob` argument can't be applied to it
            if glob != DEFAULT_GLOB:
                warnings.warn(
                    f"{caller()}: treating {ss} as glob path. Explicit glob={glob} argument is ignored!"
                )
            paths.extend(map(Path, do_glob(ss)))
            continue

        if not src.is_file():
            raise RuntimeError(f"Expected '{src}' to exist")
        # todo assert matches glob??
        paths.append(src)

    if sort:
        paths.sort()

    if not paths:
        # todo make it conditionally defensive based on some global settings
        # TODO not sure about using warnings module for this
        # report the original input (`pp`): `paths` is always empty at this point
        warnings.warn(
            f'{caller()}: no paths were matched against {pp}. This might result in missing data.'
        )
        traceback.print_stack()

    return tuple(paths)
def get_files(
        pp: Paths,
        glob: str=DEFAULT_GLOB,
        sort: bool=True,
        guess_compression: bool=True,
) -> Tuple[Path, ...]:
    """
    Helper function to avoid boilerplate.

    Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense

    Args:
        pp: a single ``Path``, a single ``str``, or an iterable of values accepted by ``Path()``.
            An empty string is a special case: an empty tuple is returned without any warnings.
        glob: pattern applied (via ``Path.glob``) to every source that is a directory.
            Ignored (with a warning) for sources that contain ``*`` themselves.
        sort: when true, the collected paths are sorted before being returned.
        guess_compression: when true, every path for which ``_is_compressed`` returns true
            is wrapped in ``CPath`` (imported from ``.kompress``).

    Returns:
        All collected paths as a tuple. If nothing was collected, a warning is reported via
        ``core_warnings.high`` and the current stack is printed, but the (empty) tuple is
        still returned.

    Raises:
        RuntimeError: if a source does not contain ``*``, is not a directory
            and is not an existing file.
    """
    import traceback

    # TODO FIXME mm, some wrapper to assert iterator isn't empty?
    sources: List[Path]
    if isinstance(pp, Path):
        sources = [pp]
    elif isinstance(pp, str):
        if pp == '':
            # special case -- makes sense for optional data sources, etc
            return ()  # early return to prevent warnings etc
        sources = [Path(pp)]
    else:
        sources = [Path(p) for p in pp]

    def caller() -> str:
        # TODO ugh. very flaky... -3 because [<this function>, get_files(), <actual caller>]
        # NOTE: must only be called directly from the body of get_files, otherwise the index is off
        return traceback.extract_stack()[-3].filename

    paths: List[Path] = []
    for src in sources:
        # `parts` is empty for Path('.') / Path(''), so check it before indexing
        if src.parts and src.parts[0] == '~':
            src = src.expanduser()

        # note: glob handled first, because e.g. on Windows asterisk makes is_dir unhappy
        gs = str(src)
        if '*' in gs:
            if glob != DEFAULT_GLOB:
                warnings.warn(f"{caller()}: treating {gs} as glob path. Explicit glob={glob} argument is ignored!")
            paths.extend(map(Path, do_glob(gs)))
        elif src.is_dir():
            gp: Iterable[Path] = src.glob(glob)  # todo not sure if should be recursive?
            paths.extend(gp)
        else:
            if not src.is_file():
                # todo not sure, might be race condition?
                raise RuntimeError(f"Expected '{src}' to exist")
            # todo assert matches glob??
            paths.append(src)

    if sort:
        paths.sort()

    if not paths:
        # todo make it conditionally defensive based on some global settings
        core_warnings.high(f'''
{caller()}: no paths were matched against {pp}. This might result in missing data. Likely, the directory you passed is empty.
'''.strip())
        # traceback is useful to figure out what config caused it?
        traceback.print_stack()

    if guess_compression:
        # local import, as in the original; wraps only the paths detected as compressed
        from .kompress import CPath
        paths = [CPath(p) if _is_compressed(p) else p for p in paths]
    return tuple(paths)