예제 #1
0
파일: common.py 프로젝트: skalawag/HPI
def get_files(pp: Paths, glob: str=DEFAULT_GLOB, sort: bool=True) -> Tuple[Path, ...]:
    """
    Helper function to avoid boilerplate: resolve ``pp`` into a tuple of paths.

    ``pp`` is either a single ``str``/``Path`` or an iterable of them.
    Each source is handled as follows:

    - an existing directory is searched with ``glob`` (via ``Path.glob``);
    - otherwise, a path containing ``*`` is treated as a glob pattern itself and
      expanded with ``do_glob``; a non-default ``glob`` argument is then ignored
      (a warning is emitted);
    - anything else must be an existing file and is added as is.

    If ``sort`` is true, the collected paths are sorted before being returned.

    Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense

    Raises ``RuntimeError`` if a plain (non-directory, non-glob) source is not an existing file.
    """
    # TODO FIXME mm, some wrapper to assert iterator isn't empty?
    sources: List[Path]
    if isinstance(pp, (str, Path)):
        sources = [Path(pp)]
    else:
        sources = [Path(p) for p in pp]

    paths: List[Path] = []
    for src in sources:
        if src.is_dir():
            paths.extend(src.glob(glob))
            continue

        ss = str(src)
        if '*' in ss:
            if glob != DEFAULT_GLOB:
                warnings.warn(f"Treating {ss} as glob path. Explicit glob={glob} argument is ignored!")
            paths.extend(map(Path, do_glob(ss)))
            continue

        # Explicit raise rather than `assert`: asserts are stripped under `python -O`,
        # which would let a nonexistent path slip into the result silently.
        if not src.is_file():
            raise RuntimeError(f"Expected '{src}' to exist")
        # todo assert matches glob??
        paths.append(src)

    if sort:
        paths.sort()
    return tuple(paths)
예제 #2
0
def get_files(pp: Paths,
              glob: str = DEFAULT_GLOB,
              sort: bool = True) -> Tuple[Path, ...]:
    """
    Helper function to avoid boilerplate: resolve ``pp`` into a tuple of paths.

    ``pp`` is either a single ``str``/``Path`` or an iterable of them.
    An empty string is a special case and yields an empty tuple without any warnings.
    Each source is handled as follows:

    - a leading ``~`` component is expanded with ``Path.expanduser``;
    - an existing directory is searched with ``glob`` (via ``Path.glob``);
    - otherwise, a path containing ``*`` is treated as a glob pattern itself and
      expanded with ``do_glob``; a non-default ``glob`` argument is then ignored
      (a warning is emitted);
    - anything else must be an existing file and is added as is.

    If ``sort`` is true, the collected paths are sorted before being returned.
    If nothing was matched at all, a warning is emitted and the current stack is printed.

    Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense

    Raises ``RuntimeError`` if a plain (non-directory, non-glob) source is not an existing file.
    """
    # TODO FIXME mm, some wrapper to assert iterator isn't empty?
    sources: List[Path]
    if isinstance(pp, Path):
        sources = [pp]
    elif isinstance(pp, str):
        if pp == '':
            # special case -- makes sense for optional data sources, etc
            return ()  # early return to prevent warnings etc
        sources = [Path(pp)]
    else:
        sources = [Path(p) for p in pp]

    def caller() -> str:
        import traceback
        # TODO ugh. very flaky... -3 because [<this function>, get_files(), <actual caller>]
        # NOTE: must be called directly from get_files' body for the -3 offset to hold
        return traceback.extract_stack()[-3].filename

    paths: List[Path] = []
    for src in sources:
        # Slice rather than index: Path('.') has empty .parts, so parts[0] would raise IndexError
        if src.parts[:1] == ('~',):
            src = src.expanduser()
        if src.is_dir():
            paths.extend(src.glob(glob))
            continue

        ss = str(src)
        if '*' in ss:
            if glob != DEFAULT_GLOB:
                warnings.warn(
                    f"{caller()}: treating {ss} as glob path. Explicit glob={glob} argument is ignored!"
                )
            paths.extend(map(Path, do_glob(ss)))
            continue

        if not src.is_file():
            raise RuntimeError(f"Expected '{src}' to exist")
        # todo assert matches glob??
        paths.append(src)

    if sort:
        paths.sort()

    if not paths:
        # todo make it conditionally defensive based on some global settings
        # TODO not sure about using warnings module for this
        import traceback
        # Report the requested sources (pp); `paths` is always empty in this branch
        warnings.warn(
            f'{caller()}: no paths were matched against {pp}. This might result in missing data.'
        )
        traceback.print_stack()

    return tuple(paths)
예제 #3
0
def get_files(
        pp: Paths,
        glob: str=DEFAULT_GLOB,
        sort: bool=True,
        guess_compression: bool=True,
) -> Tuple[Path, ...]:
    """
    Helper function to avoid boilerplate: resolve ``pp`` into a tuple of paths.

    ``pp`` is either a single ``str``/``Path`` or an iterable of them.
    An empty string is a special case and yields an empty tuple without any warnings.
    Each source is handled as follows:

    - a leading ``~`` component is expanded with ``Path.expanduser``;
    - a path containing ``*`` is treated as a glob pattern itself and expanded with
      ``do_glob``; a non-default ``glob`` argument is then ignored (a warning is emitted);
    - otherwise, an existing directory is searched with ``glob`` (via ``Path.glob``);
    - anything else must be an existing file and is added as is.

    If ``sort`` is true, the collected paths are sorted.
    If nothing was matched at all, a warning is reported via ``core_warnings.high``
    and the current stack is printed.
    If ``guess_compression`` is true, every path for which ``_is_compressed`` returns
    true is wrapped in ``CPath``.

    Tuple as return type is a bit friendlier for hashing/caching, so hopefully makes sense

    Raises ``RuntimeError`` if a plain (non-glob, non-directory) source is not an existing file.
    """
    # TODO FIXME mm, some wrapper to assert iterator isn't empty?
    sources: List[Path]
    if isinstance(pp, Path):
        sources = [pp]
    elif isinstance(pp, str):
        if pp == '':
            # special case -- makes sense for optional data sources, etc
            return () # early return to prevent warnings etc
        sources = [Path(pp)]
    else:
        sources = [Path(p) for p in pp]

    def caller() -> str:
        import traceback
        # TODO ugh. very flaky... -3 because [<this function>, get_files(), <actual caller>]
        # NOTE: must be called directly from get_files' body for the -3 offset to hold
        return traceback.extract_stack()[-3].filename

    paths: List[Path] = []
    for src in sources:
        # Slice rather than index: Path('.') has empty .parts, so parts[0] would raise IndexError
        if src.parts[:1] == ('~',):
            src = src.expanduser()
        # note: glob handled first, because e.g. on Windows asterisk makes is_dir unhappy
        gs = str(src)
        if '*' in gs:
            if glob != DEFAULT_GLOB:
                warnings.warn(f"{caller()}: treating {gs} as glob path. Explicit glob={glob} argument is ignored!")
            paths.extend(map(Path, do_glob(gs)))
            continue

        if src.is_dir():
            paths.extend(src.glob(glob)) # todo not sure if should be recursive?
            continue

        if not src.is_file():
            # todo not sure, might be race condition?
            raise RuntimeError(f"Expected '{src}' to exist")
        # todo assert matches glob??
        paths.append(src)

    if sort:
        paths.sort()

    if not paths:
        # todo make it conditionally defensive based on some global settings
        core_warnings.high(f'''
{caller()}: no paths were matched against {pp}. This might result in missing data. Likely, the directory you passed is empty.
'''.strip())
        # traceback is useful to figure out what config caused it?
        import traceback
        traceback.print_stack()

    if guess_compression:
        from .kompress import CPath
        paths = [CPath(p) if _is_compressed(p) else p for p in paths]
    return tuple(paths)