def test_target_path(self):
    file_path = os.path.join('source_type', 'year', 'month', 'day',
                             'source_id', 'data.ext')
    source_path = os.path.join('/', 'pfs', 'repo_name', file_path)
    out_dir = os.path.join('/', 'pfs', 'out')
    target = target_path.get_path(source_path, out_dir)
    expected_target = os.path.join(out_dir, file_path)
    self.assertEqual(target, expected_target)
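# Illustrative sketch only: target_path.get_path is not shown in this excerpt.
# Based on the test above, it is assumed to strip the leading '/pfs/<repo>'
# prefix from the source path and re-root the remainder under the output
# directory. This is an assumption for reference, not the actual implementation.
def _get_path_sketch(source_path, out_path):
    parts = source_path.strip(os.path.sep).split(os.path.sep)  # ['pfs', '<repo>', ...]
    return os.path.join(out_path, *parts[2:])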
def group(path, out_path):
    """
    Link files into the output directory.

    :param path: A file or directory path to crawl for files.
    :type path: str
    :param out_path: The output path for writing results.
    :type out_path: str
    """
    for file_path in file_crawler.crawl(path):
        target = target_path.get_path(file_path, out_path)
        log.debug(f'target: {target}')
        file_linker.link(file_path, target)
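# Illustrative usage (paths are placeholders, not actual repo paths): crawl an
# input repository and link each file into the same relative location under
# the output directory.
#
#   group(os.path.join('/', 'pfs', 'repo_name'), os.path.join('/', 'pfs', 'out'))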
def join(pathname, out_path):
    """
    Find all files matching the given pathname pattern and
    link them into the output directory.

    :param pathname: The path pattern to match.
    :type pathname: str
    :param out_path: The output path for writing results.
    :type out_path: str
    """
    files = [fn for fn in glob.glob(pathname, recursive=True)
             if os.path.isfile(fn) and not fn.startswith(out_path)]
    for file in files:
        log.debug(f'found matching file: {file}')
        target = target_path.get_path(file, out_path)
        log.debug(f'target: {target}')
        file_linker.link(file, target)
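# Illustrative usage (pattern and paths are placeholders): link every regular
# file matching a recursive glob pattern into the output directory, skipping
# files already located under it.
#
#   join('/pfs/repo_name/**/*.ext', os.path.join('/', 'pfs', 'out'))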
def filter_directory(in_path, filter_dirs, out_path):
    """
    Link each directory matching the filter into the output directory.

    :param in_path: The input path.
    :type in_path: str
    :param filter_dirs: Comma separated list of directory names to link.
    :type filter_dirs: str
    :param out_path: The output path for writing results.
    :type out_path: str
    """
    parsed_dirs = parse_dirs(filter_dirs)
    for root, dirs, _ in os.walk(in_path):
        for name in dirs:
            if not name.startswith('.') and name in parsed_dirs:
                source = os.path.join(root, name)
                destination = target_path.get_path(source, out_path)
                file_linker.link(source, destination)
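# Illustrative sketch only: parse_dirs is not shown in this excerpt. It is
# assumed to turn the comma separated filter string into a list of directory
# names, e.g. 'calibration,data' -> ['calibration', 'data'].
def _parse_dirs_sketch(filter_dirs):
    return [name.strip() for name in filter_dirs.split(',') if name.strip()]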
def group(paths, out_path):
    """
    Link all files into the output directory.

    :param paths: Comma separated list of environment variable names whose values are full directory paths.
    :type paths: str
    :param out_path: The output path for writing results.
    :type out_path: str
    """
    paths = paths.split(',')
    log.debug(f'paths: {paths}')
    for p in paths:
        log.debug(f'path: {p}')
        path = os.environ[p]
        for file_path in file_crawler.crawl(path):
            target = target_path.get_path(file_path, out_path)
            log.debug(f'target: {target}')
            file_linker.link(file_path, target)
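# Illustrative usage (environment variable names and paths are placeholders):
# each name in the comma separated list is expected to hold a directory path.
#
#   os.environ['DATA_PATH'] = '/pfs/data_repo'
#   os.environ['LOCATION_PATH'] = '/pfs/location_repo'
#   group('DATA_PATH,LOCATION_PATH', os.path.join('/', 'pfs', 'out'))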
def load_files(directory, out_dir):
    """
    Walk all files in a directory and build a dictionary mapping each file to its source path and output path.

    :param directory: The directory to walk.
    :type directory: str
    :param out_dir: The output directory.
    :type out_dir: str
    :return: dict keyed by destination file path; each value holds the source and destination paths.
    """
    files = {}
    for root, _, file_names in os.walk(directory):
        for filename in file_names:
            source = os.path.join(root, filename)
            destination_dir = target_path.get_path(root, out_dir)
            destination = os.path.join(destination_dir, filename)
            paths = dict(source=source, destination=destination)
            # Key by the full destination file path so files sharing a
            # directory do not overwrite each other in the dictionary.
            files[destination] = paths
            log.debug(f'adding key: {destination} value: {paths}')
    return files
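# Illustrative usage (paths are placeholders): link each discovered source file
# to its computed destination using the returned mapping.
#
#   for destination, paths in load_files('/pfs/repo_name', '/pfs/out').items():
#       file_linker.link(paths['source'], paths['destination'])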