Example 1
def fofn_to_chunks(fofn):
    # Expand the FOFN (file of file names) and emit one PipelineChunk per
    # listed file, keyed by the FOFN chunk key.
    files = fofn_to_files(fofn)
    chunks = []
    for i, f in enumerate(files):
        chunk_id = "chunk-{i}".format(i=i)
        _d = {Constants.CHUNK_KEY_FOFN: f}
        p = PipelineChunk(chunk_id, **_d)
        chunks.append(p)
    return chunks
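Every snippet here leans on fofn_to_files, which the examples never show. A minimal sketch, assuming a FOFN ("file of file names") is a plain-text file listing one path per line:

def fofn_to_files(fofn):
    # Assumed behavior: return the non-blank paths listed in the FOFN,
    # one per line. The real helper may also validate that each path exists.
    with open(fofn) as f:
        return [line.strip() for line in f if line.strip()]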
Example 2
def _args_chunk_fofn(args):
    # Entry point for the chunk-fofn subcommand: split the input FOFN into
    # grouped chunk FOFNs and write a chunk report.
    fofn_files = fofn_to_files(args.input_fofn)
    log.info("Read in FOFN with {n} files.".format(n=len(fofn_files)))
    chunks = CU.write_grouped_fofn_chunks(fofn_files, args.max_total_chunks,
                                          args.output_dir,
                                          args.chunk_report_json)
    log.debug("Converted FOFN with {x} files into {n} chunks. Wrote chunks to {f}".format(
        n=len(chunks), f=args.chunk_report_json, x=len(fofn_files)))
    return 0
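A hypothetical invocation, for orientation only; the real tool builds args with its own argparse parser, and CU.write_grouped_fofn_chunks comes from the snippet's package:

import argparse

# Hypothetical attribute values, mirrored from the function body above.
args = argparse.Namespace(input_fofn="input.fofn",
                          max_total_chunks=24,
                          output_dir="chunks",
                          chunk_report_json="chunk_report.json")
exit_code = _args_chunk_fofn(args)  # returns 0 on success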
Example 3
def gather_fofn(input_files, output_file, skip_empty=True):
    """
    Merge several FOFNs into a single FOFN.

    This should be better spec'ed and impose a tighter constraint on the FOFN.

    :param input_files: List of FOFN paths
    :param output_file: Path to the merged output FOFN
    :param skip_empty: Ignore empty file-name entries

    :return: Output file path

    :rtype: str
    """

    all_files = []
    for input_file in input_files:
        file_names = fofn_to_files(input_file)
        all_files.extend(file_names)

    if skip_empty:
        # The original snippet accepted skip_empty but never used it; dropping
        # blank entries is the most plausible reading of "ignore empty files".
        all_files = [f for f in all_files if f.strip()]

    with open(output_file, 'w') as f:
        f.write("\n".join(all_files))

    return output_file
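A usage sketch with made-up file names, relying on the fofn_to_files sketch after Example 1:

merged = gather_fofn(["movie1.fofn", "movie2.fofn"], "all_movies.fofn")
# merged == "all_movies.fofn"; the file now lists every path from both inputs.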
Example 4
def nchunk_fofn(input_file, max_chunks):
    # Cap the chunk count at the number of files listed in the FOFN.
    input_files = fofn_to_files(input_file)
    nchunks = min(len(input_files), max_chunks)
    return nchunks
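The min() clamp keeps the chunk count from exceeding the file count, presumably so that no chunk comes out empty: a 10-file FOFN with max_chunks=24 yields 10 chunks, while a 100-file FOFN with the same limit yields 24.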
Example 5
def _fofn_to_metadata(path):
    # Build dataset metadata from the FOFN, using the file count for both
    # constructor arguments.
    files = fofn_to_files(path)
    return DatasetMetadata(len(files), len(files))
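Note that both DatasetMetadata arguments receive the same file count; the constructor presumably takes two distinct fields (e.g. record count and total size), with the file count standing in as a placeholder for each.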