def to_chunked_grouped_fofn(fofn_groups, chunk_id_prefix, fofn_chunk_key, report_chunk_key, chunk_dir_name):
    """
    Write one grouped Fofn file plus a companion JSON report per group and
    wrap each pair in a pipeline chunk.

    Chunk ids are built as ``<chunk_id_prefix>_<index>``, where the index is
    the position of the group in ``fofn_groups``. The same id is used as the
    stem of the ``.fofn`` and ``_report.json`` file names.

    :param fofn_groups: A list of FofnGroups
    :param chunk_id_prefix: Prefix used to create the chunk id and the
                            grouped Fofn / report file names
    :param fofn_chunk_key: Chunk key under which the grouped Fofn path is
                           stored (e.g., $chunk.my_key)
    :param report_chunk_key: Chunk key under which the companion report path
                             is stored
    :param chunk_dir_name: Directory where the grouped Fofn files and their
                           reports are written (not created by this function
                           itself)

    :return: list of pipeline chunks, one per group, in input order
    """
    pipeline_chunks = []

    for index, group in enumerate(fofn_groups):
        chunk_id = "_".join((chunk_id_prefix, str(index)))

        # Grouped fofn for this chunk
        group_path = os.path.join(chunk_dir_name, chunk_id + ".fofn")
        write_fofn(group, group_path)

        # Companion fofn metadata report, written next to the grouped fofn
        group_size = len(group)
        report_path = os.path.join(chunk_dir_name, chunk_id + "_report" + ".json")
        fofn_to_report(group_size).write_json(report_path)

        # Later keys win on collision, exactly as successive assignments would
        chunk_data = {
            "nfofns": group_size,
            fofn_chunk_key: group_path,
            report_chunk_key: report_path,
        }
        pipeline_chunks.append(PipelineChunk(chunk_id, **chunk_data))

    return pipeline_chunks
def run_rtc(rtc):
    """
    Run the task described by a resolved tool contract.

    The first input file is read with ``readFofn`` to count its entries, a
    report is built from that count, the input file is copied unchanged to
    the first output file, and the report is written to the second output
    file.

    :param rtc: Resolved tool contract exposing ``task.input_files`` and
                ``task.output_files``
    :return: 0 on completion
    """
    task = rtc.task
    input_fofn = task.input_files[0]

    # Materialize the entries only to count them for the report
    entry_count = len(list(readFofn(input_fofn)))
    fofn_report = fofn_to_report(entry_count)

    # The fofn itself is passed through untouched as the first output
    shutil.copy(input_fofn, task.output_files[0])
    write_report_and_log(fofn_report, task.output_files[1])

    log.info("Completed running {i}".format(i=task))
    return 0