@classmethod
def setUpClass(cls):
    super(TestGatherH5ToolContract, cls).setUpClass()
    cls.makeInputs()
    # One chunk per pre-generated chunked file, keyed by the class's chunk key.
    chunks = [PipelineChunk(chunk_id="chunk_data_{i}".format(i=i),
                            **{cls.CHUNK_KEY: fn})
              for i, fn in enumerate(cls.CHUNKED_FILES)]
    write_pipeline_chunks(chunks, cls.INPUT_FILES[0], None)
def _generate_chunk_json(self, data_files):
    chunks = [PipelineChunk(chunk_id="chunk_data_{i}".format(i=i),
                            **{self.CHUNK_KEY: fn})
              for i, fn in enumerate(data_files)]
    write_pipeline_chunks(chunks, self.INPUT_FILES[0], None)
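# A minimal round-trip sketch of the chunk JSON written above, assuming the
# pbcommand import paths below (verify against your installed version). The
# chunk key "$chunk.data_id" and the file paths are illustrative, not the
# values used by the tests.
from pbcommand.models import PipelineChunk
from pbcommand.pb_io import load_pipeline_chunks_from_json, write_pipeline_chunks

chunk_key = "$chunk.data_id"
data_files = ["/tmp/data.0.txt", "/tmp/data.1.txt"]
chunks = [PipelineChunk(chunk_id="chunk_data_{i}".format(i=i),
                        **{chunk_key: fn})
          for i, fn in enumerate(data_files)]
write_pipeline_chunks(chunks, "example.chunks.json", None)

# Loading the file back yields PipelineChunk instances whose chunk_d dict
# maps each chunk key to its scattered file path.
for chunk in load_pipeline_chunks_from_json("example.chunks.json"):
    print("{i} -> {f}".format(i=chunk.chunk_id, f=chunk.chunk_d[chunk_key]))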
def write_grouped_fofn_chunks(fofn_files, max_total_chunks, chunk_dir_name,
                              chunk_json_path):
    # Partition the FOFNs so at most max_total_chunks chunks are emitted, then
    # write the grouped chunks (FOFN and FOFN-report keys) to the chunk JSON.
    fofn_groups = _to_grouped_items_by_max_total_chunks(fofn_files,
                                                        max_total_chunks)
    chunks = to_chunked_grouped_fofn(fofn_groups, 'fofn_group',
                                     Constants.CHUNK_KEY_FOFN,
                                     Constants.CHUNK_KEY_FOFN_REPORT,
                                     chunk_dir_name)
    write_pipeline_chunks(
        chunks, chunk_json_path,
        "Group Fofn created at {d}".format(d=datetime.datetime.now()))
    return chunks
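# _to_grouped_items_by_max_total_chunks is not shown in this section. A
# plausible sketch of its contract, as a hypothetical stand-in rather than the
# library's own implementation: partition the items into at most
# max_total_chunks roughly even groups.
def _to_grouped_items_by_max_total_chunks(items, max_total_chunks):
    n_groups = min(len(items), max_total_chunks)
    # Round-robin assignment keeps group sizes within one item of each other.
    groups = [[] for _ in range(n_groups)]
    for i, item in enumerate(items):
        groups[i % n_groups].append(item)
    return groups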
@classmethod
def setUpClass(cls):
    super(TextRecordsGatherBase, cls).setUpClass()
    json_file = cls.INPUT_FILES[0]
    base = ".".join(json_file.split(".")[:-2])
    chunks = []
    for i in range(2):
        file_name = "%s.%d.%s" % (base, i + 1, cls.EXTENSION)
        with open(file_name, 'w') as f:
            if cls.RECORD_HEADER is not None:
                f.write(cls.RECORD_HEADER)
            f.write("\n".join(cls.RECORDS[i * 2:(i + 1) * 2]))
            f.write("\n")  # trailing newline is required for CSV gather
        d = {cls.CHUNK_KEY: op.abspath(file_name)}
        c = PipelineChunk("%s_%i" % (cls.EXTENSION, i + 1), **d)
        chunks.append(c)
    write_pipeline_chunks(chunks, json_file, None)
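# Gather-side counterpart to the setup above: a minimal sketch that loads the
# chunk JSON, resolves each chunked file through its chunk key, and
# concatenates the contents. The function name is illustrative; the import
# assumes pbcommand's pb_io package. A real gather for headered formats would
# also deduplicate the per-file record header.
from pbcommand.pb_io import load_pipeline_chunks_from_json

def gather_text_chunks(chunk_json, chunk_key, output_file):
    chunk_files = [chunk.chunk_d[chunk_key]
                   for chunk in load_pipeline_chunks_from_json(chunk_json)]
    with open(output_file, "w") as out:
        for file_name in chunk_files:
            with open(file_name) as f:
                out.write(f.read())
    return output_file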
def write_chunks_to_json(chunks, chunk_file):
    # Persist the chunks first, then record the completed write in the log.
    write_pipeline_chunks(
        chunks, chunk_file,
        "Chunks written at {d}".format(d=datetime.datetime.now()))
    log.debug("Wrote {n} chunks to {f}.".format(n=len(chunks), f=chunk_file))
    return 0