Example #1
@classmethod
def setUpClass(cls):
    super(TestGatherH5ToolContract, cls).setUpClass()
    cls.makeInputs()
    chunks = [PipelineChunk(chunk_id="chunk_data_{i}".format(i=i),
                            **{cls.CHUNK_KEY: fn})
              for i, fn in enumerate(cls.CHUNKED_FILES)]
    write_pipeline_chunks(chunks, cls.INPUT_FILES[0], None)
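All of these snippets use write_pipeline_chunks and PipelineChunk from PacBio's pbcommand package. A minimal round-trip sketch, assuming the usual pbcommand import paths and the "$chunk."-prefixed key convention (the key name and file paths below are illustrative, not from the source):

# Sketch only: import paths are the usual pbcommand locations, but check
# your installed version; the chunk key and paths here are made up.
from pbcommand.models import PipelineChunk
from pbcommand.pb_io import write_pipeline_chunks, load_pipeline_chunks_from_json

chunks = [PipelineChunk(chunk_id="chunk_data_0",
                        **{"$chunk.fasta_id": "/tmp/reads.0.fasta"})]
write_pipeline_chunks(chunks, "/tmp/reads.chunks.json", "example comment")

# A downstream gather step can load the chunk descriptions back:
loaded = load_pipeline_chunks_from_json("/tmp/reads.chunks.json")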
Example #2
def _generate_chunk_json(self, data_files):
    chunks = [PipelineChunk(chunk_id="chunk_data_{i}".format(i=i),
                            **{self.CHUNK_KEY: fn})
              for i, fn in enumerate(data_files)]
    write_pipeline_chunks(chunks, self.INPUT_FILES[0], None)
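The **{self.CHUNK_KEY: fn} splat exists so each test subclass can choose its own chunk key; the helper only assumes that some key maps to each chunked file. A hypothetical subclass (the class name, key, and path below are invented for illustration) would wire it up like:

# Hypothetical wiring; a real test would subclass the gather test base
# rather than object. The helper above expects these class attributes.
class GatherFastaTestCase(object):
    CHUNK_KEY = "$chunk.fasta_id"             # chunk key the gather task reads
    INPUT_FILES = ["/tmp/fasta.chunks.json"]  # destination for the chunk JSON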
Example #3
import datetime

def write_grouped_fofn_chunks(fofn_files, max_total_chunks, chunk_dir_name,
                              chunk_json_path):
    # Partition the input FOFNs into at most max_total_chunks groups.
    fofn_groups = _to_grouped_items_by_max_total_chunks(
        fofn_files, max_total_chunks)

    # Build PipelineChunk instances that point at the grouped FOFN files.
    chunks = to_chunked_grouped_fofn(fofn_groups, 'fofn_group',
                                     Constants.CHUNK_KEY_FOFN,
                                     Constants.CHUNK_KEY_FOFN_REPORT,
                                     chunk_dir_name)

    write_pipeline_chunks(
        chunks, chunk_json_path,
        "Group Fofn created at {d}".format(d=datetime.datetime.now()))

    return chunks
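_to_grouped_items_by_max_total_chunks and to_chunked_grouped_fofn are project-internal helpers whose implementations are not shown here. The grouping idea can be sketched as follows (a hypothetical stand-in, not the project's actual code):

# Hypothetical sketch of the grouping step: spread N items across at
# most max_total_chunks groups, round-robin, so the group count never
# exceeds the cap even when there are more files than allowed chunks.
def group_items_by_max_total_chunks(items, max_total_chunks):
    n_groups = min(len(items), max_total_chunks)
    groups = [[] for _ in range(n_groups)]
    for i, item in enumerate(items):
        groups[i % n_groups].append(item)
    return groups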
Example #4
@classmethod
def setUpClass(cls):
    # "op" here is the conventional alias for os.path (import os.path as op).
    super(TextRecordsGatherBase, cls).setUpClass()
    json_file = cls.INPUT_FILES[0]
    base = ".".join(json_file.split(".")[:-2])
    chunks = []
    for i in range(2):
        file_name = "%s.%d.%s" % (base, i + 1, cls.EXTENSION)
        with open(file_name, 'w') as f:
            if cls.RECORD_HEADER is not None:
                f.write(cls.RECORD_HEADER)
            f.write("\n".join(cls.RECORDS[i * 2:(i + 1) * 2]))
            f.write("\n")  # trailing newline is required for CSV gather
        d = {cls.CHUNK_KEY: op.abspath(file_name)}
        c = PipelineChunk("%s_%i" % (cls.EXTENSION, i + 1), **d)
        chunks.append(c)
    write_pipeline_chunks(chunks, json_file, None)
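To make the name arithmetic concrete: with json_file = "records.chunks.json" and EXTENSION = "csv", base becomes "records" and the loop writes records.1.csv and records.2.csv, each holding two of the four test records, before the chunk JSON describing them is written to the original input path.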
Example #5
import datetime
import logging

log = logging.getLogger(__name__)

def write_chunks_to_json(chunks, chunk_file):
    write_pipeline_chunks(
        chunks, chunk_file,
        "Chunks written at {d}".format(d=datetime.datetime.now()))
    log.debug("Wrote {n} chunks to {f}.".format(n=len(chunks), f=chunk_file))
    return 0
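Returning 0 follows the convention of using the return value as the process exit code. A hypothetical CLI wrapper (the chunk key and output path below are invented for illustration) might look like:

import sys

def main(argv):
    # Illustrative only: build one chunk per input path given on the
    # command line, using an invented chunk key.
    chunks = [PipelineChunk(chunk_id="chunk_data_{i}".format(i=i),
                            **{"$chunk.fofn_id": fn})
              for i, fn in enumerate(argv)]
    return write_chunks_to_json(chunks, "output.chunk.json")

if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))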