コード例 #1
0
    def run(self):
        """Run IcePartialMerge.

        Logs the root directory and the total number of chunks, obtains the
        split pickles and the output pickle from ``self.validate_inputs()``,
        and hands both to ``combine_nfl_pickles``.
        """
        logging.debug("root_dir: {d}".format(d=self.root_dir))
        logging.debug("Total number of chunks N = {N}".format(N=self.N))

        splitted_pickles, out_pickle = self.validate_inputs()

        # The {N} placeholder has to be filled in explicitly; logging does not
        # expand str.format-style fields on its own.
        logging.info("Combining {N} nfl pickles.".format(N=self.N))
        combine_nfl_pickles(splitted_pickles, out_pickle)
コード例 #2
0
def resolved_tool_contract_runner(rtc):
    """Given a resolved tool contract, merge nfl pickles per cluster bin.

    Reads chunk tasks from the first input file of `rtc`, groups them by
    `cluster_bin_index`, and for every group combines the tasks' nfl pickles
    into the `nfl_all_pickle_fn` of an IceFiles object rooted at the first
    task's `cluster_out_dir`.  One "DONE" line per bin is written to the
    first output file of `rtc`.
    """
    tasks = ChunkTasksPickle.read(rtc.task.input_files[0])
    # groupby only groups adjacent items; sorted_by_attr is expected to order
    # the tasks by bin index in place (TODO confirm against ChunkTasksPickle).
    tasks.sorted_by_attr(attr='cluster_bin_index')
    assert all(isinstance(t, PartialChunkTask) for t in tasks)

    def _bin_index(task):
        return task.cluster_bin_index

    done_fn = rtc.task.output_files[0]
    with open(done_fn, 'w') as writer:
        for bin_index, grouped in groupby(tasks, _bin_index):
            bin_tasks = list(grouped)
            bin_pickles = [t.nfl_pickle for t in bin_tasks]
            ice_files = IceFiles(prog_name="",
                                 root_dir=bin_tasks[0].cluster_out_dir,
                                 no_log_f=True)
            merged_pickle = ice_files.nfl_all_pickle_fn

            log.info("Combining nfl pickles of cluster bin %s.",
                     str(bin_index))
            log.debug("nfl pickles are: %s.", ", ".join(bin_pickles))
            log.debug("Output merged nfl pickle is %s.", merged_pickle)

            combine_nfl_pickles(splitted_pickles=bin_pickles,
                                out_pickle=merged_pickle)
            done_line = ("Merge nfl pickles of cluster bin %s DONE: %s\n" %
                         (bin_index, merged_pickle))
            writer.write(done_line)
コード例 #3
0
def resolved_tool_contract_runner(rtc):
    """Given a resolved tool contract, combine nfl pickles bin by bin.

    The chunk tasks are read from `rtc.task.input_files[0]` and grouped on
    `cluster_bin_index`.  Each group's `nfl_pickle` files are passed to
    `combine_nfl_pickles` together with the `nfl_all_pickle_fn` path taken
    from an IceFiles object built on the group's first `cluster_out_dir`.
    A completion line for every bin goes to `rtc.task.output_files[0]`.
    """
    chunk_tasks = ChunkTasksPickle.read(rtc.task.input_files[0])
    # Presumably orders the tasks in place so that groupby below sees each
    # bin's tasks contiguously -- verify against ChunkTasksPickle.
    chunk_tasks.sorted_by_attr(attr='cluster_bin_index')
    assert all(isinstance(ct, PartialChunkTask) for ct in chunk_tasks)

    with open(rtc.task.output_files[0], 'w') as writer:
        by_bin = groupby(chunk_tasks, lambda ct: ct.cluster_bin_index)
        for bin_idx, members in by_bin:
            members = list(members)
            pickles_in_bin = [m.nfl_pickle for m in members]
            first_out_dir = members[0].cluster_out_dir
            out_pickle = IceFiles(prog_name="", root_dir=first_out_dir,
                                  no_log_f=True).nfl_all_pickle_fn

            log.info("Combining nfl pickles of cluster bin %s.", str(bin_idx))
            joined = ", ".join(pickles_in_bin)
            log.debug("nfl pickles are: %s.", joined)
            log.debug("Output merged nfl pickle is %s.", out_pickle)

            combine_nfl_pickles(splitted_pickles=pickles_in_bin,
                                out_pickle=out_pickle)
            writer.write("Merge nfl pickles of cluster bin %s DONE: %s\n" %
                         (bin_idx, out_pickle))
コード例 #4
0
 def combinePickles(self, pickle_filenames, out_pickle):
     """Combine the given pickle files into a single `out_pickle`.

     Thin wrapper that forwards both arguments to `combine_nfl_pickles`.
     """
     combine_nfl_pickles(splitted_pickles=pickle_filenames,
                         out_pickle=out_pickle)
コード例 #5
0
def run(chunk_input_json, output_file, chunk_key):
    """Combine the chunked nfl pickles selected by `chunk_key`.

    Loads the pipeline chunks from `chunk_input_json`, looks up the files for
    `chunk_key` through `get_datum_from_chunks_by_chunk_key`, and passes them
    with `output_file` to `combine_nfl_pickles`.

    Returns 0 unconditionally; the result of the combine call is discarded.
    """
    pipeline_chunks = load_pipeline_chunks_from_json(chunk_input_json)
    nfl_pickles = get_datum_from_chunks_by_chunk_key(pipeline_chunks,
                                                     chunk_key)
    combine_nfl_pickles(nfl_pickles, output_file)
    return 0
コード例 #6
0
 def combinePickles(self, pickle_filenames, out_pickle):
     """Merge every pickle in `pickle_filenames` and write it to `out_pickle`.

     Delegates the work to `combine_nfl_pickles`.
     """
     combine_nfl_pickles(splitted_pickles=pickle_filenames,
                         out_pickle=out_pickle)
コード例 #7
0
def run(chunk_input_json, output_file, chunk_key):
    """Gather the chunk files for `chunk_key` and combine them.

    The chunks come from `load_pipeline_chunks_from_json(chunk_input_json)`;
    the per-chunk files returned by `get_datum_from_chunks_by_chunk_key` are
    handed to `combine_nfl_pickles` with `output_file` as the destination.

    Always returns 0; the value returned by the combine call is not used.
    """
    chunked_files = get_datum_from_chunks_by_chunk_key(
        load_pipeline_chunks_from_json(chunk_input_json), chunk_key)
    combine_nfl_pickles(chunked_files, output_file)
    return 0