def run(self):
    """Run IcePartialMerge.

    Validates inputs, then merges the N chunked non-full-length (nfl)
    pickles into a single output pickle.
    """
    logging.debug("root_dir: {d}".format(d=self.root_dir))
    logging.debug("Total number of chunks N = {N}".format(N=self.N))
    splitted_pickles, out_pickle = self.validate_inputs()
    # BUG FIX: the original passed the raw template string, logging the
    # literal text "{N}" instead of the chunk count.
    logging.info("Combining {N} nfl pickles.".format(N=self.N))
    combine_nfl_pickles(splitted_pickles, out_pickle)
def resolved_tool_contract_runner(rtc):
    """Given resolved tool contract, run"""
    # Load all partial-chunk tasks and order them so groupby sees each
    # cluster bin as one contiguous run.
    tasks = ChunkTasksPickle.read(rtc.task.input_files[0])
    tasks.sorted_by_attr(attr='cluster_bin_index')
    assert all([isinstance(task, PartialChunkTask) for task in tasks])

    with open(rtc.task.output_files[0], 'w') as writer:
        for bin_index, bin_tasks in groupby(tasks, lambda x: x.cluster_bin_index):
            bin_tasks = list(bin_tasks)
            input_pickles = [task.nfl_pickle for task in bin_tasks]
            # Resolve the canonical merged-pickle path for this cluster bin.
            out_pickle = IceFiles(prog_name="",
                                  root_dir=bin_tasks[0].cluster_out_dir,
                                  no_log_f=True).nfl_all_pickle_fn
            log.info("Combining nfl pickles of cluster bin %s.", str(bin_index))
            log.debug("nfl pickles are: %s.", (", ".join(input_pickles)))
            log.debug("Output merged nfl pickle is %s.", out_pickle)
            combine_nfl_pickles(splitted_pickles=input_pickles,
                                out_pickle=out_pickle)
            # Record completion so downstream steps can verify the merge.
            writer.write("Merge nfl pickles of cluster bin %s DONE: %s\n"
                         % (bin_index, out_pickle))
def combinePickles(self, pickle_filenames, out_pickle):
    """Merge every input *.pickle file into a single pickle at out_pickle.

    Thin wrapper delegating directly to combine_nfl_pickles.
    """
    combine_nfl_pickles(pickle_filenames, out_pickle)
def run(chunk_input_json, output_file, chunk_key):
    """Gather chunked nfl pickles listed in a pipeline-chunk JSON and merge
    them into output_file.

    Returns 0 on success (CLI exit-code convention).
    """
    # Pull the per-chunk pickle paths out of the chunk JSON by key.
    pipeline_chunks = load_pipeline_chunks_from_json(chunk_input_json)
    pickle_paths = get_datum_from_chunks_by_chunk_key(pipeline_chunks, chunk_key)
    combine_nfl_pickles(pickle_paths, output_file)
    return 0