def test_num_items_in_chunks():
    """Check that num_items_in_chunks returns the expected per-chunk counts."""
    # Each case is ((num_items, num_chunks), expected list of chunk sizes).
    cases = (
        ((28, 12), [3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2]),
        ((2, 2), [1, 1]),
        ((7, 2), [4, 3]),
    )
    for (total, nchunks), wanted in cases:
        assert_equal(wanted, num_items_in_chunks(total, nchunks))
def run_main(json_file, output_json_file, max_nchunks):
    """
    Spawn a json with scripts into multiple json files, each containing
    one or more scripts, and write a chunk.json describing the spawned files.

    The number of spawned files is min(max_nchunks, number of scripts).
    Scripts are assigned to chunks in sorted p_id order; chunk sizes come
    from num_items_in_chunks. All spawned files are written to the directory
    of output_json_file.

    Parameters:
      json_file -- json <- dict{p_id: args}, where args <- dict{'script_fn': script_fn, ...}
      output_json_file -- chunk.json
      max_nchunks -- upper bound on the number of spawned json files

    Returns:
      0 on success.

    Raises:
      ValueError -- if json_file contains no scripts.
      AssertionError -- if some p_ids were not assigned to any chunk.
    """
    # Use a context manager so the input file handle is closed promptly.
    with open(json_file, 'r') as reader:
        a = json.load(reader)

    if len(a) == 0:
        raise ValueError("script json %s is empty" % json_file)

    out_dir = op.dirname(output_json_file)

    num_chunks = min(max_nchunks, len(a))
    num_scripts_in_chunks = num_items_in_chunks(num_items=len(a), num_chunks=num_chunks)

    # Writing chunk.json
    base_name = "spawned_json_w_scripts_chunk"
    chunks = []
    spawned_jsons = []

    # p_ids still waiting to be assigned; consumed from the front per chunk.
    p_ids = sorted(a.keys())
    for chunk_idx in range(0, num_chunks):
        chunk_id = "_".join([base_name, str(chunk_idx)])
        spawned_json_file = op.join(out_dir, chunk_id + ".json")
        spawned_txt_file = op.join(out_dir, chunk_id + "_done.txt")

        # make a chunk
        d = {
            Constants.CHUNK_KEYS[0]: spawned_json_file,
            Constants.CHUNK_KEYS[1]: spawned_txt_file,
        }
        c = PipelineChunk(chunk_id, **d)
        chunks.append(c)

        # make content for the spawned json
        scripts_dict = dict()
        num_scripts = num_scripts_in_chunks[chunk_idx]
        for script_idx in range(0, num_scripts):
            p_id = p_ids[script_idx]
            scripts_dict[p_id] = a[p_id]

        # delete p_ids[0: num_scripts]
        p_ids = p_ids[num_scripts:]

        # Write script_dict, which is a dict of {p_id: args} to spawned json
        with open(spawned_json_file, 'w') as writer:
            writer.write(json.dumps(scripts_dict) + "\n")

        spawned_jsons.append(spawned_json_file)

        # The txt file records the path of the spawned json it belongs to.
        with open(spawned_txt_file, 'w') as writer:
            writer.write("%s" % spawned_json_file)

    # Every script must have landed in exactly one chunk.
    if len(p_ids) != 0:
        raise AssertionError("Scripts of p_ids %s are not scattered." % repr(p_ids))

    log.info("Spawning %s into %d files", json_file, num_chunks)
    log.debug("Spawned files: %s.", ", ".join(spawned_jsons))
    log.info("Writing chunk.json to %s", output_json_file)
    write_pipeline_chunks(chunks, output_json_file,
                          "created by %s" % Constants.TOOL_ID)
    return 0
def run_main(json_file, output_json_file, max_nchunks):
    """
    Spawn a json with scripts into multiple json files, each containing
    one or more scripts, and write a chunk.json describing the spawned files.

    The number of spawned files is min(max_nchunks, number of scripts).
    Scripts are assigned to chunks in sorted p_id order; chunk sizes come
    from num_items_in_chunks. All spawned files are written to the directory
    of output_json_file.

    Parameters:
      json_file -- json <- dict{p_id: args}, where args <- dict{'script_fn': script_fn, ...}
      output_json_file -- chunk.json
      max_nchunks -- upper bound on the number of spawned json files

    Returns:
      0 on success.

    Raises:
      ValueError -- if json_file contains no scripts.
      AssertionError -- if some p_ids were not assigned to any chunk.
    """
    # Use a context manager so the input file handle is closed promptly.
    with open(json_file, 'r') as reader:
        a = json.load(reader)

    if len(a) == 0:
        raise ValueError("script json %s is empty" % json_file)

    out_dir = op.dirname(output_json_file)

    num_chunks = min(max_nchunks, len(a))
    num_scripts_in_chunks = num_items_in_chunks(num_items=len(a), num_chunks=num_chunks)

    # Writing chunk.json
    base_name = "spawned_json_w_scripts_chunk"
    chunks = []
    spawned_jsons = []

    # p_ids still waiting to be assigned; consumed from the front per chunk.
    p_ids = sorted(a.keys())
    for chunk_idx in range(0, num_chunks):
        chunk_id = "_".join([base_name, str(chunk_idx)])
        spawned_json_file = op.join(out_dir, chunk_id + ".json")
        spawned_txt_file = op.join(out_dir, chunk_id + "_done.txt")

        # make a chunk
        d = {
            Constants.CHUNK_KEYS[0]: spawned_json_file,
            Constants.CHUNK_KEYS[1]: spawned_txt_file,
        }
        c = PipelineChunk(chunk_id, **d)
        chunks.append(c)

        # make content for the spawned json
        scripts_dict = dict()
        num_scripts = num_scripts_in_chunks[chunk_idx]
        for script_idx in range(0, num_scripts):
            p_id = p_ids[script_idx]
            scripts_dict[p_id] = a[p_id]

        # delete p_ids[0: num_scripts]
        p_ids = p_ids[num_scripts:]

        # Write script_dict, which is a dict of {p_id: args} to spawned json
        with open(spawned_json_file, 'w') as writer:
            writer.write(json.dumps(scripts_dict) + "\n")

        spawned_jsons.append(spawned_json_file)

        # The txt file records the path of the spawned json it belongs to.
        with open(spawned_txt_file, 'w') as writer:
            writer.write("%s" % spawned_json_file)

    # Every script must have landed in exactly one chunk.
    if len(p_ids) != 0:
        raise AssertionError("Scripts of p_ids %s are not scattered." % repr(p_ids))

    log.info("Spawning %s into %d files", json_file, num_chunks)
    log.debug("Spawned files: %s.", ", ".join(spawned_jsons))
    log.info("Writing chunk.json to %s", output_json_file)
    write_pipeline_chunks(chunks, output_json_file,
                          "created by %s" % Constants.TOOL_ID)
    return 0