def test_num_items_in_chunks():
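    """num_items_in_chunks(num_items, num_chunks) should split num_items as
    evenly as possible, front-loading any remainder onto the earliest chunks."""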
    expected = [3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2]
    got = num_items_in_chunks(28, 12)
    assert_equal(expected, got)

    expected = [1, 1]
    got = num_items_in_chunks(2, 2)
    assert_equal(expected, got)

    expected = [4, 3]
    got = num_items_in_chunks(7, 2)
    assert_equal(expected, got)
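
# num_items_in_chunks is imported from the module under test. Below is a
# minimal reference sketch, assuming the even-split, remainder-first behavior
# the tests above assert; the name _num_items_in_chunks_sketch is hypothetical.
def _num_items_in_chunks_sketch(num_items, num_chunks):
    # Every chunk gets the floor of the even split; the first `extra`
    # chunks each take one leftover item, so the sizes sum to num_items.
    base, extra = divmod(num_items, num_chunks)
    return [base + 1 if i < extra else base for i in range(num_chunks)]

# e.g. _num_items_in_chunks_sketch(28, 12) == [3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2]
# and _num_items_in_chunks_sketch(7, 2) == [4, 3]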
def run_main(json_file, output_json_file, max_nchunks):
    """
    Spawn a json with scripts into multiple json files each containing a script.
    Parameters:
      json_file -- json <- dict{p_id: args}, where args <- dict{'script_fn': script_fn, ...}
      output_json -- chunk.json
    """
    with open(json_file, 'r') as reader:
        a = json.load(reader)

    if len(a) == 0:
        raise ValueError("script json %s is empty" % json_file)
    out_dir = op.dirname(output_json_file)

    num_chunks = min(max_nchunks, len(a))
    num_scripts_in_chunks = num_items_in_chunks(num_items=len(a), num_chunks=num_chunks)

    # Writing chunk.json
    base_name = "spawned_json_w_scripts_chunk"
    chunks = []
    spawned_jsons = []

    p_ids = sorted(a.keys())  # sorted so chunk assignment is deterministic
    for chunk_idx in range(0, num_chunks):
        chunk_id = "_".join([base_name, str(chunk_idx)])
        spawned_json_file = op.join(out_dir, chunk_id + ".json")
        spawned_txt_file = op.join(out_dir, chunk_id + "_done.txt")
        # make a PipelineChunk pointing at the spawned json and its done sentinel
        d = {Constants.CHUNK_KEYS[0]: spawned_json_file,
             Constants.CHUNK_KEYS[1]: spawned_txt_file}
        c = PipelineChunk(chunk_id, **d)
        chunks.append(c)

        # make content for the spawned json
        scripts_dict = dict()
        num_scripts = num_scripts_in_chunks[chunk_idx]
        for script_idx in range(0, num_scripts):
            p_id = p_ids[script_idx]
            scripts_dict[p_id] = a[p_id]

        # drop the p_ids already consumed by this chunk
        p_ids = p_ids[num_scripts:]

        # Write scripts_dict, a dict of {p_id: args}, to the spawned json
        with open(spawned_json_file, 'w') as writer:
            writer.write(json.dumps(scripts_dict) + "\n")

        spawned_jsons.append(spawned_json_file)
        with open(spawned_txt_file, 'w') as writer:
            writer.write("%s" % spawned_json_file)

    if len(p_ids) != 0:
        raise AssertionError("Scripts of p_ids %s were not scattered." % repr(p_ids))

    log.info("Spawning %s into %d files", json_file, num_chunks)
    log.debug("Spawned files: %s.", ", ".join(spawned_jsons))
    log.info("Writing chunk.json to %s", output_json_file)
    write_pipeline_chunks(chunks, output_json_file,
                          "created by %s" % Constants.TOOL_ID)
    return 0
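
# A hedged usage sketch of run_main, assuming this module already imports
# json and os.path as op, and that pbcommand provides PipelineChunk and
# write_pipeline_chunks as used above. The p_ids and script paths below are
# hypothetical; tempfile.TemporaryDirectory requires Python 3.
def example_run_main():
    import tempfile
    with tempfile.TemporaryDirectory() as out_dir:
        json_file = op.join(out_dir, "scripts.json")
        with open(json_file, 'w') as writer:
            json.dump({"p01": {"script_fn": "a.sh"},
                       "p02": {"script_fn": "b.sh"},
                       "p03": {"script_fn": "c.sh"}}, writer)
        # max_nchunks=2 with 3 scripts -> chunk_0 holds 2 scripts, chunk_1 holds 1.
        assert run_main(json_file, op.join(out_dir, "chunk.json"),
                        max_nchunks=2) == 0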