def dry_run(args, opt_file_lookup, run_uuid, fp, random=np_random):
    """Write to buffer description of commands for running all experiments.

    This function is almost pure by writing to a buffer, but it could be switched to a generator.

    The output consists of two comment lines (``# running: ...`` with the serialized arguments and ``# cmd: ...``
    with the launcher command line) followed by one line per job of the form
    ``job_<suffix>_<index> <cmd> && <cmd> && ...``.

    Parameters
    ----------
    args : dict(CmdArgs, [int, str])
        Arguments of options to pass to the experiments being launched. The keys corresponds to the same arguments
        passed to this program.
    opt_file_lookup : dict(str, str)
        Mapping from method name to filename containing wrapper class for the method.
    run_uuid : uuid.UUID
        UUID for this launcher run. Needed to generate different experiments UUIDs on each call. This function is
        deterministic provided the same `run_uuid`.
    fp : writable buffer
        File handle to write out sequence of commands to execute (broken into jobs on each line) to execute all the
        experiments (possibly each job in parallel).
    random : RandomState
        Random stream to use for reproducibility.

    Raises
    ------
    AssertionError
        If ``args[CmdArgs.n_jobs]`` is not positive, if the split does not contain exactly the generated commands, or
        if any job ends up with no commands.

    Warns
    -----
    UserWarning
        Always emitted, carrying the manual setup information returned by ``XRSerializer.init_db_manual``.
    """
    assert args[CmdArgs.n_jobs] > 0, "Must have non-zero jobs for dry run"

    # Taking in file pointer since then we can test without actual file. Could also build generator that returns lines
    # to write.
    manual_setup_info = XRSerializer.init_db_manual(args[CmdArgs.db_root],
                                                    db=args[CmdArgs.db],
                                                    keys=EXP_VARS)
    warnings.warn(manual_setup_info, UserWarning)

    # Get the commands, grouped by optimizer so the split below can be stratified on optimizer
    dry_run_commands = {}
    G = gen_commands(args, opt_file_lookup, run_uuid)
    for (_, _, _, optimizer, _), full_cmd in G:
        cmd_str = shell_join(full_cmd)
        dry_run_commands.setdefault(optimizer, []).append(cmd_str)

    # Make sure we never have any empty jobs, which is a waste
    n_commands = sum(len(v) for v in dry_run_commands.values())
    n_jobs = min(args[CmdArgs.n_jobs], n_commands)

    # Would prob also work with pyrandom, but only tested np random so far
    subcommands = strat_split(list(dry_run_commands.values()),
                              n_jobs,
                              random=random)
    # Make sure have same commands overall, delete once we trust strat_split.
    # Flatten with a comprehension: sum(lists, []) re-copies the accumulated list on every step (quadratic).
    all_commands = [
        command for group in dry_run_commands.values() for command in group
    ]
    assert sorted(np.concatenate(subcommands)) == sorted(all_commands)

    # A prefix of the run UUID's hex form tags every job name written by this call
    job_suffix = run_uuid.hex[:UUID_JOB_CHARS]

    # Include comments as reproducibility lines
    args_str = serializable_dict(args)
    fp.write("# running: %s\n" % str(args_str))
    fp.write("# cmd: %s\n" % cmd.cmd_str())
    for ii, ii_str in range_str(n_jobs):
        assert len(subcommands[ii]) > 0
        # Commands within a job are chained with && on a single line
        fp.write("job_%s_%s %s\n" %
                 (job_suffix, ii_str, " && ".join(subcommands[ii])))
Example #2
0
def cmd_str():
    """Return the interpreter followed by the shell-joined `sys.argv` as a single string.

    Returns
    -------
    str
        ``PY_INTERPRETER`` and the result of ``shell_join(sys.argv)`` separated by one space.
    """
    joined_argv = shell_join(sys.argv)
    return "%s %s" % (PY_INTERPRETER, joined_argv)
Example #3
0
def test_shell_join(argv):
    """Check that `shell_join` output splits back into the original arguments.

    Parameters
    ----------
    argv : sequence of str
        Arguments to join with a space delimiter and then re-split with `shlex.split`.
    """
    joined = bobm_util.shell_join(argv, delim=" ")
    round_tripped = shlex.split(joined)

    assert round_tripped == list(argv)