def dry_run(args, opt_file_lookup, run_uuid, fp, random=np_random):
    """Write the commands needed to run every experiment to a buffer.

    Nothing is executed here: the commands are only described, one job per line, so that they can be run later
    (possibly each job in parallel). Writing to a caller-supplied buffer keeps the function almost pure; it could
    equally be turned into a generator of lines.

    Parameters
    ----------
    args : dict(CmdArgs, [int, str])
        Arguments of options to pass to the experiments being launched. The keys correspond to the same arguments
        passed to this program.
    opt_file_lookup : dict(str, str)
        Mapping from method name to filename containing wrapper class for the method.
    run_uuid : uuid.UUID
        UUID for this launcher run. Needed to generate different experiment UUIDs on each call. This function is
        deterministic provided the same `run_uuid`.
    fp : writable buffer
        File handle that receives the sequence of commands, broken into jobs with one job per line.
    random : RandomState
        Random stream to use for reproducibility.
    """
    assert args[CmdArgs.n_jobs] > 0, "Must have non-zero jobs for dry run"

    # A file pointer is accepted (rather than a path) so the function can be tested without an actual file.
    setup_msg = XRSerializer.init_db_manual(args[CmdArgs.db_root], db=args[CmdArgs.db], keys=EXP_VARS)
    warnings.warn(setup_msg, UserWarning)

    # Collect the command strings, grouped by optimizer
    commands_by_opt = {}
    for (_, _, _, optimizer, _), full_cmd in gen_commands(args, opt_file_lookup, run_uuid):
        joined_cmd = shell_join(full_cmd)
        commands_by_opt.setdefault(optimizer, []).append(joined_cmd)

    # Cap the job count at the command count so that no job ends up empty, which is a waste
    n_commands = sum(map(len, commands_by_opt.values()))
    n_jobs = min(args[CmdArgs.n_jobs], n_commands)

    # Would prob also work with pyrandom, but only tested np random so far
    subcommands = strat_split(list(commands_by_opt.values()), n_jobs, random=random)

    # Sanity check that the split neither lost nor invented commands; delete once we trust strat_split
    expected_cmds = [one_cmd for group in commands_by_opt.values() for one_cmd in group]
    assert sorted(np.concatenate(subcommands)) == sorted(expected_cmds)

    # Header comments record how this output was produced, for reproducibility
    args_str = serializable_dict(args)
    job_suffix = run_uuid.hex[:UUID_JOB_CHARS]
    fp.write("# running: %s\n" % str(args_str))
    fp.write("# cmd: %s\n" % cmd.cmd_str())

    for ii, ii_str in range_str(n_jobs):
        job_cmds = subcommands[ii]
        assert len(job_cmds) > 0
        fp.write("job_%s_%s %s\n" % (job_suffix, ii_str, " && ".join(job_cmds)))
def cmd_str():
    """Build a single string describing how this program was invoked.

    Returns
    -------
    cmd : str
        `PY_INTERPRETER` followed by a space and the result of applying `shell_join` to `sys.argv`.
    """
    joined_argv = shell_join(sys.argv)
    return "%s %s" % (PY_INTERPRETER, joined_argv)
def test_shell_join(argv):
    """Check that joining `argv` with `shell_join` and re-splitting with `shlex.split` gives back `argv`."""
    joined = bobm_util.shell_join(argv, delim=" ")
    recovered = shlex.split(joined)
    assert recovered == list(argv)