Example #1
def launcher_args_and_config(min_jobs=0):
    def args_and_config(opts):
        args = launcher_args(opts, min_jobs=min_jobs)
        configs = fixed_dictionaries({ss: filenames(suffix=".py") for ss in opts})
        args_and_configs = tuples(args, configs)
        return args_and_configs

    # Make opt names a mix of built-in opts and arbitrary names
    optimizers = lists(joinables() | sampled_from(sorted(CONFIG.keys())), min_size=1)
    S = optimizers.flatmap(args_and_config)
    return S
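The strategy above is meant to be consumed with Hypothesis's @given. A minimal usage sketch follows (a hypothetical test, assuming the hypothesis package and the strategies defined above are importable):

from hypothesis import given, settings

@given(launcher_args_and_config(min_jobs=1))
@settings(max_examples=10, deadline=None)
def test_launcher_args_and_config_shape(args_and_configs):
    # The strategy yields an (args, configs) tuple; configs maps each optimizer
    # name drawn for args to a generated wrapper filename ending in ".py".
    args, configs = args_and_configs
    assert all(fname.endswith(".py") for fname in configs.values())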
Example #2
def test_get_opt_class_module():
    # Should really do a parametric test, but a for loop is good enough
    for opt_name in sorted(CONFIG.keys()):
        opt_class = exp._get_opt_class(opt_name)

        fname = inspect.getfile(opt_class)
        fname = os.path.basename(fname)

        wrapper_file, _ = CONFIG[opt_name]

        assert fname == wrapper_file
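These tests assume CONFIG maps each built-in optimizer name to a (wrapper file, kwargs) pair, mirroring the per-optimizer settings that infer_settings builds below. A hedged illustration of that assumed shape (the names and filenames here are made up, not the real built-in entries):

# Hypothetical illustration only: the real entries live in the benchmark's
# built-in optimizer config module.
CONFIG = {
    "SomeBuiltinOpt": ("some_builtin_optimizer.py", {}),
    "AnotherBuiltinOpt": ("another_optimizer.py", {}),
}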
Example #3
def test_run_sklearn_study_real(api_config, model_name, dataset, scorer,
                                n_calls, n_suggestions):
    prob_type = data.get_problem_type(dataset)
    assume(scorer in data.METRICS_LOOKUP[prob_type])

    # Should really do a parametric test, but a for loop is good enough
    for opt_name in sorted(CONFIG.keys()):
        opt_class = exp._get_opt_class(opt_name)
        # opt_root=None should work with built-in opt
        opt_kwargs = exp.load_optimizer_kwargs(opt_name, opt_root=None)

        exp.run_sklearn_study(opt_class, opt_kwargs, model_name, dataset,
                              scorer, n_calls, n_suggestions)
def infer_settings(opt_root, opt_pattern="**/optimizer.py"):
    opt_root = PosixPath(opt_root)
    assert opt_root.is_dir(), "Opt root directory doesn't exist: %s" % opt_root
    assert opt_root.is_absolute(), "Only an absolute path should have even gotten this far."

    # Always sort for reproducibility
    source_files = sorted(opt_root.glob(opt_pattern))
    source_files = [ss.relative_to(opt_root) for ss in source_files]

    settings = {_cleanup(str(ss.parent)): [str(ss), {}] for ss in source_files}

    assert all(joinable(kk) for kk in settings), "Something went wrong in name sanitization."
    assert len(settings) == len(source_files), "Name collision after sanitization of %s" % repr(source_files)
    assert len(set(CONFIG.keys()) & set(settings.keys())) == 0, "Name collision with builtin optimizers."

    return settings
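A usage sketch for infer_settings (the path and directory names here are illustrative; the only assumptions are an absolute opt_root and one optimizer.py per optimizer subdirectory):

from pathlib import PosixPath

# Hypothetical layout: /abs/path/my_opts/<name>/optimizer.py
settings = infer_settings(PosixPath("/abs/path/my_opts"))
# settings maps each sanitized directory name to [relative wrapper path, {}], e.g.
# {"my_opt": ["my_opt/optimizer.py", {}]}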
def gen_commands(args, opt_file_lookup, run_uuid):
    """Generator providing commands to launch processes for experiments.

    Parameters
    ----------
    args : dict(CmdArgs, [int, str])
        Arguments of options to pass to the experiments being launched. The keys correspond to the same arguments
        passed to this program.
    opt_file_lookup : dict(str, str)
        Mapping from method name to filename containing wrapper class for the method.
    run_uuid : uuid.UUID
        UUID for this launcher run. Needed to generate different experiment UUIDs on each call. This function is
        deterministic provided the same `run_uuid`.

    Yields
    ------
    iteration_key : (str, str, str, str, str)
        Tuple containing ``(trial, classifier, data, optimizer, metric)`` to index the experiment.
    full_cmd : tuple(str)
        Strings containing the command and arguments to run a process for the experiment. Join with whitespace or use
        :func:`.util.shell_join` to get a string with the executable command. The command omits ``--opt-root``, which
        means it will default to ``.`` when the command is executed. As such, the command assumes it is executed with
        ``--opt-root`` as the working directory.
    """
    args_to_pass_thru = [
        CmdArgs.n_calls, CmdArgs.n_suggest, CmdArgs.db_root, CmdArgs.db
    ]
    # This could be made simpler and avoid the if statement if we just always passed data_root, even if no custom data is used.
    if args[CmdArgs.data_root] is not None:
        args_to_pass_thru.append(CmdArgs.data_root)

    # Possibilities to iterate over. Put them in sorted order just for good measure.
    c_list = strict_sorted(MODEL_NAMES if args[CmdArgs.classifier] is None else
                           args[CmdArgs.classifier])
    d_list = strict_sorted(DATA_LOADER_NAMES if args[CmdArgs.data] is None else
                           args[CmdArgs.data])
    o_list = strict_sorted(
        list(opt_file_lookup.keys()) + list(CONFIG.keys())
        if args[CmdArgs.optimizer] is None else args[CmdArgs.optimizer])
    assert all(((optimizer in opt_file_lookup) or (optimizer in CONFIG))
               for optimizer in o_list), "unknown optimizer in optimizer list"

    m_set = set(
        METRICS if args[CmdArgs.metric] is None else args[CmdArgs.metric])
    m_lookup = {
        problem_type: sorted(m_set.intersection(mm))
        for problem_type, mm in METRICS_LOOKUP.items()
    }
    assert all(
        (len(m_lookup[get_problem_type(data)]) > 0) for data in
        d_list), "At one metric needed for each problem type of data sets"

    G = product(range_str(args[CmdArgs.n_repeat]), c_list, d_list,
                o_list)  # iterate all combos
    for rep, classifier, data, optimizer in G:
        _, rep_str = rep
        problem_type = get_problem_type(data)
        for metric in m_lookup[problem_type]:
            # Get a reproducible string (conditioned on having the same run uuid), which should also never give
            # a duplicate (unless we force the same run uuid twice).
            iteration_key = (rep_str, classifier, data, optimizer, metric)
            iteration_id = str_join_safe(ARG_DELIM, iteration_key)
            sub_uuid = pyuuid.uuid5(run_uuid, iteration_id).hex

            # Build the argument list for subproc, passing some args thru
            cmd_args_pass_thru = [[CMD_STR[vv][0],
                                   arg_safe_str(args[vv])]
                                  for vv in args_to_pass_thru]
            # Technically, the optimizer is not actually needed here for non-built-in optimizers because it is already
            # specified via the entry point: optimizer_wrapper_file
            cmd_args = [
                [CMD_STR[CmdArgs.classifier][0],
                 arg_safe_str(classifier)],
                [CMD_STR[CmdArgs.data][0],
                 arg_safe_str(data)],
                [CMD_STR[CmdArgs.optimizer][0],
                 arg_safe_str(optimizer)],
                [CMD_STR[CmdArgs.uuid][0],
                 arg_safe_str(sub_uuid)],
                [CMD_STR[CmdArgs.metric][0],
                 arg_safe_str(metric)],
            ]
            cmd_args = tuple(sum(cmd_args + cmd_args_pass_thru, []))
            logger.info(" ".join(cmd_args))

            # The experiment command without the arguments
            if optimizer in CONFIG:  # => built-in optimizer wrapper
                experiment_cmd = (EXPERIMENT_ENTRY, )
            else:
                optimizer_wrapper_file = opt_file_lookup[optimizer]
                assert optimizer_wrapper_file.endswith(
                    ".py"), "optimizer wrapper should a be .py file"
                experiment_cmd = (PY_INTERPRETER, optimizer_wrapper_file)

            # Check arg safety again, odd-indexed elements in the list need to be arg safe
            assert all((_is_arg_safe(ss) == (ii % 2 == 1))
                       for ii, ss in enumerate(cmd_args))

            full_cmd = experiment_cmd + cmd_args
            yield iteration_key, full_cmd
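A driver sketch showing how the generator is typically consumed (args, opt_file_lookup, and any shell-quoting helper are assumed to be built elsewhere in the launcher, as the docstring describes):

import uuid as pyuuid

run_uuid = pyuuid.uuid4()
for iteration_key, full_cmd in gen_commands(args, opt_file_lookup, run_uuid):
    # full_cmd omits --opt-root, so execute it with the opt root as the working directory.
    print(" ".join(full_cmd))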