def load_experiments(uuid_list, db_root, dbid):  # pragma: io
    """Generator to load the results of the experiments.

    Parameters
    ----------
    uuid_list : list(uuid.UUID)
        List of UUIDs corresponding to experiments to load.
    db_root : str
        Root location for data store as requested by the serializer used.
    dbid : str
        Name of the data store as requested by the serializer used.

    Yields
    ------
    meta_data : (str, str, str)
        The `meta_data` contains a `tuple` of `str` with ``test_case, optimizer, uuid``.
    data : (:class:`xarray:xarray.Dataset`, :class:`xarray:xarray.Dataset`, :class:`xarray:xarray.Dataset`, list(float))
        The `data` contains a tuple of ``(perf_ds, time_ds, suggest_ds, sig)``. The `perf_ds` is an
        :class:`xarray:xarray.Dataset` containing the evaluation results with dimensions ``(ITER, SUGGEST)``; each
        variable is an objective. The `time_ds` is an :class:`xarray:xarray.Dataset` containing the timing results of
        the form accepted by `summarize_time`. Its coordinates must be compatible with `perf_ds`. The `suggest_ds` is
        an :class:`xarray:xarray.Dataset` containing the inputs to the function evaluations. Each variable is a
        function input. Finally, `sig` contains the `test_case` signature and must be `list(float)`.
    """
    uuids_seen = set()
    for uuid_ in uuid_list:
        logger.info(uuid_.hex)

        # Load perf and timing data
        perf_ds, meta = XRSerializer.load(db_root, db=dbid, key=cc.EVAL, uuid_=uuid_)
        time_ds, meta_t = XRSerializer.load(db_root, db=dbid, key=cc.TIME, uuid_=uuid_)
        assert meta == meta_t, "meta data should between time and eval files"
        suggest_ds, meta_t = XRSerializer.load(db_root, db=dbid, key=cc.SUGGEST_LOG, uuid_=uuid_)
        assert meta == meta_t, "meta data should between suggest and eval files"

        # Get signature to pass out as well
        _, sig = meta["signature"]
        logger.info(meta)
        logger.info(sig)

        # Build the new indices for the combined data; this could be put in a function for easier testing
        eval_args = unserializable_dict(meta["args"])  # Unpack meta-data
        test_case = SklearnModel.test_case_str(
            eval_args[CmdArgs.classifier], eval_args[CmdArgs.data], eval_args[CmdArgs.metric]
        )
        optimizer = str_join_safe(
            ARG_DELIM, (eval_args[CmdArgs.optimizer], eval_args[CmdArgs.opt_rev], eval_args[CmdArgs.rev])
        )
        args_uuid = eval_args[CmdArgs.uuid]

        # Check UUID sanity
        assert isinstance(args_uuid, str)
        assert args_uuid == uuid_.hex, "UUID meta-data does not match filename"
        assert args_uuid not in uuids_seen, "uuids being reused between studies"
        uuids_seen.add(args_uuid)

        # Return key -> data so this generator can be iterated over in a dict-like manner
        meta_data = (test_case, optimizer, args_uuid)
        data = (perf_ds, time_ds, suggest_ds, sig)
        yield meta_data, data
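A minimal usage sketch (the `db_root` path and `dbid` below are placeholders): because the generator yields ``(meta_data, data)`` pairs keyed by ``(test_case, optimizer, uuid)``, it can be consumed directly into a `dict`.

# Hypothetical usage of load_experiments; db_root/dbid are placeholder values.
all_exp = dict(load_experiments(uuid_list, db_root="./runs", dbid="my_study"))
for (test_case, optimizer, uuid_hex), (perf_ds, time_ds, suggest_ds, sig) in all_exp.items():
    print(test_case, optimizer, uuid_hex, list(perf_ds.data_vars))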
Example #2
    def init_db_manual(db_root, keys, db):
        """Instruction for how one would manually initialize the "database" on another system.

        Parameters
        ----------
        db_root : str
            Absolute path to the database.
        keys : list(str)
            The variable names (or keys) we will store in the database for non-derived data.
        db : str
            The name of the database.

        Returns
        -------
        manual_setup_info : str
            The setup instructions.
        """
        XRSerializer._validate(db_root, keys, db)
        assert db is not None, "Must specify db name to set up manually."

        exp_subdir = os.path.join(db_root, db)
        subdirs = [_DERIVED_DIR, _LOGGING_DIR] + list(keys)
        manual_setup_info = _SETUP_STR % (exp_subdir,
                                          str_join_safe(" ", subdirs))
        return manual_setup_info
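The returned string only describes the setup; a minimal sketch of actually carrying it out on another system might look like the following. The "derived" and "log" directory names are assumptions standing in for `_DERIVED_DIR` and `_LOGGING_DIR`, which are not shown here.

import os

# Hypothetical manual setup: create the experiment subdirectory and one folder
# per key under it. Real directory-name constants live in the serializer module.
def manual_init_sketch(db_root, keys, db):
    exp_subdir = os.path.join(db_root, db)
    for sub in ["derived", "log"] + list(keys):
        os.makedirs(os.path.join(exp_subdir, sub), exist_ok=True)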
Example #3
def test_case_str(model, dataset, scorer):
    """Generate the combined test case string from model, dataset, and scorer combination."""
    test_case = str_join_safe(ARG_DELIM, (model, dataset, scorer))
    return test_case
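A tiny usage example (elsewhere in these examples this appears as a method, `SklearnModel.test_case_str`; the model/data/metric names here are hypothetical, and the result shown assumes `ARG_DELIM` is an underscore):

# Hypothetical usage; assuming ARG_DELIM == "_", the result is "SVM_iris_acc".
# str_join_safe is expected to raise if a component already contains the delimiter,
# which keeps these keys unambiguous.
test_case = test_case_str("SVM", "iris", "acc")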
Example #4
def experiment_main(opt_class, args=None):  # pragma: main
    """This is in effect the `main` routine for this experiment. However, it is called from the optimizer wrapper file
    so the class can be passed in. The optimizers are assumed to be outside the package, so the optimizer class can't
    be named from inside the main function without using hacky stuff like `eval`.
    """
    if args is None:
        description = "Run a study with one benchmark function and an optimizer"
        args = cmd.parse_args(cmd.experiment_parser(description))
    args[CmdArgs.opt_rev] = opt_class.get_version()

    run_uuid = uuid.UUID(args[CmdArgs.uuid])

    logging.captureWarnings(True)

    # Setup logging to both a file and stdout (if verbose is set to True)
    logger.setLevel(logging.INFO)  # Note this is the module-wide logger
    logfile = XRSerializer.logging_path(args[CmdArgs.db_root],
                                        args[CmdArgs.db], run_uuid)
    logger_file_handler = logging.FileHandler(logfile, mode="w")
    logger.addHandler(logger_file_handler)
    if args[CmdArgs.verbose]:
        logger.addHandler(logging.StreamHandler())

    warnings_logger = logging.getLogger("py.warnings")
    warnings_logger.addHandler(logger_file_handler)
    if args[CmdArgs.verbose]:
        warnings_logger.addHandler(logging.StreamHandler())

    logger.info("running: %s" % str(cmd.serializable_dict(args)))
    logger.info("cmd: %s" % cmd.cmd_str())

    assert (args[CmdArgs.metric]
            in METRICS_LOOKUP[get_problem_type(args[CmdArgs.data])]
            ), "reg/clf metrics can only be used on a compatible dataset"

    # Set up random streams for computing the signature; must use the same seed
    # across all runs to ensure the signature is consistent. This seed is random:
    _setup_seeds(
        "7e9f2cabb0dd4f44bc10cf18e440b427")  # pragma: allowlist secret
    signature = get_objective_signature(args[CmdArgs.classifier],
                                        args[CmdArgs.data],
                                        args[CmdArgs.metric],
                                        data_root=args[CmdArgs.data_root])
    logger.info("computed signature: %s" % str(signature))

    opt_kwargs = load_optimizer_kwargs(args[CmdArgs.optimizer],
                                       args[CmdArgs.optimizer_root])

    # Set up the callback for intermediate logging
    if cc.BASELINE not in XRSerializer.get_derived_keys(args[CmdArgs.db_root],
                                                        db=args[CmdArgs.db]):
        warnings.warn("Baselines not found. Will not log intermediate scores.")
        callback = None
    else:
        test_case_str = SklearnModel.test_case_str(args[CmdArgs.classifier],
                                                   args[CmdArgs.data],
                                                   args[CmdArgs.metric])
        optimizer_str = str_join_safe(
            ARG_DELIM, (args[CmdArgs.optimizer], args[CmdArgs.opt_rev],
                        args[CmdArgs.rev]))

        baseline_ds, baselines_meta = XRSerializer.load_derived(
            args[CmdArgs.db_root], db=args[CmdArgs.db], key=cc.BASELINE)

        # Check the objective function signatures match in the baseline file
        sig_errs, _ = analyze_signature_pair({test_case_str: signature[1]},
                                             baselines_meta["signature"])
        logger.info("Signature errors:\n%s" % sig_errs.to_string())
        print(json.dumps({"exp sig errors": sig_errs.T.to_dict()}))

        def log_mean_score_json(evals, iters):
            assert evals.shape == (len(OBJECTIVE_NAMES), )
            assert not np.any(np.isnan(evals))

            log_msg = {
                cc.TEST_CASE: test_case_str,
                cc.METHOD: optimizer_str,
                cc.TRIAL: args[CmdArgs.uuid],
                cc.ITER: iters,
            }

            for idx, obj in enumerate(OBJECTIVE_NAMES):
                assert OBJECTIVE_NAMES[idx] == obj

                # Extract relevant rescaling info
                slice_ = {cc.TEST_CASE: test_case_str, OBJECTIVE: obj}
                best_opt = baseline_ds[cc.PERF_BEST].sel(
                    slice_, drop=True).values.item()
                base_clip_val = baseline_ds[cc.PERF_CLIP].sel(
                    slice_, drop=True).values.item()

                # Perform the same rescaling as found in experiment_analysis.compute_aggregates()
                score = linear_rescale(evals[idx],
                                       best_opt,
                                       base_clip_val,
                                       0.0,
                                       1.0,
                                       enforce_bounds=False)
                # Also, clip the score from below at -1 to limit max influence of single run on final average
                score = np.clip(score, -1.0, 1.0)
                score = score.item()  # Make easiest for logging in JSON
                assert isinstance(score, float)

                # Note: This is not the raw score but the rescaled one!
                log_msg[obj] = score
            log_msg = json.dumps(log_msg)
            print(log_msg, flush=True)
            # One second safety delay to protect against subprocess stdout getting lost
            sleep(1)

        callback = log_mean_score_json

    # Now set the seeds for the actual experiment
    _setup_seeds(args[CmdArgs.uuid])

    # Now do the experiment
    logger.info("starting sklearn study %s %s %s %s %d %d" % (
        args[CmdArgs.optimizer],
        args[CmdArgs.classifier],
        args[CmdArgs.data],
        args[CmdArgs.metric],
        args[CmdArgs.n_calls],
        args[CmdArgs.n_suggest],
    ))
    logger.info("with data root: %s" % args[CmdArgs.data_root])
    function_evals, timing, suggest_log = run_sklearn_study(
        opt_class,
        opt_kwargs,
        args[CmdArgs.classifier],
        args[CmdArgs.data],
        args[CmdArgs.metric],
        args[CmdArgs.n_calls],
        args[CmdArgs.n_suggest],
        data_root=args[CmdArgs.data_root],
        callback=callback,
    )

    # Curate results into clean dataframes
    eval_ds = build_eval_ds(function_evals, OBJECTIVE_NAMES)
    time_ds = build_timing_ds(*timing)
    suggest_ds = build_suggest_ds(suggest_log)

    # setup meta:
    meta = {"args": cmd.serializable_dict(args), "signature": signature}
    logger.info("saving meta data: %s" % str(meta))

    # Now the final IO to export the results
    logger.info("saving results")
    XRSerializer.save(eval_ds,
                      meta,
                      args[CmdArgs.db_root],
                      db=args[CmdArgs.db],
                      key=cc.EVAL,
                      uuid_=run_uuid)

    logger.info("saving timing")
    XRSerializer.save(time_ds,
                      meta,
                      args[CmdArgs.db_root],
                      db=args[CmdArgs.db],
                      key=cc.TIME,
                      uuid_=run_uuid)

    logger.info("saving suggest log")
    XRSerializer.save(suggest_ds,
                      meta,
                      args[CmdArgs.db_root],
                      db=args[CmdArgs.db],
                      key=cc.SUGGEST_LOG,
                      uuid_=run_uuid)

    logger.info("done")
Example #5
def compute_baseline(perf_da):
    """Compute a performance baseline of base and best performance from the aggregate experimental results.

    Parameters
    ----------
    perf_da : :class:`xarray:xarray.DataArray`
        Aggregate experimental results with each function evaluation in the experiments. `perf_da` has dimensions
        ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)`` and is assumed to have no nan values.

    Returns
    -------
    baseline_ds : :class:`xarray:xarray.Dataset`
        Dataset with baseline performance. It has variables ``(PERF_MED, PERF_MEAN, PERF_CLIP, PERF_BEST)`` with
        dimensions ``(ITER, TEST_CASE)``, ``(ITER, TEST_CASE)``, ``(TEST_CASE,)``, and ``(TEST_CASE,)``, respectively.
        `PERF_MED` is a baseline of performance based on random search when using medians to summarize performance.
        Likewise, `PERF_MEAN` is for means. `PERF_CLIP` is an upper bound used to clip poor performance when using the
        mean. `PERF_BEST` is an estimate of the global minimum.
    """
    validate_agg_perf(perf_da)

    ref_prefix = str_join_safe(ARG_DELIM, (cc.RANDOM_SEARCH, ""))
    ref_random = [kk for kk in perf_da.coords[METHOD].values if kk.startswith(ref_prefix)]
    assert len(ref_random) > 0, "Did not find any random search in methods."

    # How many points we will have after each batch
    trials_grid = perf_da.sizes[SUGGEST] * (1 + np.arange(perf_da.sizes[ITER]))

    # Now iterate over problems and get baseline performance
    baseline_ds = ds_like_mixed(
        perf_da,
        [
            (PERF_MED, [ITER, TEST_CASE]),
            (PERF_MEAN, [ITER, TEST_CASE]),
            (PERF_CLIP, [TEST_CASE]),
            (PERF_BEST, [TEST_CASE]),
        ],
        (ITER, TEST_CASE),
    )
    for func_name in perf_da.coords[TEST_CASE].values:
        random_evals = np.ravel(perf_da.sel({METHOD: ref_random, TEST_CASE: func_name}, drop=True).values)
        assert random_evals.size > 0

        # We will likely change this to a min mean (instead of median) using a different util in the near future:
        assert np.all(trials_grid == perf_da.sizes[SUGGEST] * (1 + baseline_ds.coords[ITER].values))
        rand_perf, _, _ = qt.min_quantile_CI(random_evals, EVAL_Q, trials_grid, alpha=ALPHA)
        baseline_ds[PERF_MED].loc[{TEST_CASE: func_name}] = rand_perf

        # Decide on a level to clip when computing the mean
        base_clip_val = qt.quantile(random_evals, EVAL_Q)
        assert np.isfinite(base_clip_val), "Median random search performance is not even finite."
        assert (perf_da.sizes[SUGGEST] > 1) or np.isclose(base_clip_val, rand_perf[0])
        baseline_ds[PERF_CLIP].loc[{TEST_CASE: func_name}] = base_clip_val

        # Estimate the global min via best of any method
        best_opt = np.min(perf_da.sel({TEST_CASE: func_name}, drop=True).values)
        if np.any(rand_perf <= best_opt):
            warnings.warn(
                "Random search is also the best search on %s, the normalized score may be meaningless." % func_name,
                RuntimeWarning,
            )
        assert np.isfinite(best_opt), "Best performance found is not even finite."
        logger.info("best %s %f" % (func_name, best_opt))

        # Now make sure strictly less than to avoid assert error in linear_rescale. This will likely give normalized
        # scores of +inf or -inf, but with median summary that is ok. When everything goes to mean, we will need to
        # change this:
        pad = PAD_FACTOR * np.spacing(-np.maximum(MIN_POS, np.abs(best_opt)))
        assert pad < 0
        best_opt = best_opt + pad
        assert np.isfinite(best_opt), "Best performance too close to limit of float range."
        assert np.all(rand_perf > best_opt)
        baseline_ds[PERF_BEST].loc[{TEST_CASE: func_name}] = best_opt

        random_evals = np.minimum(base_clip_val, random_evals)
        assert np.all(np.isfinite(random_evals))
        assert np.all(best_opt <= random_evals)

        rand_perf = em.expected_min(random_evals, trials_grid)
        rand_perf_fixed = np.minimum(base_clip_val, rand_perf)
        assert np.allclose(rand_perf, rand_perf_fixed)
        rand_perf_fixed = np.minimum.accumulate(rand_perf_fixed)
        assert np.allclose(rand_perf, rand_perf_fixed)
        baseline_ds[PERF_MEAN].loc[{TEST_CASE: func_name}] = rand_perf_fixed
    assert not any(np.any(np.isnan(baseline_ds[kk].values)) for kk in baseline_ds)
    validate(baseline_ds)
    return baseline_ds
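The normalization referenced here (and in `log_mean_score_json` above) maps `PERF_BEST` to 0 and `PERF_CLIP` to 1. A minimal sketch, assuming `linear_rescale` is a plain affine map:

import numpy as np

# Sketch of the rescaling: a raw objective equal to the estimated global best
# (PERF_BEST) maps to 0, one equal to the random-search clip level (PERF_CLIP)
# maps to 1, and the result is clipped to [-1, 1] as in the logging callback.
def normalized_score_sketch(raw_val, best_opt, base_clip_val):
    score = (raw_val - best_opt) / (base_clip_val - best_opt)
    return float(np.clip(score, -1.0, 1.0))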
Example #6
def gen_commands(args, opt_file_lookup, run_uuid):
    """Generator providing commands to launch processes for experiments.

    Parameters
    ----------
    args : dict(CmdArgs, [int, str])
        Arguments of options to pass to the experiments being launched. The keys correspond to the same arguments
        passed to this program.
    opt_file_lookup : dict(str, str)
        Mapping from method name to filename containing wrapper class for the method.
    run_uuid : uuid.UUID
        UUID for this launcher run. Needed to generate different experiment UUIDs on each call. This function is
        deterministic provided the same `run_uuid`.

    Yields
    ------
    iteration_key : (str, str, str, str)
        Tuple containing ``(trial, classifier, data, optimizer)`` to index the experiment.
    full_cmd : tuple(str)
        Strings containing command and arguments to run a process with experiment. Join with whitespace or use
        :func:`.util.shell_join` to get string with executable command. The command omits ``--opt-root`` which means it
        will default to ``.`` if the command is executed. As such, the command assumes it is executed with
        ``--opt-root`` as the working directory.
    """
    args_to_pass_thru = [
        CmdArgs.n_calls, CmdArgs.n_suggest, CmdArgs.db_root, CmdArgs.db
    ]
    # This could be made simpler (and avoid the if statement) if we always passed data_root, even when no custom data is used.
    if args[CmdArgs.data_root] is not None:
        args_to_pass_thru.append(CmdArgs.data_root)

    # Possibilities to iterate over. Put them in sorted order just for good measure.
    c_list = strict_sorted(MODEL_NAMES if args[CmdArgs.classifier] is None else
                           args[CmdArgs.classifier])
    d_list = strict_sorted(DATA_LOADER_NAMES if args[CmdArgs.data] is None else
                           args[CmdArgs.data])
    o_list = strict_sorted(
        list(opt_file_lookup.keys()) + list(CONFIG.keys())
        if args[CmdArgs.optimizer] is None else args[CmdArgs.optimizer])
    assert all(((optimizer in opt_file_lookup) or (optimizer in CONFIG))
               for optimizer in o_list), "unknown optimizer in optimizer list"

    m_set = set(
        METRICS if args[CmdArgs.metric] is None else args[CmdArgs.metric])
    m_lookup = {
        problem_type: sorted(m_set.intersection(mm))
        for problem_type, mm in METRICS_LOOKUP.items()
    }
    assert all(
        (len(m_lookup[get_problem_type(data)]) > 0) for data in d_list
    ), "At least one metric is needed for each problem type among the data sets"

    G = product(range_str(args[CmdArgs.n_repeat]), c_list, d_list,
                o_list)  # iterate all combos
    for rep, classifier, data, optimizer in G:
        _, rep_str = rep
        problem_type = get_problem_type(data)
        for metric in m_lookup[problem_type]:
            # Get a reproducible string (conditioned on having the same run uuid) that should also never give
            # a duplicate (unless we force the same run uuid twice).
            iteration_key = (rep_str, classifier, data, optimizer, metric)
            iteration_id = str_join_safe(ARG_DELIM, iteration_key)
            sub_uuid = pyuuid.uuid5(run_uuid, iteration_id).hex

            # Build the argument list for subproc, passing some args thru
            cmd_args_pass_thru = [[CMD_STR[vv][0],
                                   arg_safe_str(args[vv])]
                                  for vv in args_to_pass_thru]
            # Technically, the optimizer is not actually needed here for non-built-in optimizers because it is already
            # specified via the entry point: optimizer_wrapper_file
            cmd_args = [
                [CMD_STR[CmdArgs.classifier][0],
                 arg_safe_str(classifier)],
                [CMD_STR[CmdArgs.data][0],
                 arg_safe_str(data)],
                [CMD_STR[CmdArgs.optimizer][0],
                 arg_safe_str(optimizer)],
                [CMD_STR[CmdArgs.uuid][0],
                 arg_safe_str(sub_uuid)],
                [CMD_STR[CmdArgs.metric][0],
                 arg_safe_str(metric)],
            ]
            cmd_args = tuple(sum(cmd_args + cmd_args_pass_thru, []))
            logger.info(" ".join(cmd_args))

            # The experiment command without the arguments
            if optimizer in CONFIG:  # => built in optimizer wrapper
                experiment_cmd = (EXPERIMENT_ENTRY, )
            else:
                optimizer_wrapper_file = opt_file_lookup[optimizer]
                assert optimizer_wrapper_file.endswith(
                    ".py"), "optimizer wrapper should be a .py file"
                experiment_cmd = (PY_INTERPRETER, optimizer_wrapper_file)

            # Check arg safety again; odd-indexed elements in the list need to be arg-safe
            assert all((_is_arg_safe(ss) == (ii % 2 == 1))
                       for ii, ss in enumerate(cmd_args))

            full_cmd = experiment_cmd + cmd_args
            yield iteration_key, full_cmd
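A minimal driver sketch. Joining with plain whitespace is the simplification the docstring allows in place of `util.shell_join`, and the loop assumes the printed commands are later executed with ``--opt-root`` as the working directory.

import uuid as pyuuid

# Hypothetical launcher loop: print one runnable command line per experiment.
run_uuid = pyuuid.uuid4()
for iteration_key, full_cmd in gen_commands(args, opt_file_lookup, run_uuid):
    print(" ".join(full_cmd))  # util.shell_join(full_cmd) for a shell-ready string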
Example #7
def test_str_join_safe_append(delim, str_vec0, str_vec):
    assume(not any(delim in ss for ss in str_vec0))
    assume(not any(delim in ss for ss in str_vec))

    start = bobm_util.str_join_safe(delim, str_vec0, append=False)
    bobm_util.str_join_safe(delim, [start] + str_vec, append=True)
Example #8
def test_str_join_safe(delim, str_vec):
    assume(not any(delim in ss for ss in str_vec))
    bobm_util.str_join_safe(delim, str_vec, append=False)
Example #9
def concat_experiments(all_experiments, ravel=False):
    """Aggregate the Datasets from a series of experiments into combined Dataset.

    Parameters
    ----------
    all_experiments : typing.Iterable
        Iterable (possibly from a generator) with the Datasets from each experiment. Each item in `all_experiments` is
        a pair containing ``(meta_data, data)``. The `meta_data` contains a `tuple` of `str` with
        ``test_case, optimizer, uuid``. The `data` contains a tuple of ``(perf_da, time_ds, sig)``. The `perf_da` is an
        :class:`xarray:xarray.DataArray` containing the evaluation results with dimensions ``(ITER, SUGGEST)``. The
        `time_ds` is an :class:`xarray:xarray.Dataset` containing the timing results of the form accepted by
        `summarize_time`. The coordinates must be compatible with `perf_da`. Finally, `sig` contains the `test_case`
        signature and must be `list(float)`.
    ravel : bool
        If true, ravel all studies to store batch suggestions as if they were serial.

    Returns
    -------
    all_perf : :class:`xarray:xarray.DataArray`
        DataArray containing all of the `perf_da` from the experiments. The meta-data from the experiments are included
        as extra dimensions. `all_perf` has dimensions ``(ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)``. To convert the
        `uuid` to a trial, there must be an equal number of repetitions in the experiments for each `TEST_CASE`,
        `METHOD` combination. Likewise, all of the experiments need an equal number of `ITER` and `SUGGEST`. If `ravel`
        is true, then the `SUGGEST` is singleton.
    all_time : :class:`xarray:xarray.Dataset`
        Dataset containing all of the `time_ds` from the experiments. The new dimensions are
        ``(ITER, TEST_CASE, METHOD, TRIAL)``. It has the same variables as `time_ds`.
    all_sigs : dict(str, list(list(float)))
        Aggregate of all experiment signatures.
    """
    all_perf = {}
    all_time = {}
    all_sigs = {}
    trial_counter = Counter()
    for (test_case, optimizer, uuid), (perf_da, time_ds,
                                       sig) in all_experiments:
        if ravel:
            n_suggest = perf_da.sizes[SUGGEST]
            perf_da = _ravel_perf(perf_da)
            time_ds = _ravel_time(time_ds)
            optimizer = str_join_safe(ARG_DELIM,
                                      (optimizer, "p%d" % n_suggest),
                                      append=True)

        case_key = (test_case, optimizer, trial_counter[(test_case,
                                                         optimizer)])
        trial_counter[(test_case, optimizer)] += 1

        # Process perf data
        assert perf_da.dims == (ITER, SUGGEST)
        all_perf[case_key] = perf_da

        # Process time data
        all_time[case_key] = summarize_time(time_ds)

        # Handle the signatures
        all_sigs.setdefault(test_case, []).append(sig)
    assert min(trial_counter.values()) == max(
        trial_counter.values()), "Uneven number of trials per test case"

    # Now need to concat dict of datasets into single dataset
    all_perf = xru.da_concat(all_perf, dims=(TEST_CASE, METHOD, TRIAL))
    assert all_perf.dims == (ITER, SUGGEST, TEST_CASE, METHOD, TRIAL)
    assert not np.any(np.isnan(
        all_perf.values)), "Missing combinations of method and test case"

    all_time = xru.ds_concat(all_time, dims=(TEST_CASE, METHOD, TRIAL))
    assert all(all_time[kk].dims == (ITER, TEST_CASE, METHOD, TRIAL)
               for kk in all_time)
    assert not any(np.any(np.isnan(all_time[kk].values)) for kk in all_time)
    assert xru.coord_compat((all_perf, all_time),
                            (ITER, TEST_CASE, METHOD, TRIAL))

    return all_perf, all_time, all_sigs
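The loader in the first example yields an evaluation `Dataset` (one variable per objective) plus a suggest log, while this function expects a single-objective `DataArray` and no suggest log, so a small adapter is needed in between. A minimal sketch; "my_objective", the path, and the db name are placeholders.

# Hypothetical glue between load_experiments and concat_experiments: select one
# objective variable from the evaluation Dataset and drop the suggest log.
def adapt(experiments, objective="my_objective"):
    for meta, (perf_ds, time_ds, _suggest_ds, sig) in experiments:
        yield meta, (perf_ds[objective], time_ds, sig)

all_perf, all_time, all_sigs = concat_experiments(
    adapt(load_experiments(uuid_list, db_root="./runs", dbid="my_study")), ravel=False
)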