Example #1
def get_objective_signature(model_name, dataset, scorer, data_root=None):
    """Get signature of an objective function specified by an sklearn model and dataset.

    This routine specializes :func:`.signatures.get_func_signature` for the `sklearn` study case.

    Parameters
    ----------
    model_name : str
        Which sklearn model we are attempting to tune; must be an element of `constants.MODEL_NAMES`.
    dataset : str
        Which data set the model is being tuned on. This must be either a) an element of
        `constants.DATA_LOADER_NAMES`, or b) the name of a csv file in the `data_root` folder for a custom data set.
    scorer : str
        Which metric to use when evaluating the model. This must be an element of `sklearn_funcs.SCORERS_CLF` for
        classification models, or `sklearn_funcs.SCORERS_REG` for regression models.
    data_root : str
        Absolute path to folder containing custom data sets. This may be ``None`` if no custom data sets are used.

    Returns
    -------
    signature : list(str)
        The signature of this test function.
    """
    function_instance = SklearnModel(model_name,
                                     dataset,
                                     scorer,
                                     data_root=data_root)
    api_config = function_instance.get_api_config()
    signature = get_func_signature(function_instance.evaluate, api_config)
    return signature
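
A minimal usage sketch of `get_objective_signature`; the model, dataset, and scorer names below are hypothetical placeholders for valid entries of `constants.MODEL_NAMES`, `constants.DATA_LOADER_NAMES`, and the scorer tables, not values taken from this snippet.

# Hypothetical usage sketch; "DT", "iris", and "acc" stand in for valid
# model/dataset/scorer names accepted by get_objective_signature.
signature = get_objective_signature("DT", "iris", "acc")
print(signature)  # list(str) signature of the test function, per the docstring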
Example #2
def run_sklearn_study(opt_class,
                      opt_kwargs,
                      model_name,
                      dataset,
                      scorer,
                      n_calls,
                      n_suggestions,
                      data_root=None):
    """Run a study for a single optimizer on a single `sklearn` model/data set combination.

    This routine is meant for benchmarking when tuning `sklearn` models, as opposed to the more general
    :func:`.run_study`.

    Parameters
    ----------
    opt_class : :class:`.abstract_optimizer.AbstractOptimizer`
        Type of wrapper optimizer; must be a subclass of :class:`.abstract_optimizer.AbstractOptimizer`.
    opt_kwargs : kwargs
        `kwargs` to use when instantiating the wrapper class.
    model_name : str
        Which sklearn model we are attempting to tune; must be an element of `constants.MODEL_NAMES`.
    dataset : str
        Which data set the model is being tuned on. This must be either a) an element of
        `constants.DATA_LOADER_NAMES`, or b) the name of a csv file in the `data_root` folder for a custom data set.
    scorer : str
        Which metric to use when evaluating the model. This must be an element of `sklearn_funcs.SCORERS_CLF` for
        classification models, or `sklearn_funcs.SCORERS_REG` for regression models.
    n_calls : int
        How many iterations of minimization to run.
    n_suggestions : int
        How many parallel evaluations we run each iteration. Must be ``>= 1``.
    data_root : str
        Absolute path to folder containing custom data sets. This may be ``None`` if no custom data sets are used.

    Returns
    -------
    function_evals : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions)
        Value of objective for each evaluation.
    timing_evals : (:class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`)
        Tuple of 3 timing results: ``(suggest_time, eval_time, observe_time)`` with shapes ``(n_calls,)``,
        ``(n_calls, n_suggestions)``, and ``(n_calls,)``. These are the time to make each suggestion, the time for each
        evaluation of the objective function, and the time to make an observe call.
    """
    # Setup test function
    function_instance = SklearnModel(model_name,
                                     dataset,
                                     scorer,
                                     data_root=data_root)

    # Setup optimizer
    api_config = function_instance.get_api_config()
    optimizer_instance = opt_class(api_config, **opt_kwargs)

    # Now actually do the experiment
    results = run_study(optimizer_instance, function_instance, n_calls,
                        n_suggestions)
    return results
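
A minimal usage sketch of `run_sklearn_study`; `RandomOptimizer` refers to the wrapper optimizer used in the tests below, its `opt_kwargs` mirror how it is constructed there, and the remaining argument values are illustrative stand-ins rather than values required by this snippet.

# Hypothetical usage sketch; model/dataset/scorer names and the call budget
# are illustrative placeholders.
results = run_sklearn_study(
    RandomOptimizer,                       # opt_class: AbstractOptimizer subclass
    {"random": np.random.RandomState(0)},  # opt_kwargs for its constructor
    "DT",                                  # model_name, from constants.MODEL_NAMES
    "iris",                                # dataset, from constants.DATA_LOADER_NAMES
    "acc",                                 # scorer for a classification model
    n_calls=5,
    n_suggestions=2,
)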
def test_run_study(model_name, dataset, scorer, n_calls, n_suggestions, seed):
    prob_type = data.get_problem_type(dataset)
    assume(scorer in data.METRICS_LOOKUP[prob_type])

    function_instance = SklearnModel(model_name, dataset, scorer)
    optimizer = RandomOptimizer(function_instance.get_api_config(),
                                random=np.random.RandomState(seed))
    optimizer.get_version()
    exp.run_study(optimizer,
                  function_instance,
                  n_calls,
                  n_suggestions,
                  n_obj=len(function_instance.objective_names))
def test_run_study_bounds_fail(model_name, dataset, scorer, n_calls,
                               n_suggestions, seed):
    prob_type = data.get_problem_type(dataset)
    assume(scorer in data.METRICS_LOOKUP[prob_type])

    function_instance = SklearnModel(model_name, dataset, scorer)
    optimizer = OutOfBoundsOptimizer(function_instance.get_api_config(),
                                     random=np.random.RandomState(seed))
    optimizer.get_version()

    # pytest has tools to assert that an exception is raised which we could use instead, but this is ok for now
    bounds_fails = False
    try:
        exp.run_study(optimizer,
                      function_instance,
                      n_calls,
                      n_suggestions,
                      n_obj=len(function_instance.objective_names))
    except Exception as e:
        bounds_fails = str(e) == "Optimizer suggestion is out of range."
    assert bounds_fails
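
As the comment in the test above notes, pytest provides tools for asserting that an exception is raised. The sketch below rephrases the same check with `pytest.raises`; it is an alternative formulation, not part of the original test, and it reuses the names (`data`, `assume`, `SklearnModel`, `OutOfBoundsOptimizer`, `exp`, `np`) already in scope in the test module. Note that `match` performs a regex search on the message rather than the exact equality check used above.

import re

import pytest


def test_run_study_bounds_fail_alt(model_name, dataset, scorer, n_calls,
                                   n_suggestions, seed):
    # Same setup as the original test; pytest.raises replaces the manual
    # try/except flag and the final assert.
    prob_type = data.get_problem_type(dataset)
    assume(scorer in data.METRICS_LOOKUP[prob_type])

    function_instance = SklearnModel(model_name, dataset, scorer)
    optimizer = OutOfBoundsOptimizer(function_instance.get_api_config(),
                                     random=np.random.RandomState(seed))

    with pytest.raises(Exception,
                       match=re.escape("Optimizer suggestion is out of range.")):
        exp.run_study(optimizer,
                      function_instance,
                      n_calls,
                      n_suggestions,
                      n_obj=len(function_instance.objective_names))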
def test_run_study_callback(model_name, dataset, scorer, n_calls,
                            n_suggestions, seed):
    prob_type = data.get_problem_type(dataset)
    assume(scorer in data.METRICS_LOOKUP[prob_type])

    function_instance = SklearnModel(model_name, dataset, scorer)
    optimizer = RandomOptimizer(function_instance.get_api_config(),
                                random=np.random.RandomState(seed))
    optimizer.get_version()
    n_obj = len(function_instance.objective_names)

    function_evals_cmin = np.zeros((n_calls, n_obj), dtype=float)
    iters_list = []

    def callback(f_min, iters):
        assert f_min.shape == (n_obj, )

        iters_list.append(iters)
        if iters == 0:
            assert np.all(f_min == np.inf)
            return

        function_evals_cmin[iters - 1, :] = f_min

    function_evals, _, _ = exp.run_study(optimizer,
                                         function_instance,
                                         n_calls,
                                         n_suggestions,
                                         n_obj=n_obj,
                                         callback=callback)

    assert iters_list == list(range(n_calls + 1))

    for ii in range(n_obj):
        for jj in range(n_calls):
            idx0, idx1 = np_util.argmin_2d(function_evals[:jj + 1, :, 0])
            assert function_evals_cmin[jj, ii] == function_evals[idx0, idx1, ii]
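
A minimal callback sketch illustrating the contract exercised by the test above: per that test, ``run_study`` invokes the callback once with ``iters == 0`` (before any observations, when every entry of ``f_min`` is ``inf``) and then once per iteration with the cumulative per-objective minimum.

def print_progress(f_min, iters):
    # f_min has shape (n_obj,): the best value observed so far per objective.
    # At iters == 0 nothing has been evaluated yet, so every entry is inf.
    print("iteration %d: cumulative min per objective = %s" % (iters, f_min))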