def get_objective_signature(model_name, dataset, scorer, data_root=None):
    """Get signature of an objective function specified by an sklearn model and dataset.

    This routine specializes :func:`.signatures.get_func_signature` for the `sklearn` study case.

    Parameters
    ----------
    model_name : str
        Which sklearn model we are attempting to tune, must be an element of `constants.MODEL_NAMES`.
    dataset : str
        Which data set the model is being tuned to, which must be either a) an element of
        `constants.DATA_LOADER_NAMES`, or b) the name of a csv file in the `data_root` folder for a custom data set.
    scorer : str
        Which metric to use when evaluating the model. This must be an element of `sklearn_funcs.SCORERS_CLF` for
        classification models, or `sklearn_funcs.SCORERS_REG` for regression models.
    data_root : str
        Absolute path to folder containing custom data sets. This may be ``None`` if no custom data sets are used.

    Returns
    -------
    signature : list(str)
        The signature of this test function.
    """
    function_instance = SklearnModel(model_name, dataset, scorer, data_root=data_root)
    api_config = function_instance.get_api_config()
    signature = get_func_signature(function_instance.evaluate, api_config)
    return signature
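# Illustrative usage sketch (not part of the original module): how `get_objective_signature` might be
# called to fingerprint a tuning problem. The "DT"/"boston"/"mse" names are assumptions based on the
# standard bayesmark constants and may need adjusting for a given installation.
def _example_objective_signature():  # pragma: no cover
    # Assumed valid combination: decision tree ("DT") on the "boston" data set scored by "mse".
    signature = get_objective_signature("DT", "boston", "mse", data_root=None)
    assert isinstance(signature, list)
    return signature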
def run_sklearn_study(opt_class, opt_kwargs, model_name, dataset, scorer, n_calls, n_suggestions, data_root=None):
    """Run a study for a single optimizer on a single `sklearn` model/data set combination.

    This routine is meant for benchmarking when tuning `sklearn` models, as opposed to the more general
    :func:`.run_study`.

    Parameters
    ----------
    opt_class : :class:`.abstract_optimizer.AbstractOptimizer`
        Type of wrapper optimizer; must be a subclass of :class:`.abstract_optimizer.AbstractOptimizer`.
    opt_kwargs : kwargs
        `kwargs` to use when instantiating the wrapper class.
    model_name : str
        Which sklearn model we are attempting to tune, must be an element of `constants.MODEL_NAMES`.
    dataset : str
        Which data set the model is being tuned to, which must be either a) an element of
        `constants.DATA_LOADER_NAMES`, or b) the name of a csv file in the `data_root` folder for a custom data set.
    scorer : str
        Which metric to use when evaluating the model. This must be an element of `sklearn_funcs.SCORERS_CLF` for
        classification models, or `sklearn_funcs.SCORERS_REG` for regression models.
    n_calls : int
        How many iterations of minimization to run.
    n_suggestions : int
        How many parallel evaluations we run each iteration. Must be ``>= 1``.
    data_root : str
        Absolute path to folder containing custom data sets. This may be ``None`` if no custom data sets are used.

    Returns
    -------
    function_evals : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions)
        Value of objective for each evaluation.
    timing_evals : (:class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`)
        Tuple of 3 timing results: ``(suggest_time, eval_time, observe_time)`` with shapes ``(n_calls,)``,
        ``(n_calls, n_suggestions)``, and ``(n_calls,)``. These are the time to make each suggestion, the time for
        each evaluation of the objective function, and the time to make an observe call.
    """
    # Setup test function
    function_instance = SklearnModel(model_name, dataset, scorer, data_root=data_root)

    # Setup optimizer
    api_config = function_instance.get_api_config()
    optimizer_instance = opt_class(api_config, **opt_kwargs)

    # Now actually do the experiment
    results = run_study(optimizer_instance, function_instance, n_calls, n_suggestions)
    return results
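# Illustrative usage sketch (not part of the original module): a minimal end-to-end call to
# `run_sklearn_study`. `opt_class` stands in for any :class:`.abstract_optimizer.AbstractOptimizer`
# subclass and `opt_kwargs` for its constructor arguments; the "DT"/"boston"/"mse" problem spec is the
# same assumed combination as in `_example_objective_signature` above.
def _example_run_sklearn_study(opt_class, opt_kwargs):  # pragma: no cover
    # Per the docstring, `results` bundles the objective values and the suggest/eval/observe timings.
    results = run_sklearn_study(opt_class, opt_kwargs, "DT", "boston", "mse", n_calls=10, n_suggestions=2)
    return results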
def test_run_study(model_name, dataset, scorer, n_calls, n_suggestions, seed):
    prob_type = data.get_problem_type(dataset)
    assume(scorer in data.METRICS_LOOKUP[prob_type])

    function_instance = SklearnModel(model_name, dataset, scorer)
    optimizer = RandomOptimizer(function_instance.get_api_config(), random=np.random.RandomState(seed))
    optimizer.get_version()
    exp.run_study(
        optimizer, function_instance, n_calls, n_suggestions, n_obj=len(function_instance.objective_names)
    )
def test_run_study_bounds_fail(model_name, dataset, scorer, n_calls, n_suggestions, seed):
    prob_type = data.get_problem_type(dataset)
    assume(scorer in data.METRICS_LOOKUP[prob_type])

    function_instance = SklearnModel(model_name, dataset, scorer)
    optimizer = OutOfBoundsOptimizer(function_instance.get_api_config(), random=np.random.RandomState(seed))
    optimizer.get_version()

    # pytest has assertion-failure helpers we could use instead, but this is ok for now
    bounds_fails = False
    try:
        exp.run_study(
            optimizer, function_instance, n_calls, n_suggestions, n_obj=len(function_instance.objective_names)
        )
    except Exception as e:
        bounds_fails = str(e) == "Optimizer suggestion is out of range."
    assert bounds_fails
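# As the comment above notes, the same check could be written with `pytest.raises`. This is an
# equivalent sketch (an illustration, not a replacement for the test), assuming the out-of-range error
# message stays exactly "Optimizer suggestion is out of range."; the leading underscore keeps pytest
# from collecting it.
def _sketch_run_study_bounds_fail_with_pytest_raises(model_name, dataset, scorer, n_calls, n_suggestions, seed):  # pragma: no cover
    import pytest  # would normally live at module level

    prob_type = data.get_problem_type(dataset)
    assume(scorer in data.METRICS_LOOKUP[prob_type])

    function_instance = SklearnModel(model_name, dataset, scorer)
    optimizer = OutOfBoundsOptimizer(function_instance.get_api_config(), random=np.random.RandomState(seed))

    with pytest.raises(Exception, match="Optimizer suggestion is out of range."):
        exp.run_study(
            optimizer, function_instance, n_calls, n_suggestions, n_obj=len(function_instance.objective_names)
        )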
def test_run_study_callback(model_name, dataset, scorer, n_calls, n_suggestions, seed):
    prob_type = data.get_problem_type(dataset)
    assume(scorer in data.METRICS_LOOKUP[prob_type])

    function_instance = SklearnModel(model_name, dataset, scorer)
    optimizer = RandomOptimizer(function_instance.get_api_config(), random=np.random.RandomState(seed))
    optimizer.get_version()

    n_obj = len(function_instance.objective_names)

    function_evals_cmin = np.zeros((n_calls, n_obj), dtype=float)
    iters_list = []

    def callback(f_min, iters):
        assert f_min.shape == (n_obj,)
        iters_list.append(iters)

        if iters == 0:
            assert np.all(f_min == np.inf)
            return

        function_evals_cmin[iters - 1, :] = f_min

    function_evals, _, _ = exp.run_study(
        optimizer, function_instance, n_calls, n_suggestions, n_obj=n_obj, callback=callback
    )

    assert iters_list == list(range(n_calls + 1))
    for ii in range(n_obj):
        for jj in range(n_calls):
            idx0, idx1 = np_util.argmin_2d(function_evals[: jj + 1, :, 0])
            assert function_evals_cmin[jj, ii] == function_evals[idx0, idx1, ii]
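# Illustrative sketch of the callback contract exercised by the test above (inferred from the test, not
# from run_study's documented API): `callback(f_min, iters)` is invoked once with ``iters == 0`` before
# any evaluation (all entries of ``f_min`` are inf) and then once per batch with the running
# per-objective minimum. The helper name below is hypothetical.
def _make_recording_callback(n_calls, n_obj):  # pragma: no cover
    function_evals_cmin = np.zeros((n_calls, n_obj), dtype=float)

    def callback(f_min, iters):
        # iters == 0 is the pre-study call; nothing has been evaluated yet.
        if iters > 0:
            function_evals_cmin[iters - 1, :] = f_min

    return callback, function_evals_cmin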