def space_fixture():
    dimensions = [Real(0.1, 0.9), Categorical(["foo", "bar", "baz"]), Integer(12, 18)]
    locations = [
        ("model_init_params", "a"),
        ("model_init_params", "b", "c"),
        ("model_extra_params", "e"),
    ]

    for i in range(len(dimensions)):
        setattr(dimensions[i], "location", locations[i])

    return Space(dimensions)
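# Illustrative sketch (not part of the library): how a test might consume the fixture above. The
# test name is hypothetical; the asserted tuples are simply the `location` values the fixture
# attaches to each dimension.
def test_space_fixture_locations():
    space = space_fixture()
    assert [d.location for d in space.dimensions] == [
        ("model_init_params", "a"),
        ("model_init_params", "b", "c"),
        ("model_extra_params", "e"),
    ]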
class InformedOptimizationProtocol(BaseOptimizationProtocol, metaclass=ABCMeta):
    def __init__(
            self,
            target_metric=None,
            iterations=1,
            verbose=1,
            read_experiments=True,
            reporter_parameters=None,
            #################### Optimizer Class Parameters ####################
            base_estimator='GP',
            n_initial_points=10,
            acquisition_function='gp_hedge',
            acquisition_optimizer='auto',
            random_state=32,
            acquisition_function_kwargs=None,
            acquisition_optimizer_kwargs=None,
            #################### Minimizer Parameters ####################
            n_random_starts=10,
            callbacks=None,
            #################### Other Parameters ####################
            base_estimator_kwargs=None,
    ):
        """Base class for Informed Optimization Protocols

        Parameters
        ----------
        target_metric: Tuple, default=('oof', <first key in :attr:`environment.Environment.metrics_map`>)
            A path denoting the metric used to compare completed Experiments within the
            Optimization Protocol. The first value should be one of ['oof', 'holdout', 'in_fold'].
            The second value should be the name of a metric being recorded according to the values
            supplied in :attr:`environment.Environment.metrics_params`. See the documentation for
            :func:`metrics.get_formatted_target_metric` for more info; any values returned by, or
            used as the `target_metric` input to, this function are acceptable values for
            :attr:`BaseOptimizationProtocol.target_metric`
        iterations: Int, default=1
            The number of distinct experiments to execute
        verbose: Int 0, 1, or 2, default=1
            Verbosity mode for console logging. 0: Silent. 1: Show only logs from the Optimization
            Protocol. 2: In addition to logs shown when verbose=1, also show the logs from
            individual Experiments
        read_experiments: Boolean, default=True
            If True, all Experiment records that fit within the current
            :attr:`hyperparameter_space`, are for the same :attr:`algorithm_name`, and match the
            current guidelines will be read in and used to fit any optimizers
        reporter_parameters: Dict, or None, default=None
            Additional parameters passed to :meth:`reporting.OptimizationReporter.__init__`
        base_estimator: String in ['GP', 'GBRT', 'RF', 'ET', 'DUMMY'], or an `sklearn` regressor, default='GP'
            If one of the above strings, a default model of that type will be used. Else, should
            inherit from :class:`sklearn.base.RegressorMixin`, and its :meth:`predict` should have
            an optional `return_std` argument, which returns `std(Y | x)`, along with `E[Y | x]`
        n_initial_points: Int, default=10
            The number of complete evaluation points necessary before allowing Experiments to be
            approximated with `base_estimator`. Any valid Experiment records found will count as
            initialization points. If enough Experiment records are not found, additional points
            will be randomly sampled
        acquisition_function: String in ['LCB', 'EI', 'PI', 'gp_hedge'], default='gp_hedge'
            Function to minimize over the posterior distribution. 'LCB': lower confidence bound.
            'EI': negative expected improvement. 'PI': negative probability of improvement.
            'gp_hedge': Probabilistically choose one of the preceding three acquisition functions
            at each iteration
        acquisition_optimizer: String in ['sampling', 'lbfgs', 'auto'], default='auto'
            Method to minimize the acquisition function. The fit model is updated with the optimal
            value obtained by optimizing `acquisition_function` with `acquisition_optimizer`.
            'sampling': optimize by computing `acquisition_function` at
            `acquisition_optimizer_kwargs['n_points']` randomly sampled points.
            'lbfgs': optimize by sampling `n_restarts_optimizer` random points, then running
            'lbfgs' for 20 iterations with those points to find local minima, the optimal of which
            is used to update the prior. 'auto': configure on the basis of `base_estimator` and
            `dimensions`
        random_state: Int, `RandomState` instance, or None, default=32
            Set to something other than None for reproducible results
        acquisition_function_kwargs: Dict, or None, default=dict(xi=0.01, kappa=1.96)
            Additional arguments passed to the acquisition function
        acquisition_optimizer_kwargs: Dict, or None, default=dict(n_points=10000, n_restarts_optimizer=5, n_jobs=1)
            Additional arguments passed to the acquisition optimizer
        n_random_starts: Int, default=10
            The number of Experiments to execute with random points before checking that
            `n_initial_points` have been evaluated
        callbacks: Callable, list of callables, or None, default=[]
            If callable, then `callbacks(self.optimizer_result)` is called after each update to
            :attr:`optimizer`. If list, then each callable is called
        base_estimator_kwargs: Dict, or None, default={}
            Additional arguments passed to `base_estimator` when it is initialized

        Notes
        -----
        To provide initial input points for evaluation, individual Experiments can be executed
        prior to instantiating an Optimization Protocol. The results of these Experiments will
        automatically be detected and used by the optimizer.

        :class:`.InformedOptimizationProtocol` and its children in :mod:`.optimization` rely
        heavily on the utilities provided by the `Scikit-Optimize` library, so thank you to the
        creators and contributors for their excellent work."""
        # TODO: Add 'EIps', and 'PIps' to the allowable `acquisition_function` values - Will need to return execution times

        #################### Optimizer Parameters ####################
        self.base_estimator = base_estimator
        self.n_initial_points = n_initial_points
        self.acquisition_function = acquisition_function
        self.acquisition_optimizer = acquisition_optimizer
        self.random_state = random_state

        self.acquisition_function_kwargs = dict(xi=0.01, kappa=1.96)
        self.acquisition_optimizer_kwargs = dict(n_points=10000, n_restarts_optimizer=5, n_jobs=1)

        self.acquisition_function_kwargs.update(acquisition_function_kwargs or {})
        self.acquisition_optimizer_kwargs.update(acquisition_optimizer_kwargs or {})

        #################### Minimizer Parameters ####################
        # TODO: n_random_starts does nothing currently - Fix that
        self.n_random_starts = n_random_starts
        self.callbacks = callbacks or []

        #################### Other Parameters ####################
        self.base_estimator_kwargs = base_estimator_kwargs or {}

        #################### Placeholder Attributes ####################
        self.optimizer = None
        self.optimizer_result = None
        self.current_hyperparameters_list = None

        super().__init__(
            target_metric=target_metric,
            iterations=iterations,
            verbose=verbose,
            read_experiments=read_experiments,
            reporter_parameters=reporter_parameters,
        )

    def _set_hyperparameter_space(self):
        """Initialize :attr:`hyperparameter_space` according to the provided hyperparameter search
        dimensions, and :attr:`base_estimator` and :attr:`optimizer`"""
        self.hyperparameter_space = Space(dimensions=self.dimensions)
        self._prepare_estimator()
        self._build_optimizer()

    def _prepare_estimator(self):
        """Initialize :attr:`base_estimator` with :attr:`hyperparameter_space` and any other
        kwargs, using `skopt.utils.cook_estimator`"""
        self.base_estimator = cook_estimator(
            self.base_estimator, space=self.hyperparameter_space, **self.base_estimator_kwargs)
    def _build_optimizer(self):
        """Set :attr:`optimizer` to the optimizing class used to both estimate the utility of sets
        of hyperparameters by learning from executed Experiments, and suggest points at which the
        objective should be evaluated"""
        self.optimizer = AskingOptimizer(
            dimensions=self.hyperparameter_space,
            base_estimator=self.base_estimator,
            n_initial_points=self.n_initial_points,
            acq_func=self.acquisition_function,
            acq_optimizer=self.acquisition_optimizer,
            random_state=self.random_state,
            acq_func_kwargs=self.acquisition_function_kwargs,
            acq_optimizer_kwargs=self.acquisition_optimizer_kwargs,
        )

    def _execute_experiment(self):
        """After executing parent's :meth:`_execute_experiment`, fit :attr:`optimizer` with the
        set of hyperparameters that were used, and the utility of those hyperparameters"""
        super()._execute_experiment()

        # FLAG: Resolve switching between below options depending on `target_metric`
        # self.optimizer_result = self.optimizer.tell(self.current_hyperparameters_list, self.current_score, fit=True)
        self.optimizer_result = self.optimizer.tell(
            self.current_hyperparameters_list, -self.current_score, fit=True)
        # FLAG: Resolve switching between above options depending on `target_metric`

        if eval_callbacks(self.callbacks, self.optimizer_result):
            return

    def _get_current_hyperparameters(self):
        """Ask :attr:`optimizer` for the upcoming set of hyperparameters that should be searched,
        then format them to be used in the next Experiment

        Returns
        -------
        current_hyperparameters: Dict
            The next set of hyperparameters that will be searched"""
        _current_hyperparameters = self.optimizer.ask()

        if _current_hyperparameters == self.current_hyperparameters_list:
            new_parameters = self.hyperparameter_space.rvs(random_state=None)[0]
            G.debug_('REPEATED asked={} new={}'.format(_current_hyperparameters, new_parameters))
            _current_hyperparameters = new_parameters

        self.current_hyperparameters_list = _current_hyperparameters

        current_hyperparameters = dict(zip(
            self.hyperparameter_space.get_names(use_location=False),
            self.current_hyperparameters_list,
        ))

        return current_hyperparameters

    def _find_similar_experiments(self):
        """After locating similar experiments by way of the parent's
        :meth:`_find_similar_experiments`, fit :attr:`optimizer` with the hyperparameters and
        results of each located experiment"""
        super()._find_similar_experiments()

        for _i, _experiment in enumerate(self.similar_experiments[::-1]):
            _hyperparameters = dimension_subset(_experiment[0], self.hyperparameter_space.get_names())
            _evaluation = _experiment[1]
            _experiment_id = _experiment[2] if len(_experiment) > 2 else None
            self.logger.print_result(_hyperparameters, _evaluation, experiment_id=_experiment_id)

            # FLAG: Resolve switching between below options depending on `target_metric`
            # self.optimizer_result = self.optimizer.tell(_hyperparameters, _evaluation)
            self.optimizer_result = self.optimizer.tell(_hyperparameters, -_evaluation)
            # FLAG: Resolve switching between above options depending on `target_metric`
            # self.optimizer_result = self.optimizer.tell(
            #     _hyperparameters, _evaluation, fit=(_i == len(self.similar_experiments) - 1))

            if eval_callbacks(self.callbacks, self.optimizer_result):
                return self.optimizer_result
            # FLAG: Could wrap above `tell` call in try/except, then attempt `_tell` with improper dimensions

    def _validate_parameters(self):
        """Ensure provided input parameters are properly formatted"""
        super()._validate_parameters()

        #################### callbacks ####################
        self.callbacks = check_callback(self.callbacks)
    @property
    def search_space_size(self):
        """The number of different hyperparameter permutations possible given the current
        hyperparameter search dimensions

        Returns
        -------
        :attr:`_search_space_size`: Int, or `numpy.inf`
            Infinity will be returned if any of the following constraints are met: 1) the
            hyperparameter dimensions include any real-valued boundaries, 2) the boundaries include
            values that are neither categorical nor integer, or 3) the search space size is
            otherwise incalculable"""
        if self._search_space_size is None:
            self._search_space_size = len(self.hyperparameter_space)
        return self._search_space_size
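# Illustrative sketch (not part of the library): the ask/tell loop that
# `_get_current_hyperparameters` and `_execute_experiment` implement together. The protocol asks
# the optimizer for the next point, scores it by running an Experiment, and tells the optimizer
# the negated score (negated because the skopt-style optimizer minimizes, while a higher metric
# value is assumed to be better). `run_experiment` is a hypothetical stand-in for executing an
# Experiment and returning its target metric.
def _illustrative_ask_tell_loop(optimizer, space, run_experiment, iterations=10):
    for _ in range(iterations):
        hyperparameters = optimizer.ask()  # Next point to evaluate
        score = run_experiment(dict(zip(space.get_names(use_location=False), hyperparameters)))
        optimizer.tell(hyperparameters, -score, fit=True)  # Fit on the negated score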
##################################################
import pytest
from sys import maxsize


def test_dimension_name_value_error():
    with pytest.raises(ValueError, match="Dimension's name must be one of: string, tuple, or .*"):
        Real(0.3, 0.9, name=14)


@pytest.mark.parametrize(
    ["value", "is_in"],
    [(1, True), (5, True), (10, True), (0, False), (11, False), ("x", False)],
)
def test_integer_contains(value, is_in):
    assert (value in Integer(1, 10)) is is_in


@pytest.mark.parametrize(
    ["space", "size"],
    [
        (Space([Categorical(["a", "b"]), Real(0.1, 0.7)]), maxsize),
        (Space([Categorical(["a", "b"]), Integer(1, 5)]), 10),
    ],
)
def test_space_len(space, size):
    assert len(space) == size
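# Note on the `test_space_len` expectations, as a sketch of the size arithmetic: a `Space`
# containing any `Real` dimension is effectively unbounded (reported here as `sys.maxsize`),
# while a purely discrete `Space` multiplies the sizes of its dimensions, e.g.:
#   len(Space([Categorical(["a", "b"]), Integer(1, 5)])) == 2 * 5 == 10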
class AskingOptimizer(Optimizer):
    # FLAG: ORIGINAL BELOW
    # def __init__(
    #         self, dimensions, base_estimator="gp", n_random_starts=None, n_initial_points=10, acq_func="gp_hedge",
    #         acq_optimizer="auto", random_state=None, acq_func_kwargs=None, acq_optimizer_kwargs=None,
    #         repeated_ask_kwargs=None
    # ):
    #     self.__repeated_ask_kwargs = repeated_ask_kwargs or {}
    #
    #     super().__init__(
    #         dimensions, base_estimator=base_estimator, n_random_starts=n_random_starts, n_initial_points=n_initial_points,
    #         acq_func=acq_func, acq_optimizer=acq_optimizer, random_state=random_state, acq_func_kwargs=acq_func_kwargs,
    #         acq_optimizer_kwargs=acq_optimizer_kwargs,
    #     )
    # FLAG: ORIGINAL ABOVE

    # FLAG: TEST BELOW
    # noinspection PyMissingConstructor
    def __init__(
            self, dimensions, base_estimator='gp', n_random_starts=None, n_initial_points=10,
            acq_func='gp_hedge', acq_optimizer='auto', random_state=None, acq_func_kwargs=None,
            acq_optimizer_kwargs=None):
        """This is nearly identical to :meth:`skopt.optimizer.optimizer.Optimizer.__init__`. It is
        recreated here to use the modified :class:`hyperparameter_hunter.space.Space`, rather than
        the original `skopt` version. This is not an ideal solution, and other options are being
        considered

        Parameters
        ----------
        dimensions: See :class:`skopt.optimizer.optimizer.Optimizer.__init__`
        base_estimator: See :class:`skopt.optimizer.optimizer.Optimizer.__init__`
        n_random_starts: See :class:`skopt.optimizer.optimizer.Optimizer.__init__`
        n_initial_points: See :class:`skopt.optimizer.optimizer.Optimizer.__init__`
        acq_func: See :class:`skopt.optimizer.optimizer.Optimizer.__init__`
        acq_optimizer: See :class:`skopt.optimizer.optimizer.Optimizer.__init__`
        random_state: See :class:`skopt.optimizer.optimizer.Optimizer.__init__`
        acq_func_kwargs: See :class:`skopt.optimizer.optimizer.Optimizer.__init__`
        acq_optimizer_kwargs: See :class:`skopt.optimizer.optimizer.Optimizer.__init__`"""
        # TODO: Figure out way to override skopt Optimizer's use of skopt Space without having to rewrite __init__
        self.__repeated_ask_kwargs = {}
        self.rng = check_random_state(random_state)

        # Configure acquisition function - Store and create acquisition function set
        self.acq_func = acq_func
        self.acq_func_kwargs = acq_func_kwargs

        allowed_acq_funcs = ['gp_hedge', 'EI', 'LCB', 'PI', 'EIps', 'PIps']
        if self.acq_func not in allowed_acq_funcs:
            raise ValueError(
                F'Expected `acq_func` to be in {allowed_acq_funcs}, got {self.acq_func}')

        # Treat hedging method separately
        if self.acq_func == 'gp_hedge':
            self.cand_acq_funcs_ = ['EI', 'LCB', 'PI']
            self.gains_ = np.zeros(3)
        else:
            self.cand_acq_funcs_ = [self.acq_func]

        if acq_func_kwargs is None:
            acq_func_kwargs = dict()
        self.eta = acq_func_kwargs.get('eta', 1.0)

        # Configure counters of points - Check `n_random_starts` deprecation first
        if n_random_starts is not None:
            warnings.warn(
                '`n_random_starts` will be removed in favour of `n_initial_points`',
                DeprecationWarning)
            n_initial_points = n_random_starts
        if n_initial_points < 0:
            raise ValueError(F'Expected `n_initial_points` >= 0, got {n_initial_points}')
        self._n_initial_points = n_initial_points
        self.n_initial_points_ = n_initial_points

        # Configure estimator - Build `base_estimator` if doesn't exist
        if isinstance(base_estimator, str):
            base_estimator = cook_estimator(
                base_estimator, space=dimensions,
                random_state=self.rng.randint(0, np.iinfo(np.int32).max))

        # Check if regressor
        if not is_regressor(base_estimator) and base_estimator is not None:
            raise ValueError(F'`base_estimator`={base_estimator} must be a regressor')
        # Treat per second acquisition function specially
        is_multi_regressor = isinstance(base_estimator, MultiOutputRegressor)
        if 'ps' in self.acq_func and not is_multi_regressor:
            self.base_estimator_ = MultiOutputRegressor(base_estimator)
        else:
            self.base_estimator_ = base_estimator

        # Configure optimizer - Decide optimizer based on gradient information
        if acq_optimizer == 'auto':
            if has_gradients(self.base_estimator_):
                acq_optimizer = 'lbfgs'
            else:
                acq_optimizer = 'sampling'
        if acq_optimizer not in ['lbfgs', 'sampling']:
            raise ValueError(
                'Expected `acq_optimizer` to be "lbfgs" or "sampling", got {}'.format(acq_optimizer))
        if not has_gradients(self.base_estimator_) and acq_optimizer != 'sampling':
            raise ValueError(
                'The regressor {} should run with `acq_optimizer`="sampling"'.format(type(base_estimator)))
        self.acq_optimizer = acq_optimizer

        # Record other arguments
        if acq_optimizer_kwargs is None:
            acq_optimizer_kwargs = dict()
        self.n_points = acq_optimizer_kwargs.get('n_points', 10000)
        self.n_restarts_optimizer = acq_optimizer_kwargs.get('n_restarts_optimizer', 5)
        n_jobs = acq_optimizer_kwargs.get('n_jobs', 1)
        self.n_jobs = n_jobs
        self.acq_optimizer_kwargs = acq_optimizer_kwargs

        # Configure search space - Normalize space if GP regressor
        if isinstance(self.base_estimator_, GaussianProcessRegressor):
            dimensions = normalize_dimensions(dimensions)
        self.space = Space(dimensions)

        # Record categorical and non-categorical indices
        self._cat_inds = []
        self._non_cat_inds = []
        for ind, dim in enumerate(self.space.dimensions):
            if isinstance(dim, Categorical):
                self._cat_inds.append(ind)
            else:
                self._non_cat_inds.append(ind)

        # Initialize storage for optimization
        self.models = []
        self.Xi = []
        self.yi = []

        # Initialize cache for `ask` method responses
        # This ensures that multiple calls to `ask` with n_points set return same sets of points. Reset to {} at call to `tell`
        self.cache_ = {}
    # FLAG: TEST ABOVE

    def _ask(self):
        # TODO: Add documentation
        ask_result = super()._ask()

        do_retell = self.__repeated_ask_kwargs.get('do_retell', True)
        return_val = self.__repeated_ask_kwargs.get('return_val', 'ask')
        persistent_check = self.__repeated_ask_kwargs.get('persistent_check', True)

        if persistent_check is True:
            counter = 100
            while (ask_result in self.Xi) and (counter > 0):
                ask_result = self.__ask_helper(ask_result, do_retell, return_val)
                # print(F'{counter}   {ask_result}')
                counter -= 1

        return ask_result

    def __ask_helper(self, ask_result, do_retell=True, return_val='ask'):
        """

        Parameters
        ----------
        ask_result: Iterable of hyperparameters
            The result of :meth:`skopt.optimizer.optimizer.Optimizer._ask`
        do_retell: Boolean, default=True
            If True and `ask_result` has already been tested, the optimizer will be re-`tell`ed the
            hyperparameters and their original score
        return_val: String in ['ask', 'random'], default='ask'
            If 'ask', :meth:`skopt.optimizer.optimizer.Optimizer._ask` will be repeatedly called
            for a new result.
            If 'random', :meth:`space.Space.rvs` will be used to retrieve the next set of
            hyperparameters

        Returns
        -------
        ask_result"""
        # TODO: Fill in documentation description
        if self._n_initial_points > 0 or self.base_estimator_ is None:
            ask_result = self.space.rvs(random_state=self.rng)[0]
        else:
            min_delta_x = min([self.space.distance(ask_result, _) for _ in self.Xi])

            if abs(min_delta_x) <= 1e-8:
                # G.debug_(F'Received repeated point: {ask_result}')

                if do_retell is True:
                    self.tell(ask_result, self.yi[self.Xi.index(ask_result)])
                    # G.debug_(F'Optimizer was re-`tell`ed point: {ask_result} -> {self.yi[self.Xi.index(ask_result)]}')

                if return_val == 'ask':
                    ask_result = super()._ask()
                    # G.debug_(F'Re-`ask`ed, and received point: {ask_result}')
                elif return_val == 'random':
                    ask_result = self.space.rvs(random_state=self.rng)[0]
                    # G.debug_(F'Set repeated point to random: {ask_result}')
                else:
                    raise ValueError(F'Received invalid value for `return_val`: {return_val}')

        return ask_result
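# Illustrative sketch (not part of the library): the duplicate-suggestion guard that `_ask` and
# `__ask_helper` implement, reduced to its core idea. `suggest`, `already_evaluated`, and
# `random_sample` are hypothetical stand-ins for `super()._ask()`, membership in `self.Xi`, and
# `self.space.rvs(...)[0]`; the retry budget of 100 mirrors the `counter` used above.
def _illustrative_repeat_guard(suggest, already_evaluated, random_sample,
                               return_val='ask', max_retries=100):
    candidate = suggest()
    for _ in range(max_retries):
        if not already_evaluated(candidate):
            break
        # Mirror `__ask_helper`: either re-`ask` the optimizer, or fall back to a random sample
        candidate = suggest() if return_val == 'ask' else random_sample()
    return candidate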