def _maybe_save_experiment_and_generation_strategy(
    self, experiment: Experiment, generation_strategy: GenerationStrategy
) -> Tuple[bool, bool]:
    """Persist the experiment and generation strategy if DB settings are set.

    When this ``WithDBSettingsBase`` instance has DB settings, the experiment
    is saved (created or updated) and the generation strategy is saved unless
    it is already the one associated with the experiment in the DB.

    Returns:
        Tuple of two booleans: whether experiment was saved in the course
        of this function's execution and whether generation strategy was
        saved.
    """
    if not self.db_settings_set:
        # No DB settings on this instance: nothing to persist.
        return False, False
    if experiment._name is None:
        raise ValueError(
            "Experiment must specify a name to use storage functionality."
        )
    exp_name = not_none(experiment.name)
    exp_id, gs_id = self._get_experiment_and_generation_strategy_db_id(
        experiment_name=exp_name
    )
    # Both branches persist the experiment; only the log message differs.
    # TODO: Switch to just updating experiment when selective-field
    # update is available.
    if exp_id:  # Experiment in DB.
        logger.info(f"Experiment {exp_name} is in DB, updating it.")
    else:  # Experiment not yet in DB.
        logger.info(f"Experiment {exp_name} is not yet in DB, storing it.")
    self._save_experiment_to_db_if_possible(experiment=experiment)
    if gs_id and generation_strategy._db_id != gs_id:
        raise UnsupportedError(
            "Experiment was associated with generation strategy in DB, "
            f"but a new generation strategy {generation_strategy.name} "
            "was provided. To use the generation strategy currently in DB,"
            " instantiate scheduler via: `Scheduler.with_stored_experiment`."
        )
    if gs_id and generation_strategy._db_id is not None:
        # The provided generation strategy is the one already stored with
        # this experiment; no need to save it again.
        return True, False
    logger.info(
        f"Generation strategy {generation_strategy.name} is not yet in DB, "
        "storing it."
    )
    # If generation strategy does not yet have an experiment attached, attach
    # the current experiment to it, as otherwise it will not be possible to
    # retrieve it by experiment name.
    if generation_strategy._experiment is None:
        generation_strategy.experiment = experiment
    self._save_generation_strategy_to_db_if_possible(
        generation_strategy=generation_strategy
    )
    return True, True
def _choose_num_initialization_trials(
    search_space: SearchSpace,
    use_batch_trials: bool,
    num_trials: Optional[int],
) -> int:
    """Estimate the number of quasi-random initialization trials.

    Batched trials need only a single initialization batch. For 1-arm trials,
    the heuristic is 1/5 of the total budget (when known), bounded above by
    twice the number of tunable parameters and below by 5.
    """
    if use_batch_trials:  # Batched trials.
        return 1
    if num_trials is not None:  # 1-arm trials with specified `num_trials`.
        return max(
            5,
            min(
                not_none(num_trials) // 5,
                2 * len(search_space.tunable_parameters),
            ),
        )
    # 1-arm trials with unknown total budget.
    return max(5, 2 * len(search_space.tunable_parameters))


def _choose_max_parallelism(
    max_parallelism_override: Optional[int],
    max_parallelism_cap: Optional[int],
    enforce_sequential_optimization: bool,
) -> Tuple[Optional[int], Optional[int]]:
    """Determine ``(sobol_parallelism, bo_parallelism)`` for the two steps.

    An override of -1 disables max parallelism enforcement entirely; any other
    override applies to both steps; otherwise a cap (if given) bounds each
    step's default. With neither setting, parallelism is unlimited when
    sequential optimization is not enforced, else the default BayesOpt cap
    applies.
    """
    if max_parallelism_override == -1:
        # -1 means no max parallelism enforcement in the generation strategy,
        # i.e. `max_parallelism=None` in the generation steps.
        return None, None
    if max_parallelism_override is not None:
        return max_parallelism_override, max_parallelism_override
    if max_parallelism_cap is not None:  # Override is None by now.
        return (
            max_parallelism_cap,
            min(max_parallelism_cap, DEFAULT_BAYESIAN_PARALLELISM),
        )
    if not enforce_sequential_optimization:
        # No explicit max parallelism settings and sequential optimization is
        # not enforced, so do not limit parallelism.
        return None, None
    # No additional max parallelism settings: no restriction on the Sobol
    # phase, default cap on the Bayesian optimization phase.
    return None, DEFAULT_BAYESIAN_PARALLELISM


def choose_generation_strategy(
    search_space: SearchSpace,
    use_batch_trials: bool = False,
    enforce_sequential_optimization: bool = True,
    random_seed: Optional[int] = None,
    winsorize_botorch_model: bool = False,
    winsorization_limits: Optional[Tuple[Optional[float], Optional[float]]] = None,
    no_bayesian_optimization: bool = False,
    num_trials: Optional[int] = None,
    num_initialization_trials: Optional[int] = None,
    max_parallelism_cap: Optional[int] = None,
    max_parallelism_override: Optional[int] = None,
    optimization_config: Optional[OptimizationConfig] = None,
    should_deduplicate: bool = False,
    use_saasbo: bool = False,
    verbose: Optional[bool] = None,
    experiment: Optional[Experiment] = None,
) -> GenerationStrategy:
    """Select an appropriate generation strategy based on the properties of
    the search space and expected settings of the experiment, such as number of
    arms per trial, optimization algorithm settings, expected number of trials
    in the experiment, etc.

    Args:
        search_space: SearchSpace, based on the properties of which to select the
            generation strategy.
        use_batch_trials: Whether this generation strategy will be used to generate
            batched trials instead of 1-arm trials.
        enforce_sequential_optimization: Whether to enforce that 1) the generation
            strategy needs to be updated with `min_trials_observed` observations for
            a given generation step before proceeding to the next one and 2) maximum
            number of trials running at once (max_parallelism) if enforced for the
            BayesOpt step. NOTE: `max_parallelism_override` and `max_parallelism_cap`
            settings will still take their effect on max parallelism even if
            `enforce_sequential_optimization=False`, so if those settings are specified,
            max parallelism will be enforced.
        random_seed: Fixed random seed for the Sobol generator.
        winsorize_botorch_model: Whether to apply the winsorization transform
            prior to applying other transforms for fitting the BoTorch model.
        winsorization_limits: Bounds for winsorization, if winsorizing, expressed
            as percentile. Usually only the upper winsorization trim is used when
            minimizing, and only the lower when maximizing.
        no_bayesian_optimization: If True, Bayesian optimization generation
            strategy will not be suggested and quasi-random strategy will be used.
        num_trials: Total number of trials in the optimization, if known in advance.
        num_initialization_trials: Specific number of initialization trials, if wanted.
            Typically, initialization trials are generated quasi-randomly.
        max_parallelism_cap: Integer cap on parallelism in this generation strategy.
            If specified, `max_parallelism` setting in each generation step will be set
            to the minimum of the default setting for that step and the value of this
            cap. `max_parallelism_cap` is meant to just be a hard limit on parallelism
            (e.g. to avoid overloading machine(s) that evaluate the experiment trials).
            Specify only if not specifying `max_parallelism_override`.
        max_parallelism_override: Integer, with which to override the default max
            parallelism setting for all steps in the generation strategy returned from
            this function. Each generation step has a `max_parallelism` value, which
            restricts how many trials can run simultaneously during a given generation
            step. By default, the parallelism setting is chosen as appropriate for the
            model in a given generation step. If `max_parallelism_override` is -1,
            no max parallelism will be enforced for any step of the generation
            strategy. Be aware that parallelism is limited to improve performance of
            Bayesian optimization, so only disable its limiting if necessary.
        optimization_config: Optimization config, passed through to the model
            suggestion heuristic (used to select an appropriate GP model).
        should_deduplicate: Whether the generation steps should deduplicate
            suggested arms.
        use_saasbo: Whether to use SAAS prior for any GPEI generation steps.
        verbose: Whether GP model should produce verbose logs. If not ``None``, its
            value gets added to ``model_kwargs`` during ``generation_strategy``
            construction. Defaults to ``True`` for SAASBO, else ``None``. Verbose
            outputs are currently only available for SAASBO, so if ``verbose is not
            None`` for a different model type, it will be overridden to ``None`` with
            a warning.
        experiment: If specified, `_experiment` attribute of the generation strategy
            will be set to this experiment (useful for associating a generation
            strategy with a given experiment before it's first used to ``gen`` with
            that experiment).
    """
    suggested_model = _suggest_gp_model(
        search_space=search_space,
        num_trials=num_trials,
        optimization_config=optimization_config,
        use_saasbo=use_saasbo,
    )
    if not no_bayesian_optimization and suggested_model is not None:
        if not enforce_sequential_optimization and (  # pragma: no cover
            max_parallelism_override or max_parallelism_cap
        ):
            logger.info(
                "If `enforce_sequential_optimization` is False, max parallelism is "
                "not enforced and other max parallelism settings will be ignored."
            )
        if max_parallelism_override and max_parallelism_cap:
            raise ValueError(
                "If `max_parallelism_override` specified, cannot also apply "
                "`max_parallelism_cap`."
            )
        # If number of initialization trials is not specified, estimate it.
        if num_initialization_trials is None:
            num_initialization_trials = _choose_num_initialization_trials(
                search_space=search_space,
                use_batch_trials=use_batch_trials,
                num_trials=num_trials,
            )
        # Determine max parallelism for the generation steps.
        sobol_parallelism, bo_parallelism = _choose_max_parallelism(
            max_parallelism_override=max_parallelism_override,
            max_parallelism_cap=max_parallelism_cap,
            enforce_sequential_optimization=enforce_sequential_optimization,
        )
        # `verbose` default behavior and overrides: verbose output is only
        # available for SAASBO (fully Bayesian) models.
        model_is_saasbo = not_none(suggested_model).name in [
            "FULLYBAYESIANMOO",
            "FULLYBAYESIAN",
        ]
        if verbose is None and model_is_saasbo:
            verbose = True
        elif verbose is not None and not model_is_saasbo:
            logger.warning(
                f"Overriding `verbose = {verbose}` to `None` for non-SAASBO GP step."
            )
            verbose = None
        # Create `generation_strategy`: quasi-random initialization followed
        # by a BoTorch model step.
        gs = GenerationStrategy(
            steps=[
                _make_sobol_step(
                    num_trials=num_initialization_trials,
                    enforce_num_trials=enforce_sequential_optimization,
                    seed=random_seed,
                    max_parallelism=sobol_parallelism,
                    should_deduplicate=should_deduplicate,
                ),
                _make_botorch_step(
                    model=suggested_model,
                    winsorize=winsorize_botorch_model,
                    winsorization_limits=winsorization_limits,
                    max_parallelism=bo_parallelism,
                    should_deduplicate=should_deduplicate,
                    verbose=verbose,
                ),
            ]
        )
        # NOTE: original message contained a doubled space ("due to  model-
        # fitting."); fixed to a single space here.
        logger.info(
            f"Using Bayesian Optimization generation strategy: {gs}. Iterations "
            f"after {num_initialization_trials} will take longer to generate due "
            "to model-fitting."
        )
    else:
        if verbose is not None:
            logger.warning(
                f"Ignoring `verbose = {verbose}` for `generation_strategy` "
                "without a GP step."
            )
        gs = GenerationStrategy(
            steps=[
                _make_sobol_step(
                    seed=random_seed, should_deduplicate=should_deduplicate
                )
            ]
        )
        logger.info("Using Sobol generation strategy.")
    if experiment:
        gs.experiment = experiment
    return gs