def simple_experiment_from_json(object_json: Dict[str, Any]) -> SimpleExperiment:
    """Load AE SimpleExperiment from JSON.

    Note that ``object_json`` is consumed in place: the fields handled
    explicitly below are popped or deleted from it, and whatever remains is
    decoded and passed to the ``SimpleExperiment`` constructor.
    """
    # Fields that are assigned onto the experiment after it is constructed.
    # They are popped in a fixed order, so a missing key surfaces as a
    # `KeyError` for the first absent field in this sequence.
    deferred_keys = (
        "time_created",
        "trials",
        "experiment_type",
        "data_by_trial",
        "description",
        "is_test",
    )
    deferred = {key: object_json.pop(key) for key in deferred_keys}
    optimization_config = object_from_json(object_json.pop("optimization_config"))

    # not relevant to simple experiment
    for irrelevant_key in ("tracking_metrics", "runner"):
        del object_json[irrelevant_key]

    kwargs = {key: object_from_json(value) for key, value in object_json.items()}
    kwargs.update(
        evaluation_function=unimplemented_evaluation_function,
        objective_name=optimization_config.objective.metric.name,
        minimize=optimization_config.objective.minimize,
        outcome_constraints=optimization_config.outcome_constraints,
    )

    experiment = SimpleExperiment(**kwargs)
    experiment.description = object_from_json(deferred["description"])
    experiment.is_test = object_from_json(deferred["is_test"])
    experiment._time_created = object_from_json(deferred["time_created"])
    experiment._trials = trials_from_json(experiment, deferred["trials"])

    # Register every arm that appears on a decoded trial.
    trial_arms = (
        arm for trial in experiment._trials.values() for arm in trial.arms
    )
    for arm in trial_arms:
        experiment._register_arm(arm)

    status_quo = experiment.status_quo
    if status_quo is not None:
        experiment._register_arm(not_none(status_quo))

    experiment._experiment_type = object_from_json(deferred["experiment_type"])
    experiment._data_by_trial = data_from_json(deferred["data_by_trial"])
    return experiment
def get_optimization_trace(
    self, objective_optimum: Optional[float] = None
) -> AxPlotConfig:
    """Build the optimization-trace plot configuration, i.e. the running best
    objective mean over the experiment's trials.

    Args:
        objective_optimum: Optimal objective, if known, for display in the
            visualization.

    Raises:
        ValueError: If the experiment has no trials.
    """
    if not self.experiment.trials:
        raise ValueError("Cannot generate plot as there are no trials.")

    objective = self.experiment.optimization_config.objective
    objective_name = objective.metric.name

    # Single-row 2D array: one objective mean per trial.
    objective_means = [
        checked_cast(Trial, trial).objective_mean
        for trial in self.experiment.trials.values()
    ]
    best_objectives = np.array([objective_means])

    hover_labels = [
        _format_dict(not_none(checked_cast(Trial, trial).arm).parameters)
        for trial in self.experiment.trials.values()
    ]

    # Running minimum for minimization problems, running maximum otherwise.
    if objective.minimize:
        running_best = np.minimum.accumulate(best_objectives, axis=1)
    else:
        running_best = np.maximum.accumulate(best_objectives, axis=1)

    return optimization_trace_single_method(
        y=running_best,
        optimum=objective_optimum,
        title="Model performance vs. # of iterations",
        ylabel=objective_name.capitalize(),
        hover_labels=hover_labels,
    )
def _gen_new_generator_run(self, n: int = 1) -> GeneratorRun:
    """Generate new generator run for this experiment.

    Args:
        n: Number of arms to generate.

    Returns:
        The generator run produced by the generation strategy.
    """
    new_data = self._get_new_data()
    # If random seed is not set for this optimization, context manager does
    # nothing; otherwise, it sets the random seed for torch, but only for the
    # scope of this call. This is important because torch seed is set globally,
    # so if we just set the seed without the context manager, it can have
    # serious negative impact on the performance of the models that employ
    # stochasticity.
    #
    # NOTE: the two context managers must be separated by a comma. Joining
    # them with `and` evaluates a boolean expression that yields only the
    # second manager, so `manual_seed` would never be entered and the seed
    # would silently not be applied.
    with manual_seed(seed=self._random_seed), warnings.catch_warnings():
        # Filter out GPYTorch warnings to avoid confusing users.
        warnings.simplefilter("ignore")
        return not_none(self.generation_strategy).gen(
            experiment=self.experiment,
            new_data=new_data,
            n=n,
            pending_observations=get_pending_observation_features(
                experiment=self.experiment
            ),
        )
def __init__(
    self,
    surrogate: Surrogate,
    bounds: List[Tuple[float, float]],
    objective_weights: Tensor,
    objective_thresholds: Optional[Tensor],
    botorch_acqf_class: Optional[Type[AcquisitionFunction]] = None,
    options: Optional[Dict[str, Any]] = None,
    pending_observations: Optional[List[Tensor]] = None,
    outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
    linear_constraints: Optional[Tuple[Tensor, Tensor]] = None,
    fixed_features: Optional[Dict[int, float]] = None,
    target_fidelities: Optional[Dict[int, float]] = None,
) -> None:
    """Validate the acquisition function class and delegate to the parent
    initializer.

    The class used is ``botorch_acqf_class`` if given, otherwise
    ``self.default_botorch_acqf_class``.

    Raises:
        UnsupportedError: If the resolved class is not a subclass of
            ``qExpectedHypervolumeImprovement``.
    """
    botorch_acqf_class = not_none(
        botorch_acqf_class or self.default_botorch_acqf_class
    )
    is_ehvi_subclass = issubclass(
        botorch_acqf_class, qExpectedHypervolumeImprovement
    )
    if not is_ehvi_subclass:
        raise UnsupportedError(
            "Only qExpectedHypervolumeImprovement is currently supported as "
            f"a MOOAcquisition botorch_acqf_class. Got: {botorch_acqf_class}."
        )

    super().__init__(
        surrogate=surrogate,
        bounds=bounds,
        objective_weights=objective_weights,
        objective_thresholds=objective_thresholds,
        botorch_acqf_class=botorch_acqf_class,
        options=options,
        pending_observations=pending_observations,
        outcome_constraints=outcome_constraints,
        linear_constraints=linear_constraints,
        fixed_features=fixed_features,
        target_fidelities=target_fidelities,
    )
def get_model_predictions(
    self, metric_names: Optional[List[str]] = None
) -> Dict[int, Dict[str, Tuple[float, float]]]:
    """Retrieve model-estimated means and SEMs for the in-sample arms.

    Only trials whose arm appears among the in-sample arms returned for the
    current model are included in the result.

    Args:
        metric_names: Names of the metrics for which to retrieve predictions.
            When not given, the metrics on the experiment are used.

    Returns:
        A mapping from trial index to a mapping of metric names to tuples of
        predicted metric mean and SEM, of form:
        { trial_index -> { metric_name: ( mean, SEM ) } }.

    Raises:
        ValueError: If no model has been instantiated yet, or if no metrics
            are available from either the argument or the experiment.
    """
    if self.generation_strategy.model is None:  # pragma: no cover
        raise ValueError("No model has been instantiated yet.")
    if metric_names is None and self.experiment.metrics is None:
        raise ValueError(  # pragma: no cover
            "No metrics to retrieve specified on the experiment or as "
            "argument to `get_model_predictions`."
        )

    model = not_none(self.generation_strategy.model)
    if metric_names is not None:
        requested_metrics = set(metric_names)
    else:
        requested_metrics = set(not_none(self.experiment.metrics).keys())
    arm_info, _, _ = _get_in_sample_arms(
        model=model, metric_names=requested_metrics
    )

    trials = checked_cast_dict(int, Trial, self.experiment.trials)
    predictions = {}
    for trial_index, trial in trials.items():
        arm_name = not_none(trial.arm).name
        if arm_name not in arm_info:
            continue
        in_sample_arm = arm_info[arm_name]
        predictions[trial_index] = {
            metric: (in_sample_arm.y_hat[metric], in_sample_arm.se_hat[metric])
            for metric in in_sample_arm.y_hat
        }
    return predictions
def __init__(
    self,
    surrogate: Surrogate,
    bounds: List[Tuple[float, float]],
    objective_weights: Tensor,
    botorch_acqf_class: Optional[Type[AcquisitionFunction]] = None,
    options: Optional[Dict[str, Any]] = None,
    pending_observations: Optional[List[Tensor]] = None,
    outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
    linear_constraints: Optional[Tuple[Tensor, Tensor]] = None,
    fixed_features: Optional[Dict[int, float]] = None,
    target_fidelities: Optional[Dict[int, float]] = None,
) -> None:
    """Resolve the BoTorch acquisition function class and instantiate it as
    ``self.acqf`` from the surrogate's training data and model.

    Raises:
        ValueError: If neither ``botorch_acqf_class`` nor a class-level
            default acquisition function class is available.
    """
    resolved_acqf_class = botorch_acqf_class or self.default_botorch_acqf_class
    if not resolved_acqf_class:
        raise ValueError(
            f"Acquisition class {self.__class__} does not specify a default "
            "BoTorch `AcquisitionFunction`, so `botorch_acqf_class` "
            "argument must be specified."
        )
    self._botorch_acqf_class = not_none(resolved_acqf_class)
    self.surrogate = surrogate
    self.options = options or {}

    training_data = self._extract_training_data(surrogate=surrogate)
    if isinstance(training_data, TrainingData):
        # Assumes 1-D objective_weights, which should be safe.
        Xs = [training_data.X for _ in range(objective_weights.shape[0])]
    else:
        Xs = [block.X for block in training_data.values()]

    X_pending, X_observed = _get_X_pending_and_observed(
        Xs=Xs,
        pending_observations=pending_observations,
        objective_weights=objective_weights,
        outcome_constraints=outcome_constraints,
        bounds=bounds,
        linear_constraints=linear_constraints,
        fixed_features=fixed_features,
    )

    # Subset model only to the outcomes we need for the optimization. Note
    # that subsetting also replaces `objective_weights` and
    # `outcome_constraints` for everything that follows.
    if not self.options.get(Keys.SUBSET_MODEL, True):
        model = self.surrogate.model
    else:
        model, objective_weights, outcome_constraints, _ = subset_model(
            self.surrogate.model,
            objective_weights=objective_weights,
            outcome_constraints=outcome_constraints,
        )

    objective = self._get_botorch_objective(
        model=model,
        objective_weights=objective_weights,
        outcome_constraints=outcome_constraints,
        X_observed=X_observed,
    )
    model_deps = self.compute_model_dependencies(
        surrogate=surrogate,
        bounds=bounds,
        objective_weights=objective_weights,
        pending_observations=pending_observations,
        outcome_constraints=outcome_constraints,
        linear_constraints=linear_constraints,
        fixed_features=fixed_features,
        target_fidelities=target_fidelities,
        options=self.options,
    )

    # Model dependencies may override the baseline points; when they do, the
    # entry is removed so it is not passed a second time via `**model_deps`.
    baseline_override = model_deps.get(Keys.X_BASELINE)
    if baseline_override is None:
        X_baseline = X_observed
    else:
        X_baseline = baseline_override
        model_deps.pop(Keys.X_BASELINE)

    self.acqf = self._botorch_acqf_class(  # pyre-ignore[28]: Some kwargs are
        # not expected in base `AcquisitionFunction` but are expected in
        # its subclasses.
        model=model,
        objective=objective,
        X_pending=X_pending,
        X_baseline=X_baseline,
        **self.options,
        **model_deps,
    )
def experiment(self) -> Experiment:
    """Return the experiment set on this generation strategy.

    Raises:
        ValueError: If no experiment has been set.
    """
    current_experiment = self._experiment
    if current_experiment is None:  # pragma: no cover
        raise ValueError("No experiment set on generation strategy.")
    return not_none(current_experiment)
def _get_model_state(self) -> Dict[str, Any]:
    """Return the state reported by the underlying model's ``_get_state``.

    The model must be set; ``not_none`` guards against ``self.model`` being
    ``None``.
    """
    underlying_model = not_none(self.model)
    return underlying_model._get_state()
def update_trial_data(
    self,
    trial_index: int,
    raw_data: TEvaluationOutcome,
    metadata: Optional[Dict[str, Union[str, int]]] = None,
    sample_size: Optional[int] = None,
) -> None:
    """
    Attaches additional data for completed trial (for example, if trial was
    completed with data for only one of the required metrics and more data
    needs to be attached).

    Args:
        trial_index: Index of trial within the experiment.
        raw_data: Evaluation data for the trial. Can be a mapping from
            metric name to a tuple of mean and SEM, just a tuple of mean and
            SEM if only one metric in optimization, or just the mean if there
            is no SEM. Can also be a list of (fidelities, mapping from
            metric name to a tuple of mean and SEM).
        metadata: Additional metadata to track about this run.
        sample_size: Number of samples collected for the underlying arm,
            optional.

    Raises:
        ValueError: If the trial has not been completed yet.
    """
    assert isinstance(
        trial_index, int
    ), f"Trial index must be an int, got: {trial_index}."  # pragma: no cover
    trial = self._get_trial(trial_index=trial_index)
    if not trial.status.is_completed:
        # The two sentences are separate literals; the trailing space on the
        # first keeps them from running together in the rendered message.
        raise ValueError(
            f"Trial {trial.index} has not yet been completed with data. "
            "To complete it, use `ax_client.complete_trial`."
        )
    # A falsy `sample_size` (None or 0) means no sample size is recorded.
    sample_sizes = {not_none(trial.arm).name: sample_size} if sample_size else {}
    evaluations, data = self._make_evaluations_and_data(
        trial=trial, raw_data=raw_data, metadata=metadata, sample_sizes=sample_sizes
    )
    trial._run_metadata.update(metadata or {})
    for metric_name in data.df["metric_name"].values:
        if metric_name not in self.experiment.metrics:
            logger.info(
                f"Data was logged for metric {metric_name} that was not yet "
                "tracked on the experiment. Adding it as tracking metric."
            )
            self.experiment.add_tracking_metric(Metric(name=metric_name))
    # Registering trial data update is needed for generation strategies that
    # leverage the `update` functionality of model and bridge setup and therefore
    # need to be aware of new data added to experiment. Usually this happens
    # seamlessly, by looking at newly completed trials, but in this case trial
    # status does not change, so we manually register the new data.
    # Currently this call will only result in a `NotImplementedError` if generation
    # strategy uses `update` (`GenerationStep.use_update` is False by default).
    self.generation_strategy._register_trial_data_update(trial=trial, data=data)
    self.experiment.attach_data(data, combine_with_last_data=True)
    # Only the first evaluation is included in the log message.
    first_evaluation = next(iter(evaluations.values()))
    data_for_logging = _round_floats_for_logging(item=first_evaluation)
    logger.info(
        f"Added data: {_round_floats_for_logging(item=data_for_logging)} "
        f"to trial {trial.index}."
    )
    self._save_experiment_to_db_if_possible(
        experiment=self.experiment,
        suppress_all_errors=self._suppress_storage_errors,
    )
def _suggest_gp_model(
    search_space: SearchSpace,
    num_trials: Optional[int] = None,
    optimization_config: Optional[OptimizationConfig] = None,
    use_saasbo: bool = False,
) -> Union[None, Models]:
    """Suggest a model based on the search space. None means we use Sobol.

    The rules are applied in this order:

    1. We use Sobol if the number of total iterations in the optimization is
       known in advance, no range parameter is continuous, and there are no
       more distinct points in the search space than the known intended
       number of total iterations.
    2. If there are more ordered parameters than the sum of options for the
       *unordered* choice parameters, we use a GP-based model: ``MOO`` for
       multi-objective problems (``FULLYBAYESIANMOO`` if ``use_saasbo`` is
       True), otherwise ``GPEI`` (``FULLYBAYESIAN`` if ``use_saasbo`` is
       True).
    3. Otherwise, for single-objective problems, we use ``BO_MIXED`` if the
       number of discrete enumerations to be performed by the optimizer is
       at most ``MAX_DISCRETE_ENUMERATIONS_MIXED``, or if all parameters are
       enumerated and the number of combinations is less than
       ``MAX_DISCRETE_ENUMERATIONS_NO_CONTINUOUS_OPTIMIZATION``.
       ``BO_MIXED`` is not currently enabled for multi-objective
       optimization.
    4. If none of the above applies, we fall back to Sobol.

    When ``use_saasbo`` is True but the selected method is Sobol or
    ``BO_MIXED``, a warning is logged and the setting has no effect.
    """
    num_ordered_parameters = num_unordered_choices = 0
    num_enumerated_combinations = num_possible_points = 1
    all_range_parameters_are_discrete = True
    all_parameters_are_enumerated = True
    for parameter in search_space.tunable_parameters.values():
        should_enumerate_param = None
        num_param_discrete_values = None
        if isinstance(parameter, ChoiceParameter):
            num_param_discrete_values = len(parameter.values)
            num_possible_points *= num_param_discrete_values
            if parameter.is_ordered is False:
                num_unordered_choices += num_param_discrete_values
            else:
                num_ordered_parameters += 1
            # Choice parameters are enumerated whether ordered or not.
            should_enumerate_param = True
        elif isinstance(parameter, RangeParameter):
            num_ordered_parameters += 1
            if parameter.parameter_type == ParameterType.FLOAT:
                all_range_parameters_are_discrete = False
            else:
                num_param_discrete_values = int(parameter.upper - parameter.lower) + 1
                num_possible_points *= num_param_discrete_values
            # Range parameters are never enumerated.
            should_enumerate_param = False

        # `not_none` raises if the parameter was neither a choice nor a range
        # parameter, since the flag is then still unset.
        if not_none(should_enumerate_param):
            num_enumerated_combinations *= not_none(num_param_discrete_values)
        else:
            all_parameters_are_enumerated = False

    # If number of trials is known and sufficient to try all possible points,
    # we should use Sobol and not BO
    if (
        num_trials is not None
        and all_range_parameters_are_discrete
        and num_possible_points <= num_trials
    ):
        logger.info("Using Sobol since we can enumerate the search space.")
        if use_saasbo:
            logger.warning(SAASBO_INCOMPATIBLE_MESSAGE.format("Sobol"))
        return None

    is_moo_problem = optimization_config and optimization_config.is_moo_problem
    if num_ordered_parameters > num_unordered_choices:
        logger.info(
            "Using Bayesian optimization since there are more ordered "
            "parameters than there are categories for the unordered categorical "
            "parameters."
        )
        if is_moo_problem and use_saasbo:
            return Models.FULLYBAYESIANMOO
        if is_moo_problem and not use_saasbo:
            return Models.MOO
        if use_saasbo:
            return Models.FULLYBAYESIAN
        return Models.GPEI

    # The latter condition below is tied to the logic in `BO_MIXED`, which
    # currently enumerates all combinations of choice parameters.
    if not is_moo_problem and (
        num_enumerated_combinations <= MAX_DISCRETE_ENUMERATIONS_MIXED
        or (
            all_parameters_are_enumerated
            and num_enumerated_combinations
            < MAX_DISCRETE_ENUMERATIONS_NO_CONTINUOUS_OPTIMIZATION
        )
    ):
        logger.info(
            "Using Bayesian optimization with a categorical kernel for improved "
            "performance with a large number of unordered categorical parameters."
        )
        if use_saasbo:
            logger.warning(SAASBO_INCOMPATIBLE_MESSAGE.format("`BO_MIXED`"))
        return Models.BO_MIXED

    logger.info(
        f"Using Sobol since there are more than {MAX_DISCRETE_ENUMERATIONS_MIXED} "
        "combinations of enumerated parameters. For improved performance, make sure "
        "that all ordered `ChoiceParameter`s are encoded as such (`is_ordered=True`), "
        "and use `RangeParameter`s in place of ordered `ChoiceParameter`s where "
        "possible. Also, consider removing some or all unordered `ChoiceParameter`s."
    )
    if use_saasbo:
        logger.warning(SAASBO_INCOMPATIBLE_MESSAGE.format("Sobol"))
    return None
def _model_predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Predict with the fitted model and return the two outputs as numpy arrays.

    The input array is converted with ``self._array_to_tensor`` before being
    passed to the model's ``predict``; each returned tensor is detached,
    moved to CPU and cloned before conversion to numpy.

    Raises:
        ValueError: If no model is available.
    """
    if not self.model:  # pragma: no cover
        raise ValueError(FIT_MODEL_ERROR.format(action="_model_predict"))
    mean_tensor, var_tensor = not_none(self.model).predict(
        X=self._array_to_tensor(X)
    )
    mean_array = mean_tensor.detach().cpu().clone().numpy()
    var_array = var_tensor.detach().cpu().clone().numpy()
    return mean_array, var_array
def get_contour_plot(
    self,
    param_x: Optional[str] = None,
    param_y: Optional[str] = None,
    metric_name: Optional[str] = None,
) -> AxPlotConfig:
    """Retrieve a plot configuration for a contour plot of the response
    surface over two parameters.

    If neither axis parameter is given, the first two parameters of the
    experiment are used. If no metric is given, the objective is used.

    Args:
        param_x: Name of the parameter to use on the x-axis.
        param_y: Name of the parameter to use on the y-axis.
        metric_name: Name of the metric for which to plot the response
            surface.

    Raises:
        ValueError: If there are no trials, fewer than two parameters, only
            one of the two axis parameters is given, a named parameter or
            metric is not on the experiment, or no model able to produce the
            plot is available.
    """
    if not self.experiment.trials:
        raise ValueError("Cannot generate plot as there are no trials.")
    if len(self.experiment.parameters) < 2:
        raise ValueError(
            "Cannot create a contour plot as experiment has less than 2 "
            "parameters, but a contour-related argument was provided."
        )
    # Exactly one of the two axis parameters being supplied is an error.
    if bool(param_x) != bool(param_y):
        raise ValueError(
            "If `param_x` is provided, `param_y` is "
            "required as well, and vice-versa."
        )

    default_metric_name = self.objective_name
    metric_name = metric_name or default_metric_name

    if not (param_x and param_y):
        # Default to the first two parameters on the experiment.
        parameter_names = list(self.experiment.parameters.keys())
        param_x, param_y = parameter_names[0], parameter_names[1]

    if param_x not in self.experiment.parameters:
        raise ValueError(
            f'Parameter "{param_x}" not found in the optimization search space.'
        )
    if param_y not in self.experiment.parameters:
        raise ValueError(
            f'Parameter "{param_y}" not found in the optimization search space.'
        )
    if metric_name not in self.experiment.metrics:
        raise ValueError(
            f'Metric "{metric_name}" is not associated with this optimization.'
        )

    if self.generation_strategy.model is not None:
        try:
            logger.info(
                f"Retrieving contour plot with parameter '{param_x}' on X-axis "
                f"and '{param_y}' on Y-axis, for metric '{metric_name}'. "
                "Ramaining parameters are affixed to the middle of their range."
            )
            return plot_contour(
                model=not_none(self.generation_strategy.model),
                param_x=param_x,
                param_y=param_y,
                metric_name=metric_name,
            )
        except NotImplementedError:
            # Some models don't implement '_predict', which is needed
            # for the contour plots. Fall through to the error below.
            logger.info(
                f"Model {self.generation_strategy.model} does not implement "
                "`predict`, so it cannot be used to generate a response "
                "surface plot."
            )

    raise ValueError(
        f'Could not obtain contour plot of "{metric_name}" for parameters '
        f'"{param_x}" and "{param_y}", as a model with predictive ability, '
        "such as a Gaussian Process, has not yet been trained in the course "
        "of this optimization."
    )
def training_data(self) -> TrainingData:
    """Return the stored training data.

    Raises:
        ValueError: If no training data has been stored yet.
    """
    stored_training_data = self._training_data
    if stored_training_data is None:
        raise ValueError(NOT_YET_FIT_MSG)
    return not_none(stored_training_data)
def _get_in_sample_arms(
    model: ModelBridge,
    metric_names: Set[str],
    fixed_features: Optional[ObservationFeatures] = None,
) -> Tuple[Dict[str, PlotInSampleArm], RawData, Dict[str, TParameterization]]:
    """Get in-sample arms from a model with observed and predicted values
    for specified metrics.

    Returns a PlotInSampleArm object in which repeated observations are merged
    with IVW, and a RawData object in which every observation is listed.

    Fixed features input can be used to override fields of the insample arms
    when making model predictions.

    Args:
        model: An instance of the model bridge.
        metric_names: Restrict observations and predictions to these metrics.
        fixed_features: Features that should be fixed in the arms this function
            will obtain predictions for.

    Returns:
        A tuple containing

        - Map from arm name to PlotInSampleArm.
        - List of the data for each observation like::

            {'metric_name': 'likes', 'arm_name': '0_0', 'mean': 1., 'sem': 0.1}

        - Map from arm name to parameters

    Raises:
        ValueError: If an observation has no arm name.
    """
    observations = model.get_training_data()

    # Calculate raw data: one record per (observation, requested metric).
    raw_data = []
    arm_name_to_parameters = {}
    for obs in observations:
        arm_name_to_parameters[obs.arm_name] = obs.features.parameters
        raw_data.extend(
            {
                "metric_name": metric_name,
                "arm_name": obs.arm_name,
                "mean": obs.data.means[j],
                "sem": np.sqrt(obs.data.covariance[j, j]),
            }
            for j, metric_name in enumerate(obs.data.metric_names)
            if metric_name in metric_names
        )

    # Check that we have one ObservationFeatures per arm name since we
    # key by arm name and the model is not Multi-task.
    # If "TrialAsTask" is present, one of the arms is also chosen.
    has_duplicate_arm_names = len(arm_name_to_parameters) != len(observations)
    if ("TrialAsTask" not in model.transforms.keys()) and has_duplicate_arm_names:
        logger.error(
            "Have observations of arms with different features but same"
            " name. Arbitrary one will be plotted."
        )

    # Merge multiple measurements within each Observation with IVW to get
    # un-modeled prediction
    ivw_transform = IVW(None, [], [])
    obs_data = ivw_transform.transform_observation_data(
        [obs.data for obs in observations], []
    )

    # Start filling in plot data
    in_sample_plot: Dict[str, PlotInSampleArm] = {}
    for i, obs in enumerate(observations):
        if obs.arm_name is None:
            raise ValueError("Observation must have arm name for plotting.")

        # Extract raw measurement. Use the IVW data, not obs.data
        merged = obs_data[i]
        selected = [
            (j, metric_name)
            for j, metric_name in enumerate(merged.metric_names)
            if metric_name in metric_names
        ]
        # Observed metric means.
        obs_y = {metric_name: merged.means[j] for j, metric_name in selected}
        # Observed metric standard errors.
        obs_se = {
            metric_name: np.sqrt(merged.covariance[j, j])
            for j, metric_name in selected
        }

        # Make a prediction.
        if not model.training_in_design[i]:
            # Use raw data for out-of-design points
            pred_y, pred_se = obs_y, obs_se
        else:
            features = obs.features
            if fixed_features is not None:
                features.update_features(fixed_features)
            pred_y, pred_se = _predict_at_point(model, features, metric_names)

        arm_name = not_none(obs.arm_name)
        in_sample_plot[arm_name] = PlotInSampleArm(
            name=not_none(obs.arm_name),
            y=obs_y,
            se=obs_se,
            parameters=obs.features.parameters,
            y_hat=pred_y,
            se_hat=pred_se,
            context_stratum=None,
        )
    return in_sample_plot, raw_data, arm_name_to_parameters
def create_experiment(
    self,
    parameters: List[Dict[str, Union[TParamValue, List[TParamValue]]]],
    name: Optional[str] = None,
    objective_name: Optional[str] = None,
    minimize: bool = False,
    parameter_constraints: Optional[List[str]] = None,
    outcome_constraints: Optional[List[str]] = None,
    status_quo: Optional[TParameterization] = None,
    overwrite_existing_experiment: bool = False,
    experiment_type: Optional[str] = None,
    choose_generation_strategy_kwargs: Optional[Dict[str, Any]] = None,
) -> None:
    """Create a new experiment and save it if DBSettings available.

    Args:
        parameters: List of dictionaries representing parameters in the
            experiment search space. Required elements in the dictionaries
            are: "name" (name of this parameter, string), "type" (type of the
            parameter: "range", "fixed", or "choice", string), and "bounds"
            for range parameters (list of two values, lower bound first),
            "values" for choice parameters (list of values), and "value" for
            fixed parameters (single value).
        name: Name of the experiment to be created. Required when DB settings
            are set on this client.
        objective_name: Name of the metric used as objective in this
            experiment. This metric must be present in `raw_data` argument
            to `complete_trial`.
        minimize: Whether this experiment represents a minimization problem.
        parameter_constraints: List of string representation of parameter
            constraints, such as "x3 >= x4" or "-x3 + 2*x4 - 3.5*x5 >= 2". For
            the latter constraints, any number of arguments is accepted, and
            acceptable operators are "<=" and ">=".
        outcome_constraints: List of string representation of outcome
            constraints of form "metric_name >= bound", like "m1 <= 3."
        status_quo: Parameterization of the current state of the system.
            If set, this will be added to each trial to be evaluated alongside
            test configurations.
        overwrite_existing_experiment: If an experiment has already been set
            on this `AxClient` instance, whether to reset it to the new one.
            If overwriting the experiment, generation strategy will be
            re-selected for the new experiment and restarted.
            To protect experiments in production, one cannot overwrite
            existing experiments if the experiment is already stored in the
            database, regardless of the value of
            `overwrite_existing_experiment`.
        experiment_type: Passed through to `make_experiment`.
        choose_generation_strategy_kwargs: Keyword arguments to pass to
            `choose_generation_strategy` function which determines what
            generation strategy should be used when none was specified on init.

    Raises:
        ValueError: If DB settings are set but no name is given, if an
            experiment with this name already exists in the database, or if
            an experiment is already set on this client and
            `overwrite_existing_experiment` is False.
    """
    if self.db_settings_set:
        if not name:
            raise ValueError(  # pragma: no cover
                "Must give the experiment a name if `db_settings` is not None."
            )
        experiment_id, _ = self._get_experiment_and_generation_strategy_db_id(
            experiment_name=not_none(name)
        )
        if experiment_id:
            raise ValueError(
                f"Experiment {name} already exists in the database. "
                "To protect experiments that are running in production, "
                "overwriting stored experiments is not allowed. To "
                "start a new experiment and store it, change the "
                "experiment's name."
            )

    if self._experiment is not None:
        if not overwrite_existing_experiment:
            raise ValueError(
                "Experiment already created for this client instance. "
                "Set the `overwrite_existing_experiment` to `True` to overwrite "
                "with new experiment."
            )
        exp_name = self.experiment._name or "untitled"
        new_exp_name = name or "untitled"
        logger.info(
            f"Overwriting existing experiment ({exp_name}) on this client "
            f"with new experiment ({new_exp_name}) and restarting the "
            "generation strategy."
        )
        self._generation_strategy = None

    self._experiment = make_experiment(
        name=name,
        parameters=parameters,
        objective_name=objective_name,
        minimize=minimize,
        parameter_constraints=parameter_constraints,
        outcome_constraints=outcome_constraints,
        status_quo=status_quo,
        experiment_type=experiment_type,
    )

    try:
        self._save_experiment_to_db_if_possible(
            experiment=self.experiment,
            suppress_all_errors=self._suppress_storage_errors,
        )
    except Exception:
        # Unset the experiment on this `AxClient` instance if encountered and
        # raising an error from saving the experiment, to avoid a case where
        # overall `create_experiment` call fails with a storage error, but
        # `self._experiment` is still set and user has to specify the
        # `overwrite_existing_experiment` kwarg to re-attempt exp. creation.
        self._experiment = None
        raise

    self._set_generation_strategy(
        choose_generation_strategy_kwargs=choose_generation_strategy_kwargs
    )
    self._save_generation_strategy_to_db_if_possible(
        generation_strategy=self.generation_strategy,
        suppress_all_errors=self._suppress_storage_errors,
    )
def __call__(
    self,
    search_space: Optional[SearchSpace] = None,
    experiment: Optional[Experiment] = None,
    data: Optional[Data] = None,
    silently_filter_kwargs: bool = True,  # TODO[Lena]: default to False
    **kwargs: Any,
) -> ModelBridge:
    """Instantiate the model and model bridge registered for this key.

    Keyword arguments are consolidated with the defaults of the model class
    and the bridge class (plus the registered standard bridge kwargs and
    transforms), and the kwargs used are stored on the returned bridge.

    Args:
        search_space: Search space for the bridge; taken from ``experiment``
            when not given.
        experiment: Experiment to pass to the bridge.
        data: Data to pass to the bridge.
        silently_filter_kwargs: When False, kwargs are validated with
            ``validate_kwarg_typing`` before use.
        **kwargs: Arguments for the model and/or the bridge.

    Returns:
        The constructed model bridge.
    """
    assert self.value in MODEL_KEY_TO_MODEL_SETUP
    # All model bridges require either a search space or an experiment.
    assert search_space or experiment, "Search space or experiment required."

    model_setup_info = MODEL_KEY_TO_MODEL_SETUP[self.value]
    model_class = model_setup_info.model_class
    bridge_class = model_setup_info.bridge_class

    if not silently_filter_kwargs:
        validate_kwarg_typing(  # TODO[Lena]: T46467254, pragma: no cover
            typed_callables=[model_class, bridge_class],
            search_space=search_space,
            experiment=experiment,
            data=data,
            **kwargs,
        )

    # Create model with consolidated arguments: defaults + passed in kwargs.
    model_defaults = get_function_default_arguments(model_class)
    model_kwargs = consolidate_kwargs(
        kwargs_iterable=[model_defaults, kwargs],
        keywords=get_function_argument_names(model_class),
    )
    model = model_class(**model_kwargs)

    # Create `ModelBridge`: defaults + standard kwargs + passed in kwargs.
    bridge_kwarg_sources = [
        get_function_default_arguments(bridge_class),
        model_setup_info.standard_bridge_kwargs,
        {"transforms": model_setup_info.transforms},
        kwargs,
    ]
    bridge_kwargs = consolidate_kwargs(
        kwargs_iterable=bridge_kwarg_sources,
        keywords=get_function_argument_names(
            function=bridge_class, omit=["experiment", "search_space", "data"]
        ),
    )

    # Create model bridge with the consolidated kwargs.
    model_bridge = bridge_class(
        search_space=search_space or not_none(experiment).search_space,
        experiment=experiment,
        data=data,
        model=model,
        **bridge_kwargs,
    )

    # Temporarily ignore Botorch callable & torch-typed arguments, as those
    # are not serializable to JSON out-of-the-box. TODO[Lena]: T46527142
    if isinstance(model, TorchModel):
        model_kwargs = {
            key: value for key, value in model_kwargs.items() if not callable(value)
        }
        bridge_kwargs = {
            key: value
            for key, value in bridge_kwargs.items()
            if not key.startswith("torch")
        }

    # Store all kwargs on model bridge, to be saved on generator run.
    model_bridge._set_kwargs_to_save(
        model_key=self.value, model_kwargs=model_kwargs, bridge_kwargs=bridge_kwargs
    )
    return model_bridge
def db_settings(self) -> DBSettings:
    """Return the DB settings set on this instance.

    Raises:
        ValueError: If no DB settings are set, so the returned value is never
            ``None``.
    """
    settings = self._db_settings
    if settings is None:
        raise ValueError("No DB settings are set on this instance.")
    return not_none(settings)
def complete_trial(
    self,
    trial_index: int,
    # acceptable `raw_data` argument formats:
    # 1) {metric_name -> (mean, standard error)}
    # 2) (mean, standard error) and we assume metric name == objective name
    # 3) only the mean, and we assume metric name == objective name and
    #    standard error == 0
    raw_data: TEvaluationOutcome,
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Completes the trial with given metric values and adds optional metadata
    to it.

    The raw data is validated and converted before the trial is modified, so
    a call that fails on invalid `raw_data` leaves the trial's status and
    metadata untouched.

    Args:
        trial_index: Index of trial within the experiment.
        raw_data: Evaluation data for the trial. Can be a mapping from
            metric name to a tuple of mean and SEM, just a tuple of mean and
            SEM if only one metric in optimization, or just the mean if there
            is no SEM.
        metadata: Additional metadata to track about this run. When given, it
            replaces the trial's existing run metadata.

    Raises:
        NotImplementedError: If the trial is not a `Trial` (e.g. a batch
            trial).
        ValueError: If `raw_data` is not a dict, tuple, float or int.
    """
    assert isinstance(
        trial_index, int
    ), f"Trial index must be an int, got: {trial_index}."  # pragma: no cover
    trial = self.experiment.trials[trial_index]
    if not isinstance(trial, Trial):
        raise NotImplementedError(
            "Batch trial functionality is not yet available through Service API."
        )

    # Normalize `raw_data` into {metric_name -> (mean, sem)} first; nothing on
    # the trial is changed until the data is known to be usable.
    if isinstance(raw_data, dict):
        metric_outcomes = raw_data
    elif isinstance(raw_data, tuple):
        metric_outcomes = {
            self.experiment.optimization_config.objective.metric.name: raw_data
        }
    elif isinstance(raw_data, (float, int)):
        metric_outcomes = {
            self.experiment.optimization_config.objective.metric.name: (
                raw_data,
                0.0,
            )
        }
    else:
        raise ValueError(
            "Raw data has an invalid type. The data must either be in the form "
            "of a dictionary of metric names to mean, sem tuples, "
            "or a single mean, sem tuple, or a single mean."
        )
    evaluations = {not_none(trial.arm).name: metric_outcomes}
    data = Data.from_evaluations(evaluations, trial.index)

    trial._status = TrialStatus.COMPLETED
    if metadata is not None:
        trial._run_metadata = metadata
    self.experiment.attach_data(data)
    self._updated_trials.append(trial_index)
    self._save_experiment_if_possible()
def generator_run_from_sqa(
    self, generator_run_sqa: SQAGeneratorRun, reduced_state: bool = False
) -> GeneratorRun:
    """Decode a SQLAlchemy generator run into an Ax ``GeneratorRun``.

    Args:
        generator_run_sqa: `SQAGeneratorRun` to decode.
        reduced_state: Whether to load the generator run with a slightly
            reduced state (without model state, search space, and
            optimization config).

    Raises:
        SQADecodeError: If tracking metrics are decoded for the generator run.
    """
    # Decode each arm together with its weight, preserving the stored order.
    decoded_arms = [
        (self.arm_from_sqa(arm_sqa=arm_sqa), arm_sqa.weight)
        for arm_sqa in generator_run_sqa.arms
    ]
    arms = [arm for arm, _ in decoded_arms]
    weights = [weight for _, weight in decoded_arms]

    opt_config = None
    search_space = None
    if not reduced_state:
        decoded_metrics = self.opt_config_and_tracking_metrics_from_sqa(
            metrics_sqa=generator_run_sqa.metrics
        )
        opt_config, tracking_metrics = decoded_metrics
        if len(tracking_metrics) > 0:
            raise SQADecodeError(  # pragma: no cover
                "GeneratorRun should not have tracking metrics."
            )
        search_space = self.search_space_from_sqa(
            parameters_sqa=generator_run_sqa.parameters,
            parameter_constraints_sqa=generator_run_sqa.parameter_constraints,
        )

    # Best-arm predictions are only rebuilt when both pieces were stored.
    best_arm_predictions = None
    if (
        generator_run_sqa.best_arm_parameters is not None
        and generator_run_sqa.best_arm_predictions is not None
    ):
        best_arm_predictions = (
            Arm(
                name=generator_run_sqa.best_arm_name,
                parameters=not_none(generator_run_sqa.best_arm_parameters),
            ),
            tuple(not_none(generator_run_sqa.best_arm_predictions)),
        )

    if generator_run_sqa.model_predictions is None:
        model_predictions = None
    else:
        model_predictions = tuple(not_none(generator_run_sqa.model_predictions))

    # In reduced state the stored model/bridge fields are not even read.
    if reduced_state:
        model_kwargs = None
        bridge_kwargs = None
        gen_metadata = None
        model_state_after_gen = None
    else:
        model_kwargs = object_from_json(generator_run_sqa.model_kwargs)
        bridge_kwargs = object_from_json(generator_run_sqa.bridge_kwargs)
        gen_metadata = object_from_json(generator_run_sqa.gen_metadata)
        model_state_after_gen = object_from_json(
            generator_run_sqa.model_state_after_gen
        )

    generator_run = GeneratorRun(
        arms=arms,
        weights=weights,
        optimization_config=opt_config,
        search_space=search_space,
        fit_time=generator_run_sqa.fit_time,
        gen_time=generator_run_sqa.gen_time,
        best_arm_predictions=best_arm_predictions,  # pyre-ignore[6]
        model_predictions=model_predictions,
        model_key=generator_run_sqa.model_key,
        model_kwargs=model_kwargs,
        bridge_kwargs=bridge_kwargs,
        gen_metadata=gen_metadata,
        model_state_after_gen=model_state_after_gen,
        generation_step_index=generator_run_sqa.generation_step_index,
        candidate_metadata_by_arm_signature=object_from_json(
            generator_run_sqa.candidate_metadata_by_arm_signature
        ),
    )

    # Restore fields that are not accepted by the constructor.
    generator_run._time_created = generator_run_sqa.time_created
    generator_run._generator_run_type = self.get_enum_name(
        value=generator_run_sqa.generator_run_type,
        enum=self.config.generator_run_type_enum,
    )
    generator_run._index = generator_run_sqa.index
    generator_run.db_id = generator_run_sqa.id
    return generator_run
def _observations_from_dataframe(
    experiment: Experiment,
    df: pd.DataFrame,
    cols: List[str],
    arm_name_only: bool,
    map_keys: Iterable[str],
    include_abandoned: bool,
    map_keys_as_parameters: bool = False,
) -> List[Observation]:
    """Helper method for extracting observations grouped by `cols` from `df`.

    Args:
        experiment: Experiment with arm parameters.
        df: DataFrame derived from experiment Data.
        cols: columns used to group data into different observations.
        arm_name_only: Whether each group key is just the arm name. If so, no
            trial is looked up for the group and the only feature taken from
            the key is ``arm_name``.
        map_keys: columns that map dict-like Data
            e.g. `timestamp` in timeseries data, `epoch` in ML training traces.
        include_abandoned: Whether data for abandoned trials and arms should
            be included in the observations, returned from this function.
        map_keys_as_parameters: Whether map_keys should be returned as part of
            the parameters of the Observation objects.

    Returns:
        List of Observation objects.
    """
    observations = []
    abandoned_arms_dict = {}
    for g, d in df.groupby(by=cols):
        # When grouping by a one-element list, pandas >= 2.0 yields a 1-tuple
        # as the group key while older versions yield the bare value.
        # Normalize to a tuple so both behave the same below.
        group_key = g if isinstance(g, tuple) else (g,)
        obs_kwargs = {}
        if arm_name_only:
            # Unwrap a single-column key so the arm name is the bare value.
            arm_name = group_key[0] if len(group_key) == 1 else g
            features = {"arm_name": arm_name}
            trial_index = None
        else:
            features = dict(zip(cols, group_key))
            arm_name = features["arm_name"]
            trial_index = features.get("trial_index", None)

        if trial_index is not None:
            trial = experiment.trials[trial_index]
            metadata = trial._get_candidate_metadata(arm_name) or {}
            if Keys.TRIAL_COMPLETION_TIMESTAMP not in metadata:
                if trial._time_completed is not None:
                    metadata[Keys.TRIAL_COMPLETION_TIMESTAMP] = not_none(
                        trial._time_completed
                    ).timestamp()
            obs_kwargs[Keys.METADATA] = metadata

            if not include_abandoned and trial.status.is_abandoned:
                # Exclude abandoned trials.
                continue

            if not include_abandoned and isinstance(trial, BatchTrial):
                # Exclude abandoned arms from batch trial's observations.
                if trial.index not in abandoned_arms_dict:
                    # Save abandoned arm names to dict to avoid recomputing
                    # them on creation of every observation.
                    abandoned_arms_dict[trial.index] = trial.abandoned_arm_names
                if arm_name in abandoned_arms_dict[trial.index]:
                    continue

        obs_parameters = experiment.arms_by_name[arm_name].parameters.copy()
        if obs_parameters:
            obs_kwargs["parameters"] = obs_parameters
        for f, val in features.items():
            if f in OBS_KWARGS:
                obs_kwargs[f] = val
        fidelities = features.get("fidelities")
        if fidelities is not None:
            obs_parameters.update(json.loads(fidelities))

        for map_key in map_keys:
            if map_key in obs_parameters or map_keys_as_parameters:
                obs_parameters[map_key] = features[map_key]
            else:
                # Metadata is only pre-populated when a trial was found, so
                # create it on demand instead of raising a KeyError.
                obs_kwargs.setdefault(Keys.METADATA, {})[map_key] = features[
                    map_key
                ]

        observations.append(
            Observation(
                features=ObservationFeatures(**obs_kwargs),
                data=ObservationData(
                    metric_names=d["metric_name"].tolist(),
                    means=d["mean"].values,
                    covariance=np.diag(d["sem"].values ** 2),
                ),
                arm_name=arm_name,
            )
        )
    return observations
def gen(
    self,
    n: int,
    bounds: List,
    objective_weights: Tensor,
    outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
    linear_constraints: Optional[Tuple[Tensor, Tensor]] = None,
    fixed_features: Optional[Dict[int, float]] = None,
    pending_observations: Optional[List[Tensor]] = None,
    model_gen_options: Optional[TConfig] = None,
    rounding_func: Optional[Callable[[Tensor], Tensor]] = None,
    target_fidelities: Optional[Dict[int, float]] = None,
) -> Tuple[Tensor, Tensor, TGenMetadata, Optional[List[TCandidateMetadata]]]:
    r"""Generate new candidates.

    Args:
        n: Number of candidates to generate.
        bounds: A list of (lower, upper) tuples for each column of X.
        objective_weights: The objective is to maximize a weighted sum of
            the columns of f(x). These are the weights.
        outcome_constraints: A tuple of (A, b). For k outcome constraints
            and m outputs at f(x), A is (k x m) and b is (k x 1) such that
            A f(x) <= b.
        linear_constraints: A tuple of (A, b). For k linear constraints on
            d-dimensional x, A is (k x d) and b is (k x 1) such that
            A x <= b.
        fixed_features: A map {feature_index: value} for features that
            should be fixed to a particular value during generation.
        pending_observations: A list of m (k_i x d) feature tensors X
            for m outcomes and k_i pending observations for outcome i.
        model_gen_options: A config dictionary that can contain
            model-specific options.
        rounding_func: A function that rounds an optimization result
            appropriately (i.e., according to `round-trip` transformations).
        target_fidelities: A map {feature_index: value} of fidelity feature
            column indices to their respective target fidelities. Used for
            multi-fidelity optimization.

    Returns:
        4-element tuple containing

        - (n x d) tensor of generated points.
        - n-tensor of weights for each point (all ones).
        - Dictionary of model-specific metadata for the given
          generation candidates (empty here).
        - Candidate metadata, which this method always returns as ``None``.

    Raises:
        UnsupportedError: If converting `linear_constraints` produces any
            inequality constraints.
    """
    gen_options = model_gen_options or {}
    acqf_kwargs = gen_options.get("acquisition_function_kwargs", {})
    optimizer_kwargs = gen_options.get("optimizer_kwargs", {})

    X_pending, X_observed = _get_X_pending_and_observed(
        Xs=self.Xs,
        pending_observations=pending_observations,
        objective_weights=objective_weights,
        outcome_constraints=outcome_constraints,
        bounds=bounds,
        linear_constraints=linear_constraints,
        fixed_features=fixed_features,
    )

    # Restrict the model to the outcomes needed for the optimization, unless
    # the caller opted out via `subset_model=False`.
    model = not_none(self.model)
    if gen_options.get("subset_model", True):
        subset_result = subset_model(
            model=model,
            objective_weights=objective_weights,
            outcome_constraints=outcome_constraints,
        )
        model, objective_weights, outcome_constraints, _ = subset_result

    objective = get_botorch_objective(
        model=model,
        objective_weights=objective_weights,
        outcome_constraints=outcome_constraints,
        X_observed=X_observed,
    )

    inequality_constraints = _to_inequality_constraints(linear_constraints)
    # TODO: update optimizers to handle inequality_constraints
    if inequality_constraints is not None:
        raise UnsupportedError(
            "Inequality constraints are not yet supported for KnowledgeGradient!"
        )

    # Options shared by the current-value computation and the acquisition
    # function.
    qmc = acqf_kwargs.get("qmc", True)
    seed_inner = acqf_kwargs.get("seed_inner", None)

    current_value = self._get_current_value(
        model=model,
        bounds=bounds,
        X_observed=not_none(X_observed),
        objective_weights=objective_weights,
        outcome_constraints=outcome_constraints,
        linear_constraints=linear_constraints,
        seed_inner=seed_inner,
        fixed_features=fixed_features,
        model_gen_options=model_gen_options,
        target_fidelities=target_fidelities,
        qmc=qmc,
    )

    # Bounds arrive as one (lower, upper) pair per feature; transpose them
    # after converting to a tensor.
    bounds_ = torch.tensor(
        bounds, dtype=self.dtype, device=self.device
    ).transpose(0, 1)

    acq_function = _instantiate_KG(
        model=model,
        objective=objective,
        qmc=qmc,
        n_fantasies=acqf_kwargs.get("num_fantasies", 64),
        num_trace_observations=gen_options.get("num_trace_observations", 0),
        mc_samples=acqf_kwargs.get("mc_samples", 256),
        seed_inner=seed_inner,
        seed_outer=acqf_kwargs.get("seed_outer", None),
        X_pending=X_pending,
        target_fidelities=target_fidelities,
        fidelity_weights=gen_options.get("fidelity_weights"),
        current_value=current_value,
        cost_intercept=self.cost_intercept,
    )

    # Optimize the acquisition function to obtain the new points.
    candidates = _optimize_and_get_candidates(
        acq_function=acq_function,
        bounds_=bounds_,
        n=n,
        num_restarts=optimizer_kwargs.get("num_restarts", 40),
        raw_samples=optimizer_kwargs.get("raw_samples", 1024),
        optimizer_options=optimizer_kwargs,
        rounding_func=rounding_func,
        inequality_constraints=inequality_constraints,
        fixed_features=fixed_features,
    )
    weights = torch.ones(n, dtype=self.dtype)
    return candidates, weights, {}, None
def test_not_none(self):
    """`not_none` returns non-None input as-is and raises ValueError on None."""
    value = "not_none"
    self.assertEqual(not_none(value), value)
    self.assertRaises(ValueError, not_none, None)
def get_trial_parameters(self, trial_index: int) -> TParameterization:
    """Return the parameters of the arm attached to the trial at `trial_index`."""
    trial = self._get_trial(trial_index)
    arm = not_none(trial.arm)
    return arm.parameters
def gen(
    self,
    experiment: Experiment,
    new_data: Optional[Data] = None,  # Take in just the new data.
    n: int = 1,
    **kwargs: Any,
) -> GeneratorRun:
    """Produce the next points in the experiment.

    Args:
        experiment: Experiment to generate points for.
        new_data: Only the data that is new since the previous call.
        n: Number of arms to generate.
        **kwargs: Forwarded to the model setup when the first model is
            instantiated or when switching to the next model.

    Raises:
        ValueError: If arm-count enforcement is on and either all arms for
            the current model were generated without enough observations,
            or fewer than `n` arms remain for the current model.
    """
    # Arm signatures for the entries in `new_data` that are indeed new.
    new_arms = self._get_new_arm_signatures(
        experiment=experiment, new_data=new_data
    )
    num_generated = len(self._generated)
    num_observed = len(self._observed) + len(new_arms)

    enough_observed = num_observed >= self._curr.min_arms_observed
    unlimited_arms = self._curr.num_arms == -1
    enough_generated = (
        not unlimited_arms and num_generated >= self._curr.num_arms
    )
    remaining_arms = self._curr.num_arms - num_generated

    if self._curr.enforce_num_arms:
        # Minimum observed arms must be satisfied before moving on.
        if enough_generated and not enough_observed:
            raise ValueError(
                "All trials for current model have been generated, but not enough "
                "data has been observed to fit next model. Try again when more data "
                "are available."
            )
        # TODO[Lena, T44021164]: take into account failed trials. Potentially
        # reduce `_generated` count when a trial mentioned in new data failed.
        if not unlimited_arms and 0 < remaining_arms < n:
            raise ValueError(
                f"Cannot generate {n} new arms as there are only {remaining_arms} "
                "remaining arms to generate using the current model."
            )

    if new_data:
        all_data = Data.from_multiple_data(data=[self._data, new_data])
    else:
        all_data = self._data

    if self._model is None:
        # No model yet: instantiate the first one.
        self._set_current_model(experiment=experiment, data=all_data, **kwargs)
    elif enough_generated and enough_observed:
        # Current model is exhausted: move on to the next one.
        self._change_model(experiment=experiment, data=all_data, **kwargs)
    elif new_data is not None:
        # Staying with the current model; feed it only the new data.
        self._model.update(experiment=experiment, data=new_data)

    gen_kwargs = self._curr.model_gen_kwargs or {}
    gen_run = not_none(self._model).gen(n=n, **gen_kwargs)

    # Generation succeeded, so commit known data, `_observed` and `_generated`.
    self._data = all_data
    self._observed.extend(new_arms)
    self._generated.extend(arm.signature for arm in gen_run.arms)
    return gen_run
def objective_name(self) -> str:
    """Name of the metric the objective of this optimization is defined on."""
    return not_none(self.experiment.optimization_config).objective.metric.name
def feature_importances(self, metric_name: str) -> Dict[str, float]:
    """Map each parameter to its importance value for `metric_name`.

    The model's importances are paired with `self.outcomes` to pick the
    entry for `metric_name`, which is flattened and paired with
    `self.parameters`.

    Raises:
        KeyError: If `metric_name` is not among `self.outcomes`.
    """
    model_importances = not_none(self.model).feature_importances()
    by_outcome = {
        outcome: importances
        for outcome, importances in zip(self.outcomes, model_importances)
    }
    flat_importances = by_outcome[metric_name].flatten()
    return {
        parameter: importance
        for parameter, importance in zip(self.parameters, flat_importances)
    }
def __call__(
    self,
    search_space: Optional[SearchSpace] = None,
    experiment: Optional[Experiment] = None,
    data: Optional[Data] = None,
    silently_filter_kwargs: bool = False,
    **kwargs: Any,
) -> ModelBridge:
    """Build the model and model bridge registered for this model key.

    Args:
        search_space: Search space for the bridge; when omitted, the
            experiment's search space is used.
        experiment: Experiment passed to the bridge. Either this or
            `search_space` is required.
        data: Data passed to the bridge.
        silently_filter_kwargs: If False, `kwargs` are validated against the
            model and bridge classes before use.
        **kwargs: Arguments routed to the model and/or bridge constructor,
            depending on which of them accepts each keyword.

    Returns:
        The constructed model bridge, with the kwargs used to build it
        stored on it for saving.
    """
    assert self.value in MODEL_KEY_TO_MODEL_SETUP, f"Unknown model {self.value}"
    # All model bridges require either a search space or an experiment.
    assert search_space or experiment, "Search space or experiment required."
    if not search_space:
        search_space = not_none(experiment).search_space

    setup = MODEL_KEY_TO_MODEL_SETUP[self.value]
    model_class = setup.model_class
    bridge_class = setup.bridge_class

    if not silently_filter_kwargs:
        validate_kwarg_typing(
            typed_callables=[model_class, bridge_class],
            search_space=search_space,
            experiment=experiment,
            data=data,
            **kwargs,
        )

    # Model arguments: class defaults overridden by passed-in kwargs.
    model_kwargs = consolidate_kwargs(
        kwargs_iterable=[get_function_default_arguments(model_class), kwargs],
        keywords=get_function_argument_names(model_class),
    )
    model = model_class(**model_kwargs)

    # Bridge arguments: class defaults, then the standard kwargs and
    # transforms for this model setup, then passed-in kwargs.
    bridge_kwargs = consolidate_kwargs(
        kwargs_iterable=[
            get_function_default_arguments(bridge_class),
            setup.standard_bridge_kwargs,
            {"transforms": setup.transforms},
            kwargs,
        ],
        keywords=get_function_argument_names(
            function=bridge_class, omit=["experiment", "search_space", "data"]
        ),
    )

    # `search_space` was already resolved above (falling back to the
    # experiment's search space), so it can be passed through directly.
    model_bridge = bridge_class(
        search_space=search_space,
        experiment=experiment,
        data=data,
        model=model,
        **bridge_kwargs,
    )

    # Drop the model kwargs that this setup marks as not to be saved.
    for key in setup.not_saved_model_kwargs or ():
        model_kwargs.pop(key, None)

    # Store all kwargs on model bridge, to be saved on generator run.
    model_bridge._set_kwargs_to_save(
        model_key=self.value,
        model_kwargs=_encode_callables_as_references(model_kwargs),
        bridge_kwargs=_encode_callables_as_references(bridge_kwargs),
    )
    return model_bridge
def _pareto_frontier(
    self,
    objective_thresholds: Optional[TRefPoint] = None,
    observation_features: Optional[List[ObservationFeatures]] = None,
    observation_data: Optional[List[ObservationData]] = None,
    optimization_config: Optional[MultiObjectiveOptimizationConfig] = None,
) -> List[ObservationData]:
    """Run the frontier evaluator and return its result as observation data.

    The inputs are transformed and converted to tensors, the frontier
    evaluator obtained from `_get_frontier_evaluator` is called on them, and
    its output is converted to `ObservationData` and untransformed by
    applying this bridge's transforms in reverse order.

    Args:
        objective_thresholds: If given, the optimization config is cloned
            with these thresholds before being transformed.
        observation_features: If given, transformed and passed to the
            evaluator as `X`; otherwise `X` is None.
        observation_data: If given, transformed and passed to the evaluator
            as `Y` and `Yvar`; otherwise both are None.
        optimization_config: Config to use; falls back to this bridge's
            `_optimization_config` when omitted.

    Returns:
        Untransformed observation data built from the evaluator's output.

    Raises:
        ValueError: If no optimization config is available from either the
            argument or this bridge.
    """
    # TODO(jej): This method should be refactored to move tensor
    # conversions into a separate utility, and eventually should be
    # moved into base.py.
    # The reason this method is currently implemented in array.py is to
    # allow the broadest possible set of models to call frontier and
    # hypervolume evaluation functions given the current API.

    # Features: transform, then convert to a tensor (None when not supplied).
    X = (self.transform_observation_features(observation_features)
         if observation_features else None)
    X = self._array_to_tensor(X) if X is not None else None

    # Observed outcomes and their variances, handled the same way.
    Y, Yvar = (None, None)
    if observation_data:
        Y, Yvar = self.transform_observation_data(observation_data)
    if Y is not None and Yvar is not None:
        Y, Yvar = (self._array_to_tensor(Y), self._array_to_tensor(Yvar))

    # Optimization_config: prefer the argument, else fall back to the one
    # stored on this bridge.
    mooc = optimization_config or checked_cast_optional(
        MultiObjectiveOptimizationConfig, self._optimization_config)
    if not mooc:
        raise ValueError(
            ("experiment must have an existing optimization_config "
             "of type MultiObjectiveOptimizationConfig "
             "or `optimization_config` must be passed as an argument."))
    if not isinstance(mooc, MultiObjectiveOptimizationConfig):
        mooc = not_none(
            MultiObjectiveOptimizationConfig.from_opt_conf(mooc))
    if objective_thresholds:
        # Explicitly passed thresholds replace those on the config.
        mooc = mooc.clone_with_args(
            objective_thresholds=objective_thresholds)

    optimization_config = mooc

    # Transform OptimizationConfig.
    optimization_config = self.transform_optimization_config(
        optimization_config=optimization_config,
        fixed_features=ObservationFeatures(parameters={}),
    )
    # Extract weights, constraints, and objective_thresholds
    objective_weights = extract_objective_weights(
        objective=optimization_config.objective, outcomes=self.outcomes)
    outcome_constraints = extract_outcome_constraints(
        outcome_constraints=optimization_config.outcome_constraints,
        outcomes=self.outcomes,
    )
    objective_thresholds_arr = extract_objective_thresholds(
        objective_thresholds=optimization_config.objective_thresholds,
        outcomes=self.outcomes,
    )
    # Transform to tensors. No linear constraints or pending observations
    # are passed, so the last two returned values are discarded.
    obj_w, oc_c, _, _ = validate_and_apply_final_transform(
        objective_weights=objective_weights,
        outcome_constraints=outcome_constraints,
        linear_constraints=None,
        pending_observations=None,
        final_transform=self._array_to_tensor,
    )
    obj_t = self._array_to_tensor(objective_thresholds_arr)
    frontier_evaluator = self._get_frontier_evaluator()
    # pyre-ignore[28]: Unexpected keyword `model` to anonymous call
    f, cov = frontier_evaluator(
        model=self.model,
        X=X,
        Y=Y,
        Yvar=Yvar,
        objective_thresholds=obj_t,
        objective_weights=obj_w,
        outcome_constraints=oc_c,
    )
    # Detach the results, move them to CPU and convert them to numpy arrays.
    f, cov = f.detach().cpu().clone().numpy(), cov.detach().cpu().clone(
    ).numpy()
    frontier_observation_data = array_to_observation_data(
        f=f, cov=cov, outcomes=not_none(self.outcomes))
    # Untransform observations, undoing the transforms in reverse order.
    for t in reversed(self.transforms.values()):  # noqa T484
        frontier_observation_data = t.untransform_observation_data(
            frontier_observation_data, [])
    return frontier_observation_data
def choose_generation_strategy(
    search_space: SearchSpace,
    use_batch_trials: bool = False,
    enforce_sequential_optimization: bool = True,
    random_seed: Optional[int] = None,
    winsorize_botorch_model: bool = False,
    winsorization_limits: Optional[Tuple[Optional[float], Optional[float]]] = None,
    no_bayesian_optimization: bool = False,
    num_trials: Optional[int] = None,
    num_initialization_trials: Optional[int] = None,
    max_parallelism_cap: Optional[int] = None,
    max_parallelism_override: Optional[int] = None,
    optimization_config: Optional[OptimizationConfig] = None,
    should_deduplicate: bool = False,
    use_saasbo: bool = False,
    verbose: Optional[bool] = None,
    experiment: Optional[Experiment] = None,
) -> GenerationStrategy:
    """Select an appropriate generation strategy based on the properties of
    the search space and expected settings of the experiment, such as number of
    arms per trial, optimization algorithm settings, expected number of trials
    in the experiment, etc.

    Args:
        search_space: SearchSpace, based on the properties of which to select the
            generation strategy.
        use_batch_trials: Whether this generation strategy will be used to generate
            batched trials instead of 1-arm trials.
        enforce_sequential_optimization: Whether to enforce that 1) the generation
            strategy needs to be updated with `min_trials_observed` observations for
            a given generation step before proceeding to the next one and 2) maximum
            number of trials running at once (max_parallelism) is enforced for the
            BayesOpt step. NOTE: `max_parallelism_override` and `max_parallelism_cap`
            settings will still take their effect on max parallelism even if
            `enforce_sequential_optimization=False`, so if those settings are
            specified, max parallelism will be enforced.
        random_seed: Fixed random seed for the Sobol generator.
        winsorize_botorch_model: Whether to apply the winsorization transform
            prior to applying other transforms for fitting the BoTorch model.
        winsorization_limits: Bounds for winsorization, if winsorizing, expressed
            as percentile. Usually only the upper winsorization trim is used when
            minimizing, and only the lower when maximizing.
        no_bayesian_optimization: If True, Bayesian optimization generation
            strategy will not be suggested and quasi-random strategy will be used.
        num_trials: Total number of trials in the optimization, if
            known in advance.
        num_initialization_trials: Specific number of initialization trials, if wanted.
            Typically, initialization trials are generated quasi-randomly.
        max_parallelism_cap: Integer cap on parallelism in this generation strategy.
            If specified, `max_parallelism` setting in each generation step will be
            set to the minimum of the default setting for that step and the value of
            this cap. `max_parallelism_cap` is meant to just be a hard limit on
            parallelism (e.g. to avoid overloading machine(s) that evaluate the
            experiment trials). Specify only if not specifying
            `max_parallelism_override`.
        max_parallelism_override: Integer, with which to override the default max
            parallelism setting for all steps in the generation strategy returned from
            this function. Each generation step has a `max_parallelism` value, which
            restricts how many trials can run simultaneously during a given generation
            step. By default, the parallelism setting is chosen as appropriate for the
            model in a given generation step. If `max_parallelism_override` is -1,
            no max parallelism will be enforced for any step of the generation
            strategy. Be aware that parallelism is limited to improve performance of
            Bayesian optimization, so only disable its limiting if necessary.
        optimization_config: Optimization config, passed on to the GP model
            suggestion logic.
        should_deduplicate: Passed on to each generation step created here.
        use_saasbo: Whether to use SAAS prior for any GPEI generation steps.
        verbose: Whether GP model should produce verbose logs. If not ``None``, its
            value gets added to ``model_kwargs`` during ``generation_strategy``
            construction. Defaults to ``True`` for SAASBO, else ``None``. Verbose
            outputs are currently only available for SAASBO, so if ``verbose is not
            None`` for a different model type, it will be overridden to ``None`` with
            a warning.
        experiment: If specified, `_experiment` attribute of the generation strategy
            will be set to this experiment (useful for associating a generation
            strategy with a given experiment before it's first used to ``gen`` with
            that experiment).

    Returns:
        A two-step (Sobol, then BoTorch) generation strategy when Bayesian
        optimization is allowed and a GP model is suggested; otherwise a
        Sobol-only generation strategy.

    Raises:
        ValueError: If both `max_parallelism_override` and
            `max_parallelism_cap` are specified.
    """
    suggested_model = _suggest_gp_model(
        search_space=search_space,
        num_trials=num_trials,
        optimization_config=optimization_config,
        use_saasbo=use_saasbo,
    )
    if not no_bayesian_optimization and suggested_model is not None:
        if not enforce_sequential_optimization and (  # pragma: no cover
            max_parallelism_override or max_parallelism_cap
        ):
            # The override/cap are applied below regardless of
            # `enforce_sequential_optimization`, so tell the user that they
            # remain in effect.
            logger.info(
                "`enforce_sequential_optimization` is False, but "
                "`max_parallelism_override` or `max_parallelism_cap` was "
                "specified, so max parallelism will still be determined by "
                "those settings."
            )
        if max_parallelism_override and max_parallelism_cap:
            raise ValueError(
                "If `max_parallelism_override` specified, cannot also apply "
                "`max_parallelism_cap`."
            )

        # If number of initialization trials is not specified, estimate it.
        if num_initialization_trials is None:
            if use_batch_trials:  # Batched trials.
                num_initialization_trials = 1
            elif num_trials is not None:  # 1-arm trials with specified `num_trials`.
                num_initialization_trials = max(
                    5,
                    min(
                        not_none(num_trials) // 5,
                        2 * len(search_space.tunable_parameters),
                    ),
                )
            else:  # 1-arm trials.
                num_initialization_trials = max(
                    5, 2 * len(search_space.tunable_parameters)
                )

        # Determine max parallelism for the generation steps.
        if max_parallelism_override == -1:
            # `max_parallelism_override` of -1 means no max parallelism enforcement in
            # the generation strategy, which means `max_parallelism=None` in gen. steps.
            sobol_parallelism = bo_parallelism = None
        elif max_parallelism_override is not None:
            sobol_parallelism = bo_parallelism = max_parallelism_override
        elif max_parallelism_cap is not None:  # Max parallelism override is None by now
            sobol_parallelism = max_parallelism_cap
            bo_parallelism = min(max_parallelism_cap, DEFAULT_BAYESIAN_PARALLELISM)
        elif not enforce_sequential_optimization:
            # If no max parallelism settings specified and not enforcing sequential
            # optimization, do not limit parallelism.
            sobol_parallelism = bo_parallelism = None
        else:  # No additional max parallelism settings, use defaults
            sobol_parallelism = None  # No restriction on Sobol phase
            bo_parallelism = DEFAULT_BAYESIAN_PARALLELISM

        # `verbose` default behavior and overrides
        model_is_saasbo = not_none(suggested_model).name in [
            "FULLYBAYESIANMOO",
            "FULLYBAYESIAN",
        ]
        if verbose is None and model_is_saasbo:
            verbose = True
        elif verbose is not None and not model_is_saasbo:
            logger.warning(
                f"Overriding `verbose = {verbose}` to `None` for non-SAASBO GP step."
            )
            verbose = None

        # create `generation_strategy`
        gs = GenerationStrategy(
            steps=[
                _make_sobol_step(
                    num_trials=num_initialization_trials,
                    enforce_num_trials=enforce_sequential_optimization,
                    seed=random_seed,
                    max_parallelism=sobol_parallelism,
                    should_deduplicate=should_deduplicate,
                ),
                _make_botorch_step(
                    model=suggested_model,
                    winsorize=winsorize_botorch_model,
                    winsorization_limits=winsorization_limits,
                    max_parallelism=bo_parallelism,
                    should_deduplicate=should_deduplicate,
                    verbose=verbose,
                ),
            ]
        )
        logger.info(
            f"Using Bayesian Optimization generation strategy: {gs}. Iterations after"
            f" {num_initialization_trials} will take longer to generate due to "
            " model-fitting."
        )
    else:
        if verbose is not None:
            logger.warning(
                f"Ignoring `verbose = {verbose}` for `generation_strategy` "
                "without a GP step."
            )
        gs = GenerationStrategy(
            steps=[
                _make_sobol_step(
                    seed=random_seed, should_deduplicate=should_deduplicate
                )
            ]
        )
        logger.info("Using Sobol generation strategy.")
    if experiment:
        gs.experiment = experiment
    return gs
def _get_frontier_evaluator(self) -> TFrontierEvaluator:
    """Return the model's own frontier evaluator, or the default one.

    The model's `frontier_evaluator` attribute is used when the model has
    one; otherwise the result of `get_default_frontier_evaluator()` is
    returned.
    """
    if not hasattr(self.model, "frontier_evaluator"):
        return get_default_frontier_evaluator()
    # pyre-ignore [16]: `TorchModel has no attribute `frontier_evaluator`
    return not_none(self.model).frontier_evaluator