Example #1
def simple_experiment_from_json(
        object_json: Dict[str, Any]) -> SimpleExperiment:
    """Load AE SimpleExperiment from JSON."""
    time_created_json = object_json.pop("time_created")
    trials_json = object_json.pop("trials")
    experiment_type_json = object_json.pop("experiment_type")
    data_by_trial_json = object_json.pop("data_by_trial")
    description_json = object_json.pop("description")
    is_test_json = object_json.pop("is_test")
    optimization_config = object_from_json(
        object_json.pop("optimization_config"))

    # not relevant to simple experiment
    del object_json["tracking_metrics"]
    del object_json["runner"]

    kwargs = {k: object_from_json(v) for k, v in object_json.items()}
    kwargs["evaluation_function"] = unimplemented_evaluation_function
    kwargs["objective_name"] = optimization_config.objective.metric.name
    kwargs["minimize"] = optimization_config.objective.minimize
    kwargs["outcome_constraints"] = optimization_config.outcome_constraints
    experiment = SimpleExperiment(**kwargs)

    experiment.description = object_from_json(description_json)
    experiment.is_test = object_from_json(is_test_json)
    experiment._time_created = object_from_json(time_created_json)
    experiment._trials = trials_from_json(experiment, trials_json)
    for trial in experiment._trials.values():
        for arm in trial.arms:
            experiment._register_arm(arm)
    if experiment.status_quo is not None:
        sq = not_none(experiment.status_quo)
        experiment._register_arm(sq)
    experiment._experiment_type = object_from_json(experiment_type_json)
    experiment._data_by_trial = data_from_json(data_by_trial_json)
    return experiment
Example #2
    def get_optimization_trace(
        self, objective_optimum: Optional[float] = None
    ) -> AxPlotConfig:
        """Retrieves the plot configuration for optimization trace, which shows
        the evolution of the objective mean over iterations.

        Args:
            objective_optimum: Optimal objective, if known, for display in the
                visualization.
        """
        if not self.experiment.trials:
            raise ValueError("Cannot generate plot as there are no trials.")
        objective_name = self.experiment.optimization_config.objective.metric.name
        best_objectives = np.array(
            [
                [
                    checked_cast(Trial, trial).objective_mean
                    for trial in self.experiment.trials.values()
                ]
            ]
        )
        hover_labels = [
            _format_dict(not_none(checked_cast(Trial, trial).arm).parameters)
            for trial in self.experiment.trials.values()
        ]
        return optimization_trace_single_method(
            y=(
                np.minimum.accumulate(best_objectives, axis=1)
                if self.experiment.optimization_config.objective.minimize
                else np.maximum.accumulate(best_objectives, axis=1)
            ),
            optimum=objective_optimum,
            title="Model performance vs. # of iterations",
            ylabel=objective_name.capitalize(),
            hover_labels=hover_labels,
        )
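
Below is an illustrative usage sketch for this method; it assumes an `AxClient` instance named `ax_client` with at least one completed trial, and uses Ax's notebook rendering helper (`ax.utils.notebook.plotting.render`) to display the returned `AxPlotConfig`:

# Illustrative only: `ax_client` and the known optimum value are assumptions.
from ax.utils.notebook.plotting import render

trace_config = ax_client.get_optimization_trace(objective_optimum=0.0)
render(trace_config)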
Example #3
    def _gen_new_generator_run(self, n: int = 1) -> GeneratorRun:
        """Generate new generator run for this experiment.

        Args:
            n: Number of arms to generate.
        """
        new_data = self._get_new_data()
        # If random seed is not set for this optimization, context manager does
        # nothing; otherwise, it sets the random seed for torch, but only for the
        # scope of this call. This is important because torch seed is set globally,
        # so if we just set the seed without the context manager, it can have
        # serious negative impact on the performance of the models that employ
        # stochasticity.
        with manual_seed(seed=self._random_seed), warnings.catch_warnings():
            # Filter out GPYTorch warnings to avoid confusing users.
            warnings.simplefilter("ignore")
            return not_none(self.generation_strategy).gen(
                experiment=self.experiment,
                new_data=new_data,
                n=n,
                pending_observations=get_pending_observation_features(
                    experiment=self.experiment
                ),
            )
Example #4
    def __init__(
        self,
        surrogate: Surrogate,
        bounds: List[Tuple[float, float]],
        objective_weights: Tensor,
        objective_thresholds: Optional[Tensor],
        botorch_acqf_class: Optional[Type[AcquisitionFunction]] = None,
        options: Optional[Dict[str, Any]] = None,
        pending_observations: Optional[List[Tensor]] = None,
        outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
        linear_constraints: Optional[Tuple[Tensor, Tensor]] = None,
        fixed_features: Optional[Dict[int, float]] = None,
        target_fidelities: Optional[Dict[int, float]] = None,
    ) -> None:
        botorch_acqf_class = not_none(
            botorch_acqf_class or self.default_botorch_acqf_class
        )
        if not issubclass(botorch_acqf_class, qExpectedHypervolumeImprovement):
            raise UnsupportedError(
                "Only qExpectedHypervolumeImprovement is currently supported as "
                f"a MOOAcquisition botorch_acqf_class. Got: {botorch_acqf_class}."
            )

        super().__init__(
            surrogate=surrogate,
            botorch_acqf_class=botorch_acqf_class,
            bounds=bounds,
            objective_weights=objective_weights,
            objective_thresholds=objective_thresholds,
            outcome_constraints=outcome_constraints,
            linear_constraints=linear_constraints,
            fixed_features=fixed_features,
            pending_observations=pending_observations,
            target_fidelities=target_fidelities,
            options=options,
        )
Example #5
    def get_model_predictions(
        self,
        metric_names: Optional[List[str]] = None
    ) -> Dict[int, Dict[str, Tuple[float, float]]]:
        """Retrieve model-estimated means and covariances for all metrics.
        Note: this function retrieves the predictions for the 'in-sample' arms,
        which means that the returned mapping will only contain predictions for
        trials that have been completed with data.

        Args:
            metric_names: Names of the metrics for which to retrieve predictions.
                All metrics on the experiment will be retrieved if this argument
                is not specified.

        Returns:
            A mapping from trial index to a mapping of metric names to tuples
            of predicted metric mean and SEM, of form:
            { trial_index -> { metric_name: ( mean, SEM ) } }.
        """
        if self.generation_strategy.model is None:  # pragma: no cover
            raise ValueError("No model has been instantiated yet.")
        if metric_names is None and self.experiment.metrics is None:
            raise ValueError(  # pragma: no cover
                "No metrics to retrieve specified on the experiment or as "
                "argument to `get_model_predictions`.")
        arm_info, _, _ = _get_in_sample_arms(
            model=not_none(self.generation_strategy.model),
            metric_names=set(metric_names) if metric_names is not None else
            set(not_none(self.experiment.metrics).keys()),
        )
        trials = checked_cast_dict(int, Trial, self.experiment.trials)

        return {
            trial_index: {
                m: (
                    arm_info[not_none(trials[trial_index].arm).name].y_hat[m],
                    arm_info[not_none(trials[trial_index].arm).name].se_hat[m],
                )
                for m in arm_info[not_none(trials[trial_index].arm).name].y_hat
            }
            for trial_index in trials
            if not_none(trials[trial_index].arm).name in arm_info
        }
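
A brief consumption sketch for the returned mapping (assumes an `AxClient` instance named `ax_client` whose experiment has completed trials and a fitted model):

# Iterates the {trial_index -> {metric_name: (mean, SEM)}} mapping.
predictions = ax_client.get_model_predictions()
for trial_index, metric_to_prediction in predictions.items():
    for metric_name, (mean, sem) in metric_to_prediction.items():
        print(f"trial {trial_index} | {metric_name}: mean={mean:.3f}, SEM={sem:.3f}")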
Example #6
    def __init__(
        self,
        surrogate: Surrogate,
        bounds: List[Tuple[float, float]],
        objective_weights: Tensor,
        botorch_acqf_class: Optional[Type[AcquisitionFunction]] = None,
        options: Optional[Dict[str, Any]] = None,
        pending_observations: Optional[List[Tensor]] = None,
        outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
        linear_constraints: Optional[Tuple[Tensor, Tensor]] = None,
        fixed_features: Optional[Dict[int, float]] = None,
        target_fidelities: Optional[Dict[int, float]] = None,
    ) -> None:
        if not botorch_acqf_class and not self.default_botorch_acqf_class:
            raise ValueError(
                f"Acquisition class {self.__class__} does not specify a default "
                "BoTorch `AcquisitionFunction`, so `botorch_acqf_class` "
                "argument must be specified.")
        self._botorch_acqf_class = not_none(botorch_acqf_class
                                            or self.default_botorch_acqf_class)
        self.surrogate = surrogate
        self.options = options or {}
        trd = self._extract_training_data(surrogate=surrogate)
        Xs = (
            # Assumes 1-D objective_weights, which should be safe.
            [trd.X for o in range(objective_weights.shape[0])]
            if isinstance(trd, TrainingData)
            else [i.X for i in trd.values()]
        )
        X_pending, X_observed = _get_X_pending_and_observed(
            Xs=Xs,
            pending_observations=pending_observations,
            objective_weights=objective_weights,
            outcome_constraints=outcome_constraints,
            bounds=bounds,
            linear_constraints=linear_constraints,
            fixed_features=fixed_features,
        )

        # Subset model only to the outcomes we need for the optimization.
        if self.options.get(Keys.SUBSET_MODEL, True):
            model, objective_weights, outcome_constraints, _ = subset_model(
                self.surrogate.model,
                objective_weights=objective_weights,
                outcome_constraints=outcome_constraints,
            )
        else:
            model = self.surrogate.model

        objective = self._get_botorch_objective(
            model=model,
            objective_weights=objective_weights,
            outcome_constraints=outcome_constraints,
            X_observed=X_observed,
        )
        model_deps = self.compute_model_dependencies(
            surrogate=surrogate,
            bounds=bounds,
            objective_weights=objective_weights,
            pending_observations=pending_observations,
            outcome_constraints=outcome_constraints,
            linear_constraints=linear_constraints,
            fixed_features=fixed_features,
            target_fidelities=target_fidelities,
            options=self.options,
        )
        X_baseline = X_observed
        overridden_X_baseline = model_deps.get(Keys.X_BASELINE)
        if overridden_X_baseline is not None:
            X_baseline = overridden_X_baseline
            model_deps.pop(Keys.X_BASELINE)
        self.acqf = self._botorch_acqf_class(  # pyre-ignore[28]: Some kwargs are
            # not expected in base `AcquisitionFunction` but are expected in
            # its subclasses.
            model=model,
            objective=objective,
            X_pending=X_pending,
            X_baseline=X_baseline,
            **self.options,
            **model_deps,
        )
Example #7
    def experiment(self) -> Experiment:
        """Experiment, currently set on this generation strategy."""
        if self._experiment is None:  # pragma: no cover
            raise ValueError("No experiment set on generation strategy.")
        return not_none(self._experiment)
Example #8
    def _get_model_state(self) -> Dict[str, Any]:
        """Obtains the state of the underlying model if using a stateful one."""
        return not_none(self.model)._get_state()
Example #9
    def update_trial_data(
        self,
        trial_index: int,
        raw_data: TEvaluationOutcome,
        metadata: Optional[Dict[str, Union[str, int]]] = None,
        sample_size: Optional[int] = None,
    ) -> None:
        """
        Attaches additional data for completed trial (for example, if trial was
        completed with data for only one of the required metrics and more data
        needs to be attached).

        Args:
            trial_index: Index of trial within the experiment.
            raw_data: Evaluation data for the trial. Can be a mapping from
                metric name to a tuple of mean and SEM, just a tuple of mean and
                SEM if only one metric in optimization, or just the mean if there
                is no SEM.  Can also be a list of (fidelities, mapping from
                metric name to a tuple of mean and SEM).
            metadata: Additional metadata to track about this run.
            sample_size: Number of samples collected for the underlying arm,
                optional.
        """
        assert isinstance(
            trial_index, int
        ), f"Trial index must be an int, got: {trial_index}."  # pragma: no cover
        trial = self._get_trial(trial_index=trial_index)
        if not trial.status.is_completed:
            raise ValueError(
                f"Trial {trial.index} has not yet been completed with data."
                "To complete it, use `ax_client.complete_trial`."
            )
        sample_sizes = {not_none(trial.arm).name: sample_size} if sample_size else {}
        evaluations, data = self._make_evaluations_and_data(
            trial=trial, raw_data=raw_data, metadata=metadata, sample_sizes=sample_sizes
        )
        trial._run_metadata.update(metadata or {})
        for metric_name in data.df["metric_name"].values:
            if metric_name not in self.experiment.metrics:
                logger.info(
                    f"Data was logged for metric {metric_name} that was not yet "
                    "tracked on the experiment. Adding it as tracking metric."
                )
                self.experiment.add_tracking_metric(Metric(name=metric_name))
        # Registering trial data update is needed for generation strategies that
        # leverage the `update` functionality of model and bridge setup and therefore
        # need to be aware of new data added to experiment. Usually this happens
        # seamlessly, by looking at newly completed trials, but in this case trial
        # status does not change, so we manually register the new data.
        # Currently this call will only result in a `NotImplementedError` if generation
        # strategy uses `update` (`GenerationStep.use_update` is False by default).
        self.generation_strategy._register_trial_data_update(trial=trial, data=data)
        self.experiment.attach_data(data, combine_with_last_data=True)
        data_for_logging = _round_floats_for_logging(
            item=evaluations[next(iter(evaluations.keys()))]
        )
        logger.info(
            f"Added data: {_round_floats_for_logging(item=data_for_logging)} "
            f"to trial {trial.index}."
        )
        self._save_experiment_to_db_if_possible(
            experiment=self.experiment,
            suppress_all_errors=self._suppress_storage_errors,
        )
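
A hypothetical call illustrating the usage described in the docstring; the client, trial index, metric name, and metadata below are assumptions for the example:

# Assumes trial 0 was completed earlier with partial data, and a late-arriving
# metric ("extra_metric") is attached to it afterwards.
ax_client.update_trial_data(
    trial_index=0,
    raw_data={"extra_metric": (0.42, 0.01)},  # {metric name: (mean, SEM)}
    metadata={"source": "late batch"},
)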
Example #10
def _suggest_gp_model(
    search_space: SearchSpace,
    num_trials: Optional[int] = None,
    optimization_config: Optional[OptimizationConfig] = None,
    use_saasbo: bool = False,
) -> Union[None, Models]:
    """Suggest a model based on the search space. None means we use Sobol.

    1. We use Sobol if the number of total iterations in the optimization is
    known in advance and there are fewer distinct points in the search space
    than the known intended number of total iterations.
    2. We use ``BO_MIXED`` if there are fewer ordered parameters in the search space
    than the sum of options for the *unordered* choice parameters, and the number
    of discrete enumerations to be performed by the optimizer is less than
    ``MAX_DISCRETE_ENUMERATIONS_MIXED``, or if there are only choice parameters and
    the number of choice combinations to enumerate is less than
    ``MAX_DISCRETE_ENUMERATIONS_CHOICE_ONLY``. ``BO_MIXED`` is not currently enabled
    for multi-objective optimization.
    3. We use ``MOO`` if ``optimization_config`` has multiple objectives and
    ``use_saasbo is False``.
    4. We use ``FULLYBAYESIANMOO`` if ``optimization_config`` has multiple objectives
    and `use_saasbo is True`.
    5. If none of the above and ``use_saasbo is False``, we use ``GPEI``.
    6. If none of the above and ``use_saasbo is True``, we use ``FULLYBAYESIAN``.
    """
    num_ordered_parameters = num_unordered_choices = 0
    num_enumerated_combinations = num_possible_points = 1
    all_range_parameters_are_discrete = True
    all_parameters_are_enumerated = True
    for parameter in search_space.tunable_parameters.values():
        should_enumerate_param = None
        num_param_discrete_values = None
        if isinstance(parameter, ChoiceParameter):
            num_param_discrete_values = len(parameter.values)
            num_possible_points *= num_param_discrete_values
            if parameter.is_ordered is False:
                num_unordered_choices += num_param_discrete_values
                should_enumerate_param = True
            else:
                num_ordered_parameters += 1
                should_enumerate_param = True
        elif isinstance(parameter, RangeParameter):
            num_ordered_parameters += 1
            if parameter.parameter_type == ParameterType.FLOAT:
                all_range_parameters_are_discrete = False
                should_enumerate_param = False
            else:
                num_param_discrete_values = int(parameter.upper -
                                                parameter.lower) + 1
                num_possible_points *= num_param_discrete_values
                should_enumerate_param = False

        if not_none(should_enumerate_param):
            num_enumerated_combinations *= not_none(num_param_discrete_values)
        else:
            all_parameters_are_enumerated = False

    # If number of trials is known and sufficient to try all possible points,
    # we should use Sobol and not BO
    if (num_trials is not None and all_range_parameters_are_discrete
            and num_possible_points <= num_trials):
        logger.info("Using Sobol since we can enumerate the search space.")
        if use_saasbo:
            logger.warn(SAASBO_INCOMPATIBLE_MESSAGE.format("Sobol"))
        return None

    is_moo_problem = optimization_config and optimization_config.is_moo_problem
    if num_ordered_parameters > num_unordered_choices:
        logger.info(
            "Using Bayesian optimization since there are more ordered "
            "parameters than there are categories for the unordered categorical "
            "parameters.")
        if is_moo_problem and use_saasbo:
            return Models.FULLYBAYESIANMOO
        if is_moo_problem and not use_saasbo:
            return Models.MOO
        if use_saasbo:
            return Models.FULLYBAYESIAN
        return Models.GPEI
    # The latter condition below is tied to the logic in `BO_MIXED`, which currently
    # enumerates all combinations of choice parameters.
    if not is_moo_problem and (
            num_enumerated_combinations <= MAX_DISCRETE_ENUMERATIONS_MIXED or
        (all_parameters_are_enumerated and num_enumerated_combinations <
         MAX_DISCRETE_ENUMERATIONS_NO_CONTINUOUS_OPTIMIZATION)):
        logger.info(
            "Using Bayesian optimization with a categorical kernel for improved "
            "performance with a large number of unordered categorical parameters."
        )
        if use_saasbo:
            logger.warn(SAASBO_INCOMPATIBLE_MESSAGE.format("`BO_MIXED`"))
        return Models.BO_MIXED
    logger.info(
        f"Using Sobol since there are more than {MAX_DISCRETE_ENUMERATIONS_MIXED} "
        "combinations of enumerated parameters. For improved performance, make sure "
        "that all ordered `ChoiceParameter`s are encoded as such (`is_ordered=True`), "
        "and use `RangeParameter`s in place of ordered `ChoiceParameter`s where "
        "possible. Also, consider removing some or all unordered `ChoiceParameter`s."
    )
    if use_saasbo:
        logger.warn(SAASBO_INCOMPATIBLE_MESSAGE.format("Sobol"))

    return None
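
A small worked illustration of rule 1 from the docstring, using a hypothetical search space rather than Ax API calls:

# Hypothetical: an all-discrete search space with two integer ranges over [1, 3]
# and one two-value choice parameter has 3 * 3 * 2 = 18 possible points, so a
# budget of 20 trials can enumerate it and Sobol (a return value of None) is
# preferred over Bayesian optimization.
num_possible_points = 3 * 3 * 2
num_trials = 20
assert num_possible_points <= num_trials  # -> _suggest_gp_model returns None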
Example #11
    def _model_predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        if not self.model:  # pragma: no cover
            raise ValueError(FIT_MODEL_ERROR.format(action="_model_predict"))
        f, var = not_none(self.model).predict(X=self._array_to_tensor(X))
        return f.detach().cpu().clone().numpy(), var.detach().cpu().clone().numpy()
Example #12
    def get_contour_plot(
        self,
        param_x: Optional[str] = None,
        param_y: Optional[str] = None,
        metric_name: Optional[str] = None,
    ) -> AxPlotConfig:
        """Retrieves a plot configuration for a contour plot of the response
        surface. For response surfaces with more than two parameters,
        selected two parameters will appear on the axes, and remaining parameters
        will be affixed to the middle of their range. If contour params arguments
        are not provided, the first two parameters in the search space will be
        used. If contour metrics are not provided, objective will be used.

        Args:
            param_x: name of parameters to use on x-axis for
                the contour response surface plots.
            param_y: name of parameters to use on y-axis for
                the contour response surface plots.
            metric_name: Name of the metric, for which to plot the response
                surface.
        """
        if not self.experiment.trials:
            raise ValueError("Cannot generate plot as there are no trials.")
        if len(self.experiment.parameters) < 2:
            raise ValueError(
                "Cannot create a contour plot as experiment has less than 2 "
                "parameters, but a contour-related argument was provided."
            )
        if (param_x or param_y) and not (param_x and param_y):
            raise ValueError(
                "If `param_x` is provided, `param_y` is "
                "required as well, and vice-versa."
            )
        objective_name = self.objective_name
        if not metric_name:
            metric_name = objective_name

        if not param_x or not param_y:
            parameter_names = list(self.experiment.parameters.keys())
            param_x = parameter_names[0]
            param_y = parameter_names[1]

        if param_x not in self.experiment.parameters:
            raise ValueError(
                f'Parameter "{param_x}" not found in the optimization search space.'
            )
        if param_y not in self.experiment.parameters:
            raise ValueError(
                f'Parameter "{param_y}" not found in the optimization search space.'
            )
        if metric_name not in self.experiment.metrics:
            raise ValueError(
                f'Metric "{metric_name}" is not associated with this optimization.'
            )
        if self.generation_strategy.model is not None:
            try:
                logger.info(
                    f"Retrieving contour plot with parameter '{param_x}' on X-axis "
                    f"and '{param_y}' on Y-axis, for metric '{metric_name}'. "
                    "Ramaining parameters are affixed to the middle of their range."
                )
                return plot_contour(
                    model=not_none(self.generation_strategy.model),
                    param_x=param_x,
                    param_y=param_y,
                    metric_name=metric_name,
                )

            except NotImplementedError:
                # Some models don't implement '_predict', which is needed
                # for the contour plots.
                logger.info(
                    f"Model {self.generation_strategy.model} does not implement "
                    "`predict`, so it cannot be used to generate a response "
                    "surface plot."
                )
        raise ValueError(
            f'Could not obtain contour plot of "{metric_name}" for parameters '
            f'"{param_x}" and "{param_y}", as a model with predictive ability, '
            "such as a Gaussian Process, has not yet been trained in the course "
            "of this optimization."
        )
Example #13
    def training_data(self) -> TrainingData:
        if self._training_data is None:
            raise ValueError(NOT_YET_FIT_MSG)
        return not_none(self._training_data)
Example #14
def _get_in_sample_arms(
    model: ModelBridge,
    metric_names: Set[str],
    fixed_features: Optional[ObservationFeatures] = None,
) -> Tuple[Dict[str, PlotInSampleArm], RawData, Dict[str, TParameterization]]:
    """Get in-sample arms from a model with observed and predicted values
    for specified metrics.

    Returns a PlotInSampleArm object in which repeated observations are merged
    with IVW, and a RawData object in which every observation is listed.

    Fixed features input can be used to override fields of the in-sample arms
    when making model predictions.

    Args:
        model: An instance of the model bridge.
        metric_names: Restrict predictions to these metrics. If None, uses all
            metrics in the model.
        fixed_features: Features that should be fixed in the arms this function
            will obtain predictions for.

    Returns:
        A tuple containing

        - Map from arm name to PlotInSampleArm.
        - List of the data for each observation like::

            {'metric_name': 'likes', 'arm_name': '0_0', 'mean': 1., 'sem': 0.1}

        - Map from arm name to parameters
    """
    observations = model.get_training_data()
    # Calculate raw data
    raw_data = []
    arm_name_to_parameters = {}
    for obs in observations:
        arm_name_to_parameters[obs.arm_name] = obs.features.parameters
        for j, metric_name in enumerate(obs.data.metric_names):
            if metric_name in metric_names:
                raw_data.append({
                    "metric_name": metric_name,
                    "arm_name": obs.arm_name,
                    "mean": obs.data.means[j],
                    "sem": np.sqrt(obs.data.covariance[j, j]),
                })

    # Check that we have one ObservationFeatures per arm name since we
    # key by arm name and the model is not Multi-task.
    # If "TrialAsTask" is present, one of the arms is also chosen.
    if ("TrialAsTask" not in model.transforms.keys()) and (
            len(arm_name_to_parameters) != len(observations)):
        logger.error(
            "Have observations of arms with different features but same"
            " name. Arbitrary one will be plotted.")

    # Merge multiple measurements within each Observation with IVW to get
    # un-modeled prediction
    t = IVW(None, [], [])
    obs_data = t.transform_observation_data([obs.data for obs in observations],
                                            [])
    # Start filling in plot data
    in_sample_plot: Dict[str, PlotInSampleArm] = {}
    for i, obs in enumerate(observations):
        if obs.arm_name is None:
            raise ValueError("Observation must have arm name for plotting.")

        # Extract raw measurement
        obs_y = {}  # Observed metric means.
        obs_se = {}  # Observed metric standard errors.
        # Use the IVW data, not obs.data
        for j, metric_name in enumerate(obs_data[i].metric_names):
            if metric_name in metric_names:
                obs_y[metric_name] = obs_data[i].means[j]
                obs_se[metric_name] = np.sqrt(obs_data[i].covariance[j, j])
        # Make a prediction.
        if model.training_in_design[i]:
            features = obs.features
            if fixed_features is not None:
                features.update_features(fixed_features)
            pred_y, pred_se = _predict_at_point(model, features, metric_names)
        else:
            # Use raw data for out-of-design points
            pred_y = obs_y
            pred_se = obs_se
        in_sample_plot[not_none(obs.arm_name)] = PlotInSampleArm(
            name=not_none(obs.arm_name),
            y=obs_y,
            se=obs_se,
            parameters=obs.features.parameters,
            y_hat=pred_y,
            se_hat=pred_se,
            context_stratum=None,
        )
    return in_sample_plot, raw_data, arm_name_to_parameters
Example #15
    def create_experiment(
        self,
        parameters: List[Dict[str, Union[TParamValue, List[TParamValue]]]],
        name: Optional[str] = None,
        objective_name: Optional[str] = None,
        minimize: bool = False,
        parameter_constraints: Optional[List[str]] = None,
        outcome_constraints: Optional[List[str]] = None,
        status_quo: Optional[TParameterization] = None,
        overwrite_existing_experiment: bool = False,
        experiment_type: Optional[str] = None,
        choose_generation_strategy_kwargs: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Create a new experiment and save it if DBSettings available.

        Args:
            parameters: List of dictionaries representing parameters in the
                experiment search space. Required elements in the dictionaries
                are: "name" (name of this parameter, string), "type" (type of the
                parameter: "range", "fixed", or "choice", string), and "bounds"
                for range parameters (list of two values, lower bound first),
                "values" for choice parameters (list of values), and "value" for
                fixed parameters (single value).
            objective_name: Name of the metric used as objective in this experiment.
                This metric must be present in `raw_data` argument to `complete_trial`.
            name: Name of the experiment to be created.
            minimize: Whether this experiment represents a minimization problem.
            parameter_constraints: List of string representation of parameter
                constraints, such as "x3 >= x4" or "-x3 + 2*x4 - 3.5*x5 >= 2". For
                the latter constraints, any number of arguments is accepted, and
                acceptable operators are "<=" and ">=".
            outcome_constraints: List of string representation of outcome
                constraints of form "metric_name >= bound", like "m1 <= 3."
            status_quo: Parameterization of the current state of the system.
                If set, this will be added to each trial to be evaluated alongside
                test configurations.
            overwrite_existing_experiment: If an experiment has already been set
                on this `AxClient` instance, whether to reset it to the new one.
                If overwriting the experiment, generation strategy will be
                re-selected for the new experiment and restarted.
                To protect experiments in production, one cannot overwrite existing
                experiments if the experiment is already stored in the database,
                regardless of the value of `overwrite_existing_experiment`.
            choose_generation_strategy_kwargs: Keyword arguments to pass to
                `choose_generation_strategy` function which determines what
                generation strategy should be used when none was specified on init.
        """
        if self.db_settings_set and not name:
            raise ValueError(  # pragma: no cover
                "Must give the experiment a name if `db_settings` is not None."
            )
        if self.db_settings_set:
            experiment_id, _ = self._get_experiment_and_generation_strategy_db_id(
                experiment_name=not_none(name)
            )
            if experiment_id:
                raise ValueError(
                    f"Experiment {name} already exists in the database. "
                    "To protect experiments that are running in production, "
                    "overwriting stored experiments is not allowed. To "
                    "start a new experiment and store it, change the "
                    "experiment's name."
                )
        if self._experiment is not None:
            if overwrite_existing_experiment:
                exp_name = self.experiment._name or "untitled"
                new_exp_name = name or "untitled"
                logger.info(
                    f"Overwriting existing experiment ({exp_name}) on this client "
                    f"with new experiment ({new_exp_name}) and restarting the "
                    "generation strategy."
                )
                self._generation_strategy = None
            else:
                raise ValueError(
                    "Experiment already created for this client instance. "
                    "Set the `overwrite_existing_experiment` to `True` to overwrite "
                    "with new experiment."
                )

        self._experiment = make_experiment(
            name=name,
            parameters=parameters,
            objective_name=objective_name,
            minimize=minimize,
            parameter_constraints=parameter_constraints,
            outcome_constraints=outcome_constraints,
            status_quo=status_quo,
            experiment_type=experiment_type,
        )

        try:
            self._save_experiment_to_db_if_possible(
                experiment=self.experiment,
                suppress_all_errors=self._suppress_storage_errors,
            )
        except Exception:
            # Unset the experiment on this `AxClient` instance if saving it raised
            # an error, to avoid a case where the overall `create_experiment` call
            # fails with a storage error, but `self._experiment` is still set and
            # the user has to specify the `overwrite_existing_experiment` kwarg to
            # re-attempt experiment creation.
            self._experiment = None
            raise

        self._set_generation_strategy(
            choose_generation_strategy_kwargs=choose_generation_strategy_kwargs
        )
        self._save_generation_strategy_to_db_if_possible(
            generation_strategy=self.generation_strategy,
            suppress_all_errors=self._suppress_storage_errors,
        )
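
A usage sketch following the parameter-dictionary and constraint formats described in the docstring; all names, bounds, and constraints below are illustrative assumptions:

from ax.service.ax_client import AxClient

ax_client = AxClient()
ax_client.create_experiment(
    name="example_experiment",
    parameters=[
        {"name": "x1", "type": "range", "bounds": [0.0, 1.0]},
        {"name": "x2", "type": "range", "bounds": [0.0, 1.0]},
        {"name": "x3", "type": "choice", "values": ["relu", "tanh"]},
        {"name": "x4", "type": "fixed", "value": 2.0},
    ],
    objective_name="objective",
    minimize=True,
    parameter_constraints=["x1 + x2 <= 1.5"],  # linear constraint over range params
    outcome_constraints=["m1 <= 3.0"],         # bound on a metric named "m1"
)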
Example #16
    def __call__(
        self,
        search_space: Optional[SearchSpace] = None,
        experiment: Optional[Experiment] = None,
        data: Optional[Data] = None,
        silently_filter_kwargs: bool = True,  # TODO[Lena]: default to False
        **kwargs: Any,
    ) -> ModelBridge:
        assert self.value in MODEL_KEY_TO_MODEL_SETUP
        # All model bridges require either a search space or an experiment.
        assert search_space or experiment, "Search space or experiment required."
        model_setup_info = MODEL_KEY_TO_MODEL_SETUP[self.value]
        model_class = model_setup_info.model_class
        bridge_class = model_setup_info.bridge_class
        if not silently_filter_kwargs:
            validate_kwarg_typing(  # TODO[Lena]: T46467254, pragma: no cover
                typed_callables=[model_class, bridge_class],
                search_space=search_space,
                experiment=experiment,
                data=data,
                **kwargs,
            )

        # Create model with consolidated arguments: defaults + passed in kwargs.
        model_kwargs = consolidate_kwargs(
            kwargs_iterable=[get_function_default_arguments(model_class), kwargs],
            keywords=get_function_argument_names(model_class),
        )
        model = model_class(**model_kwargs)

        # Create `ModelBridge`: defaults + standard kwargs + passed in kwargs.
        bridge_kwargs = consolidate_kwargs(
            kwargs_iterable=[
                get_function_default_arguments(bridge_class),
                model_setup_info.standard_bridge_kwargs,
                {"transforms": model_setup_info.transforms},
                kwargs,
            ],
            keywords=get_function_argument_names(
                function=bridge_class, omit=["experiment", "search_space", "data"]
            ),
        )

        # Create model bridge with the consolidated kwargs.
        model_bridge = bridge_class(
            search_space=search_space or not_none(experiment).search_space,
            experiment=experiment,
            data=data,
            model=model,
            **bridge_kwargs,
        )

        # Temporarily ignore Botorch callable & torch-typed arguments, as those
        # are not serializable to JSON out-of-the-box. TODO[Lena]: T46527142
        if isinstance(model, TorchModel):
            model_kwargs = {kw: p for kw, p in model_kwargs.items() if not callable(p)}
            bridge_kwargs = {
                kw: p for kw, p in bridge_kwargs.items() if kw[:5] != "torch"
            }

        # Store all kwargs on model bridge, to be saved on generator run.
        model_bridge._set_kwargs_to_save(
            model_key=self.value, model_kwargs=model_kwargs, bridge_kwargs=bridge_kwargs
        )
        return model_bridge
Example #17
    def db_settings(self) -> DBSettings:
        """DB settings set on this instance; guaranteed to be non-None."""
        if self._db_settings is None:
            raise ValueError("No DB settings are set on this instance.")
        return not_none(self._db_settings)
Example #18
    def complete_trial(
        self,
        trial_index: int,
        # acceptable `raw_data` argument formats:
        # 1) {metric_name -> (mean, standard error)}
        # 2) (mean, standard error) and we assume metric name == objective name
        # 3) only the mean, and we assume metric name == objective name and
        #    standard error == 0
        raw_data: TEvaluationOutcome,
        metadata: Optional[Dict[str, str]] = None,
    ) -> None:
        """
        Completes the trial with given metric values and adds optional metadata
        to it.

        Args:
            trial_index: Index of trial within the experiment.
            raw_data: Evaluation data for the trial. Can be a mapping from
                metric name to a tuple of mean and SEM, just a tuple of mean and
                SEM if only one metric in optimization, or just the mean if there
                is no SEM.
            metadata: Additional metadata to track about this run.
        """
        assert isinstance(
            trial_index, int
        ), f"Trial index must be an int, got: {trial_index}."  # pragma: no cover
        trial = self.experiment.trials[trial_index]
        if not isinstance(trial, Trial):
            raise NotImplementedError(
                "Batch trial functionality is not yet available through Service API."
            )

        trial._status = TrialStatus.COMPLETED
        if metadata is not None:
            trial._run_metadata = metadata

        if isinstance(raw_data, dict):
            evaluations = {not_none(trial.arm).name: raw_data}
        elif isinstance(raw_data, tuple):
            evaluations = {
                not_none(trial.arm).name: {
                    self.experiment.optimization_config.objective.metric.name:
                    raw_data
                }
            }
        elif isinstance(raw_data, float) or isinstance(raw_data, int):
            evaluations = {
                not_none(trial.arm).name: {
                    self.experiment.optimization_config.objective.metric.name:
                    (
                        raw_data,
                        0.0,
                    )
                }
            }
        else:
            raise ValueError(
                "Raw data has an invalid type. The data must either be in the form "
                "of a dictionary of metric names to mean, sem tuples, "
                "or a single mean, sem tuple, or a single mean.")

        data = Data.from_evaluations(evaluations, trial.index)
        self.experiment.attach_data(data)
        self._updated_trials.append(trial_index)
        self._save_experiment_if_possible()
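
Illustrative calls covering the three accepted `raw_data` formats listed in the signature comment above; the `ax_client` instance, objective metric name, and trial indices are assumptions:

# Assumes `ax_client` is an AxClient with a single-objective experiment whose
# objective metric is named "objective".

# 1) Mapping from metric name to (mean, standard error).
ax_client.complete_trial(trial_index=0, raw_data={"objective": (1.23, 0.05)})

# 2) (mean, standard error) tuple; metric name defaults to the objective name.
ax_client.complete_trial(trial_index=1, raw_data=(1.17, 0.05))

# 3) Bare mean; standard error is assumed to be 0.
ax_client.complete_trial(trial_index=2, raw_data=1.05)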
Example #19
    def generator_run_from_sqa(self,
                               generator_run_sqa: SQAGeneratorRun,
                               reduced_state: bool = False) -> GeneratorRun:
        """Convert SQLAlchemy GeneratorRun to Ax GeneratorRun.

        Args:
            generator_run_sqa: `SQAGeneratorRun` to decode.
            reduced_state: Whether to load generator runs with a slightly reduced
                state (without model state, search space, and optimization config).
        """
        arms = []
        weights = []
        opt_config = None
        search_space = None

        for arm_sqa in generator_run_sqa.arms:
            arms.append(self.arm_from_sqa(arm_sqa=arm_sqa))
            weights.append(arm_sqa.weight)

        if not reduced_state:
            (
                opt_config,
                tracking_metrics,
            ) = self.opt_config_and_tracking_metrics_from_sqa(
                metrics_sqa=generator_run_sqa.metrics)
            if len(tracking_metrics) > 0:
                raise SQADecodeError(  # pragma: no cover
                    "GeneratorRun should not have tracking metrics.")

            search_space = self.search_space_from_sqa(
                parameters_sqa=generator_run_sqa.parameters,
                parameter_constraints_sqa=generator_run_sqa.parameter_constraints,
            )

        best_arm_predictions = None
        model_predictions = None
        if (generator_run_sqa.best_arm_parameters is not None
                and generator_run_sqa.best_arm_predictions is not None):
            best_arm = Arm(
                name=generator_run_sqa.best_arm_name,
                parameters=not_none(generator_run_sqa.best_arm_parameters),
            )
            best_arm_predictions = (
                best_arm,
                tuple(not_none(generator_run_sqa.best_arm_predictions)),
            )
        model_predictions = (
            tuple(not_none(generator_run_sqa.model_predictions))
            if generator_run_sqa.model_predictions is not None else None)

        generator_run = GeneratorRun(
            arms=arms,
            weights=weights,
            optimization_config=opt_config,
            search_space=search_space,
            fit_time=generator_run_sqa.fit_time,
            gen_time=generator_run_sqa.gen_time,
            best_arm_predictions=best_arm_predictions,  # pyre-ignore[6]
            model_predictions=model_predictions,
            model_key=generator_run_sqa.model_key,
            model_kwargs=None if reduced_state else object_from_json(
                generator_run_sqa.model_kwargs),
            bridge_kwargs=None if reduced_state else object_from_json(
                generator_run_sqa.bridge_kwargs),
            gen_metadata=None if reduced_state else object_from_json(
                generator_run_sqa.gen_metadata),
            model_state_after_gen=None if reduced_state else object_from_json(
                generator_run_sqa.model_state_after_gen),
            generation_step_index=generator_run_sqa.generation_step_index,
            candidate_metadata_by_arm_signature=object_from_json(
                generator_run_sqa.candidate_metadata_by_arm_signature),
        )
        generator_run._time_created = generator_run_sqa.time_created
        generator_run._generator_run_type = self.get_enum_name(
            value=generator_run_sqa.generator_run_type,
            enum=self.config.generator_run_type_enum,
        )
        generator_run._index = generator_run_sqa.index
        generator_run.db_id = generator_run_sqa.id
        return generator_run
Example #20
def _observations_from_dataframe(
    experiment: Experiment,
    df: pd.DataFrame,
    cols: List[str],
    arm_name_only: bool,
    map_keys: Iterable[str],
    include_abandoned: bool,
    map_keys_as_parameters: bool = False,
) -> List[Observation]:
    """Helper method for extracting observations grouped by `cols` from `df`.

    Args:
        experiment: Experiment with arm parameters.
        df: DataFrame derived from experiment Data.
        cols: columns used to group data into different observations.
        map_keys: columns that map dict-like Data
            e.g. `timestamp` in timeseries data, `epoch` in ML training traces.
        include_abandoned: Whether data for abandoned trials and arms should
            be included in the observations, returned from this function.
        map_keys_as_parameters: Whether map_keys should be returned as part of
            the parameters of the Observation objects.

    Returns:
        List of Observation objects.
    """
    observations = []
    abandoned_arms_dict = {}
    for g, d in df.groupby(by=cols):
        obs_kwargs = {}
        if arm_name_only:
            features = {"arm_name": g}
            arm_name = g
            trial_index = None
        else:
            features = dict(zip(cols, g))
            arm_name = features["arm_name"]
            trial_index = features.get("trial_index", None)

        if trial_index is not None:
            trial = experiment.trials[trial_index]
            metadata = trial._get_candidate_metadata(arm_name) or {}
            if Keys.TRIAL_COMPLETION_TIMESTAMP not in metadata:
                if trial._time_completed is not None:
                    metadata[Keys.TRIAL_COMPLETION_TIMESTAMP] = not_none(
                        trial._time_completed).timestamp()
            obs_kwargs[Keys.METADATA] = metadata

            if not include_abandoned and trial.status.is_abandoned:
                # Exclude abandoned trials.
                continue

            if not include_abandoned and isinstance(trial, BatchTrial):
                # Exclude abandoned arms from batch trial's observations.
                if trial.index not in abandoned_arms_dict:
                    # Save abandoned arm names to dict to avoid recomputing them
                    # on creation of every observation.
                    abandoned_arms_dict[trial.index] = trial.abandoned_arm_names
                if arm_name in abandoned_arms_dict[trial.index]:
                    continue

        obs_parameters = experiment.arms_by_name[arm_name].parameters.copy()
        if obs_parameters:
            obs_kwargs["parameters"] = obs_parameters
        for f, val in features.items():
            if f in OBS_KWARGS:
                obs_kwargs[f] = val
        fidelities = features.get("fidelities")
        if fidelities is not None:
            obs_parameters.update(json.loads(fidelities))

        for map_key in map_keys:
            if map_key in obs_parameters or map_keys_as_parameters:
                obs_parameters[map_key] = features[map_key]
            else:
                obs_kwargs[Keys.METADATA][map_key] = features[map_key]
        observations.append(
            Observation(
                features=ObservationFeatures(**obs_kwargs),
                data=ObservationData(
                    metric_names=d["metric_name"].tolist(),
                    means=d["mean"].values,
                    covariance=np.diag(d["sem"].values**2),
                ),
                arm_name=arm_name,
            ))
    return observations
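
A sketch of the minimal DataFrame shape this helper expects, using the column names referenced above; the values are illustrative:

import pandas as pd

# Grouping columns such as `arm_name` and `trial_index`, plus `metric_name`,
# `mean`, and `sem` for the observation data.
df = pd.DataFrame(
    {
        "arm_name": ["0_0", "0_0", "1_0"],
        "trial_index": [0, 0, 1],
        "metric_name": ["m1", "m2", "m1"],
        "mean": [1.0, 2.5, 1.2],
        "sem": [0.1, 0.2, 0.1],
    }
)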
Example #21
    def gen(
        self,
        n: int,
        bounds: List,
        objective_weights: Tensor,
        outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
        linear_constraints: Optional[Tuple[Tensor, Tensor]] = None,
        fixed_features: Optional[Dict[int, float]] = None,
        pending_observations: Optional[List[Tensor]] = None,
        model_gen_options: Optional[TConfig] = None,
        rounding_func: Optional[Callable[[Tensor], Tensor]] = None,
        target_fidelities: Optional[Dict[int, float]] = None,
    ) -> Tuple[Tensor, Tensor, TGenMetadata,
               Optional[List[TCandidateMetadata]]]:
        r"""Generate new candidates.

        Args:
            n: Number of candidates to generate.
            bounds: A list of (lower, upper) tuples for each column of X.
            objective_weights: The objective is to maximize a weighted sum of
                the columns of f(x). These are the weights.
            outcome_constraints: A tuple of (A, b). For k outcome constraints
                and m outputs at f(x), A is (k x m) and b is (k x 1) such that
                A f(x) <= b.
            linear_constraints: A tuple of (A, b). For k linear constraints on
                d-dimensional x, A is (k x d) and b is (k x 1) such that
                A x <= b.
            fixed_features: A map {feature_index: value} for features that
                should be fixed to a particular value during generation.
            pending_observations:  A list of m (k_i x d) feature tensors X
                for m outcomes and k_i pending observations for outcome i.
            model_gen_options: A config dictionary that can contain
                model-specific options.
            rounding_func: A function that rounds an optimization result
                appropriately (i.e., according to `round-trip` transformations).
            target_fidelities: A map {feature_index: value} of fidelity feature
                column indices to their respective target fidelities. Used for
                multi-fidelity optimization.

        Returns:
            A 4-element tuple containing

            - (n x d) tensor of generated points.
            - n-tensor of weights for each point.
            - Dictionary of model-specific metadata for the given
                generation candidates.
            - Optional list of candidate metadata for the generated points
                (always None here).
        """
        options = model_gen_options or {}
        acf_options = options.get("acquisition_function_kwargs", {})
        optimizer_options = options.get("optimizer_kwargs", {})

        X_pending, X_observed = _get_X_pending_and_observed(
            Xs=self.Xs,
            pending_observations=pending_observations,
            objective_weights=objective_weights,
            outcome_constraints=outcome_constraints,
            bounds=bounds,
            linear_constraints=linear_constraints,
            fixed_features=fixed_features,
        )

        # subset model only to the outcomes we need for the optimization
        model = not_none(self.model)
        if options.get("subset_model", True):
            model, objective_weights, outcome_constraints, _ = subset_model(
                model=model,
                objective_weights=objective_weights,
                outcome_constraints=outcome_constraints,
            )

        objective = get_botorch_objective(
            model=model,
            objective_weights=objective_weights,
            outcome_constraints=outcome_constraints,
            X_observed=X_observed,
        )

        inequality_constraints = _to_inequality_constraints(linear_constraints)
        # TODO: update optimizers to handle inequality_constraints
        if inequality_constraints is not None:
            raise UnsupportedError(
                "Inequality constraints are not yet supported for KnowledgeGradient!"
            )

        # extract a few options
        n_fantasies = acf_options.get("num_fantasies", 64)
        qmc = acf_options.get("qmc", True)
        seed_inner = acf_options.get("seed_inner", None)
        num_restarts = optimizer_options.get("num_restarts", 40)
        raw_samples = optimizer_options.get("raw_samples", 1024)

        # get current value
        current_value = self._get_current_value(
            model=model,
            bounds=bounds,
            X_observed=not_none(X_observed),
            objective_weights=objective_weights,
            outcome_constraints=outcome_constraints,
            linear_constraints=linear_constraints,
            seed_inner=seed_inner,
            fixed_features=fixed_features,
            model_gen_options=model_gen_options,
            target_fidelities=target_fidelities,
            qmc=qmc,
        )

        bounds_ = torch.tensor(bounds, dtype=self.dtype, device=self.device)
        bounds_ = bounds_.transpose(0, 1)

        # get acquisition function
        acq_function = _instantiate_KG(
            model=model,
            objective=objective,
            qmc=qmc,
            n_fantasies=n_fantasies,
            num_trace_observations=options.get("num_trace_observations", 0),
            mc_samples=acf_options.get("mc_samples", 256),
            seed_inner=seed_inner,
            seed_outer=acf_options.get("seed_outer", None),
            X_pending=X_pending,
            target_fidelities=target_fidelities,
            fidelity_weights=options.get("fidelity_weights"),
            current_value=current_value,
            cost_intercept=self.cost_intercept,
        )

        # optimize and get new points
        new_x = _optimize_and_get_candidates(
            acq_function=acq_function,
            bounds_=bounds_,
            n=n,
            num_restarts=num_restarts,
            raw_samples=raw_samples,
            optimizer_options=optimizer_options,
            rounding_func=rounding_func,
            inequality_constraints=inequality_constraints,
            fixed_features=fixed_features,
        )

        return new_x, torch.ones(n, dtype=self.dtype), {}, None
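
A small sketch of the `(A, b)` encoding described in the docstring, for a single linear constraint on a 3-dimensional input (shapes follow the `k x d` / `k x 1` convention above):

import torch

# One linear constraint (k=1) on 3-dimensional x (d=3): 1.0*x0 + 2.0*x1 <= 1.0.
A = torch.tensor([[1.0, 2.0, 0.0]])  # shape (k x d)
b = torch.tensor([[1.0]])            # shape (k x 1)
linear_constraints = (A, b)          # passed as the `linear_constraints` argument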
Example #22
    def test_not_none(self):
        self.assertEqual(not_none("not_none"), "not_none")
        with self.assertRaises(ValueError):
            not_none(None)
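
For reference, a minimal sketch of a `not_none` helper consistent with the behavior exercised by this test; the actual Ax implementation (in `ax.utils.common.typeutils`) may differ in details such as the error message:

from typing import Optional, TypeVar

T = TypeVar("T")


def not_none(value: Optional[T]) -> T:
    """Return `value` unchanged if it is not None; otherwise raise ValueError.

    Narrows Optional[T] to T for type checkers and fails fast at runtime.
    """
    if value is None:
        raise ValueError("Expected a non-None value.")
    return value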
Example #23
    def get_trial_parameters(self, trial_index: int) -> TParameterization:
        """Retrieve the parameterization of the trial by the given index."""
        return not_none(self._get_trial(trial_index).arm).parameters
Example #24
    def gen(
        self,
        experiment: Experiment,
        new_data: Optional[Data] = None,  # Take in just the new data.
        n: int = 1,
        **kwargs: Any,
    ) -> GeneratorRun:
        """Produce the next points in the experiment."""
        # Get arm signatures for each entry in new_data that is indeed new.
        new_arms = self._get_new_arm_signatures(
            experiment=experiment, new_data=new_data
        )
        enough_observed = (
            len(self._observed) + len(new_arms)
        ) >= self._curr.min_arms_observed
        unlimited_arms = self._curr.num_arms == -1
        enough_generated = (
            not unlimited_arms and len(self._generated) >= self._curr.num_arms
        )
        remaining_arms = self._curr.num_arms - len(self._generated)

        # Check that minimum observed_arms is satisfied if it's enforced.
        if self._curr.enforce_num_arms and enough_generated and not enough_observed:
            raise ValueError(
                "All trials for current model have been generated, but not enough "
                "data has been observed to fit next model. Try again when more data "
                "are available."
            )
            # TODO[Lena, T44021164]: take into account failed trials. Potentially
            # reduce `_generated` count when a trial mentioned in new data failed.
        if (
            self._curr.enforce_num_arms
            and not unlimited_arms
            and 0 < remaining_arms < n
        ):
            raise ValueError(
                f"Cannot generate {n} new arms as there are only {remaining_arms} "
                "remaining arms to generate using the current model."
            )

        all_data = (
            Data.from_multiple_data(data=[self._data, new_data])
            if new_data
            else self._data
        )

        if self._model is None:
            # Instantiate the first model.
            self._set_current_model(experiment=experiment, data=all_data, **kwargs)
        elif enough_generated and enough_observed:
            # Change to the next model.
            self._change_model(experiment=experiment, data=all_data, **kwargs)
        elif new_data is not None:
            # We're sticking with the current model, but update with new data
            self._model.update(experiment=experiment, data=new_data)

        gen_run = not_none(self._model).gen(n=n, **(self._curr.model_gen_kwargs or {}))

        # If nothing failed, update known data, _generated, and _observed.
        self._data = all_data
        self._observed.extend(new_arms)
        self._generated.extend(a.signature for a in gen_run.arms)
        return gen_run
Example #25
    def objective_name(self) -> str:
        """Returns the name of the objective in this optimization."""
        opt_config = not_none(self.experiment.optimization_config)
        return opt_config.objective.metric.name
Example #26
File: array.py Project: dme65/Ax
    def feature_importances(self, metric_name: str) -> Dict[str, float]:
        """Map each tunable parameter to its estimated importance for `metric_name`."""
        importances_tensor = not_none(self.model).feature_importances()
        importances_dict = dict(zip(self.outcomes, importances_tensor))
        importances_arr = importances_dict[metric_name].flatten()
        return dict(zip(self.parameters, importances_arr))
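
A hedged usage sketch: `model_bridge` stands in for a fitted model bridge exposing this method, and "accuracy" is a placeholder metric name rather than one taken from the snippet.

# Assumption: `model_bridge` is a fitted bridge and "accuracy" is one of its outcomes.
importances = model_bridge.feature_importances(metric_name="accuracy")
for parameter_name, importance in sorted(
    importances.items(), key=lambda item: item[1], reverse=True
):
    print(f"{parameter_name}: {importance:.3f}")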
Example #27
    def __call__(
        self,
        search_space: Optional[SearchSpace] = None,
        experiment: Optional[Experiment] = None,
        data: Optional[Data] = None,
        silently_filter_kwargs: bool = False,
        **kwargs: Any,
    ) -> ModelBridge:
        assert self.value in MODEL_KEY_TO_MODEL_SETUP, f"Unknown model {self.value}"
        # All model bridges require either a search space or an experiment.
        assert search_space or experiment, "Search space or experiment required."
        search_space = search_space or not_none(experiment).search_space
        model_setup_info = MODEL_KEY_TO_MODEL_SETUP[self.value]
        model_class = model_setup_info.model_class
        bridge_class = model_setup_info.bridge_class
        if not silently_filter_kwargs:
            validate_kwarg_typing(
                typed_callables=[model_class, bridge_class],
                search_space=search_space,
                experiment=experiment,
                data=data,
                **kwargs,
            )

        # Create model with consolidated arguments: defaults + passed in kwargs.
        model_kwargs = consolidate_kwargs(
            kwargs_iterable=[
                get_function_default_arguments(model_class), kwargs
            ],
            keywords=get_function_argument_names(model_class),
        )
        model = model_class(**model_kwargs)

        # Create `ModelBridge`: defaults + standard kwargs + passed in kwargs.
        bridge_kwargs = consolidate_kwargs(
            kwargs_iterable=[
                get_function_default_arguments(bridge_class),
                model_setup_info.standard_bridge_kwargs,
                {"transforms": model_setup_info.transforms},
                kwargs,
            ],
            keywords=get_function_argument_names(
                function=bridge_class,
                omit=["experiment", "search_space", "data"]),
        )

        # Create model bridge with the consolidated kwargs.
        model_bridge = bridge_class(
            search_space=search_space or not_none(experiment).search_space,
            experiment=experiment,
            data=data,
            model=model,
            **bridge_kwargs,
        )

        if model_setup_info.not_saved_model_kwargs:
            for key in model_setup_info.not_saved_model_kwargs:
                model_kwargs.pop(key, None)

        # Store all kwargs on model bridge, to be saved on generator run.
        model_bridge._set_kwargs_to_save(
            model_key=self.value,
            model_kwargs=_encode_callables_as_references(model_kwargs),
            bridge_kwargs=_encode_callables_as_references(bridge_kwargs),
        )
        return model_bridge
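
Because each registry entry is callable, fitting a model bridge is a single call. A hedged usage sketch; the `Models` import path and the `GPEI` key follow Ax's model registry conventions, while `experiment` is assumed to already exist with attached data:

# Assumption: `experiment` is an existing Ax Experiment with data to fetch.
from ax.modelbridge.registry import Models

model_bridge = Models.GPEI(experiment=experiment, data=experiment.fetch_data())
generator_run = model_bridge.gen(n=2)  # Two new candidate arms.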
Example #28
File: array.py Project: dme65/Ax
    def _pareto_frontier(
        self,
        objective_thresholds: Optional[TRefPoint] = None,
        observation_features: Optional[List[ObservationFeatures]] = None,
        observation_data: Optional[List[ObservationData]] = None,
        optimization_config: Optional[MultiObjectiveOptimizationConfig] = None,
    ) -> List[ObservationData]:
        # TODO(jej): This method should be refactored to move tensor
        # conversions into a separate utility, and eventually should be
        # moved into base.py.
        # The reason this method is currently implemented in array.py is to
        # allow the broadest possible set of models to call frontier and
        # hypervolume evaluation functions given the current API.
        X = (self.transform_observation_features(observation_features)
             if observation_features else None)
        X = self._array_to_tensor(X) if X is not None else None
        Y, Yvar = (None, None)
        if observation_data:
            Y, Yvar = self.transform_observation_data(observation_data)
        if Y is not None and Yvar is not None:
            Y, Yvar = (self._array_to_tensor(Y), self._array_to_tensor(Yvar))

        # Resolve the optimization config.
        mooc = optimization_config or checked_cast_optional(
            MultiObjectiveOptimizationConfig, self._optimization_config)
        if not mooc:
            raise ValueError(
                ("experiment must have an existing optimization_config "
                 "of type MultiObjectiveOptimizationConfig "
                 "or `optimization_config` must be passed as an argument."))
        if not isinstance(mooc, MultiObjectiveOptimizationConfig):
            mooc = not_none(
                MultiObjectiveOptimizationConfig.from_opt_conf(mooc))
        if objective_thresholds:
            mooc = mooc.clone_with_args(
                objective_thresholds=objective_thresholds)

        optimization_config = mooc

        # Transform OptimizationConfig.
        optimization_config = self.transform_optimization_config(
            optimization_config=optimization_config,
            fixed_features=ObservationFeatures(parameters={}),
        )
        # Extract weights, constraints, and objective_thresholds
        objective_weights = extract_objective_weights(
            objective=optimization_config.objective, outcomes=self.outcomes)
        outcome_constraints = extract_outcome_constraints(
            outcome_constraints=optimization_config.outcome_constraints,
            outcomes=self.outcomes,
        )
        objective_thresholds_arr = extract_objective_thresholds(
            objective_thresholds=optimization_config.objective_thresholds,
            outcomes=self.outcomes,
        )
        # Transform to tensors.
        obj_w, oc_c, _, _ = validate_and_apply_final_transform(
            objective_weights=objective_weights,
            outcome_constraints=outcome_constraints,
            linear_constraints=None,
            pending_observations=None,
            final_transform=self._array_to_tensor,
        )
        obj_t = self._array_to_tensor(objective_thresholds_arr)
        frontier_evaluator = self._get_frontier_evaluator()
        # pyre-ignore[28]: Unexpected keyword `model` to anonymous call
        f, cov = frontier_evaluator(
            model=self.model,
            X=X,
            Y=Y,
            Yvar=Yvar,
            objective_thresholds=obj_t,
            objective_weights=obj_w,
            outcome_constraints=oc_c,
        )
        f, cov = (
            f.detach().cpu().clone().numpy(),
            cov.detach().cpu().clone().numpy(),
        )
        frontier_observation_data = array_to_observation_data(
            f=f, cov=cov, outcomes=not_none(self.outcomes))
        # Untransform observations
        for t in reversed(self.transforms.values()):  # noqa T484
            frontier_observation_data = t.untransform_observation_data(
                frontier_observation_data, [])
        return frontier_observation_data
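
The frontier evaluator's core job is a non-domination test over predicted objective means. The following is not Ax's evaluator, just a self-contained NumPy illustration of that test, assuming every column of `f` is to be maximized:

import numpy as np


def pareto_mask(f: np.ndarray) -> np.ndarray:
    """Boolean mask of non-dominated rows of `f`, assuming all columns are maximized."""
    n = f.shape[0]
    mask = np.ones(n, dtype=bool)
    for i in range(n):
        # Row i is dominated if some row is >= on every objective and > on at least one.
        dominated = np.all(f >= f[i], axis=1) & np.any(f > f[i], axis=1)
        mask[i] = not dominated.any()
    return mask


f = np.array([[1.0, 2.0], [2.0, 1.0], [0.5, 0.5]])
print(pareto_mask(f))  # [ True  True False]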
Example #29
def choose_generation_strategy(
    search_space: SearchSpace,
    use_batch_trials: bool = False,
    enforce_sequential_optimization: bool = True,
    random_seed: Optional[int] = None,
    winsorize_botorch_model: bool = False,
    winsorization_limits: Optional[Tuple[Optional[float],
                                         Optional[float]]] = None,
    no_bayesian_optimization: bool = False,
    num_trials: Optional[int] = None,
    num_initialization_trials: Optional[int] = None,
    max_parallelism_cap: Optional[int] = None,
    max_parallelism_override: Optional[int] = None,
    optimization_config: Optional[OptimizationConfig] = None,
    should_deduplicate: bool = False,
    use_saasbo: bool = False,
    verbose: Optional[bool] = None,
    experiment: Optional[Experiment] = None,
) -> GenerationStrategy:
    """Select an appropriate generation strategy based on the properties of
    the search space and expected settings of the experiment, such as number of
    arms per trial, optimization algorithm settings, expected number of trials
    in the experiment, etc.

    Args:
        search_space: SearchSpace, based on the properties of which to select the
            generation strategy.
        use_batch_trials: Whether this generation strategy will be used to generate
            batched trials instead of 1-arm trials.
        enforce_sequential_optimization: Whether to enforce that 1) the generation
            strategy needs to be updated with `min_trials_observed` observations for
            a given generation step before proceeding to the next one and 2) the
            maximum number of trials running at once (`max_parallelism`) is enforced
            for the BayesOpt step. NOTE: `max_parallelism_override` and
            `max_parallelism_cap` settings will still take effect on max parallelism
            even if `enforce_sequential_optimization=False`, so if those settings are
            specified, max parallelism will be enforced.
        random_seed: Fixed random seed for the Sobol generator.
        winsorize_botorch_model: Whether to apply the winsorization transform
            prior to applying other transforms for fitting the BoTorch model.
        winsorization_limits: Bounds for winsorization, if winsorizing, expressed
            as percentile. Usually only the upper winsorization trim is used when
            minimizing, and only the lower when maximizing.
        no_bayesian_optimization: If True, Bayesian optimization generation
            strategy will not be suggested and quasi-random strategy will be used.
        num_trials: Total number of trials in the optimization, if
            known in advance.
        num_initialization_trials: Specific number of initialization trials, if wanted.
            Typically, initialization trials are generated quasi-randomly.
        max_parallelism_override: Integer, with which to override the default max
            parallelism setting for all steps in the generation strategy returned from
            this function. Each generation step has a `max_parallelism` value, which
            restricts how many trials can run simultaneously during a given generation
            step. By default, the parallelism setting is chosen as appropriate for the
            model in a given generation step. If `max_parallelism_override` is -1,
            no max parallelism will be enforced for any step of the generation strategy.
            Be aware that parallelism is limited to improve performance of Bayesian
            optimization, so only disable its limiting if necessary.
        max_parallelism_cap: Integer cap on parallelism in this generation strategy.
            If specified, the `max_parallelism` setting in each generation step will
            be set to the minimum of the default setting for that step and the value
            of this cap. `max_parallelism_cap` is meant to be just a hard limit on
            parallelism (e.g. to avoid overloading the machine(s) that evaluate the
            experiment trials). Specify only if not specifying
            `max_parallelism_override`.
        use_saasbo: Whether to use SAAS prior for any GPEI generation steps.
        verbose: Whether GP model should produce verbose logs. If not ``None``, its
            value gets added to ``model_kwargs`` during ``generation_strategy``
            construction. Defaults to ``True`` for SAASBO, else ``None``. Verbose
            outputs are currently only available for SAASBO, so if ``verbose is not
            None`` for a different model type, it will be overridden to ``None`` with
            a warning.
        experiment: If specified, `_experiment` attribute of the generation strategy
            will be set to this experiment (useful for associating a generation
            strategy with a given experiment before it's first used to ``gen`` with
            that experiment).
    """
    suggested_model = _suggest_gp_model(
        search_space=search_space,
        num_trials=num_trials,
        optimization_config=optimization_config,
        use_saasbo=use_saasbo,
    )
    if not no_bayesian_optimization and suggested_model is not None:
        if not enforce_sequential_optimization and (  # pragma: no cover
                max_parallelism_override or max_parallelism_cap):
            logger.info(
                "If `enforce_sequential_optimization` is False, max parallelism is "
                "not enforced and other max parallelism settings will be ignored."
            )
        if max_parallelism_override and max_parallelism_cap:
            raise ValueError(
                "If `max_parallelism_override` specified, cannot also apply "
                "`max_parallelism_cap`.")

        # If number of initialization trials is not specified, estimate it.
        if num_initialization_trials is None:
            if use_batch_trials:  # Batched trials.
                num_initialization_trials = 1
            elif num_trials is not None:  # 1-arm trials with specified `num_trials`.
                num_initialization_trials = max(
                    5,
                    min(
                        not_none(num_trials) // 5,
                        2 * len(search_space.tunable_parameters),
                    ),
                )
            else:  # 1-arm trials.
                num_initialization_trials = max(
                    5, 2 * len(search_space.tunable_parameters))

        # Determine max parallelism for the generation steps.
        if max_parallelism_override == -1:
            # `max_parallelism_override` of -1 means no max parallelism enforcement in
            # the generation strategy, which means `max_parallelism=None` in gen. steps.
            sobol_parallelism = bo_parallelism = None
        elif max_parallelism_override is not None:
            sobol_parallelism = bo_parallelism = max_parallelism_override
        elif max_parallelism_cap is not None:  # Max parallelism override is None by now
            sobol_parallelism = max_parallelism_cap
            bo_parallelism = min(max_parallelism_cap,
                                 DEFAULT_BAYESIAN_PARALLELISM)
        elif not enforce_sequential_optimization:
            # If no max parallelism settings specified and not enforcing sequential
            # optimization, do not limit parallelism.
            sobol_parallelism = bo_parallelism = None
        else:  # No additional max parallelism settings, use defaults
            sobol_parallelism = None  # No restriction on Sobol phase
            bo_parallelism = DEFAULT_BAYESIAN_PARALLELISM

        # `verbose` default behavior and overrides
        model_is_saasbo = not_none(suggested_model).name in [
            "FULLYBAYESIANMOO",
            "FULLYBAYESIAN",
        ]
        if verbose is None and model_is_saasbo:
            verbose = True
        elif verbose is not None and not model_is_saasbo:
            logger.warning(
                f"Overriding `verbose = {verbose}` to `None` for non-SAASBO GP step."
            )
            verbose = None

        # create `generation_strategy`
        gs = GenerationStrategy(steps=[
            _make_sobol_step(
                num_trials=num_initialization_trials,
                enforce_num_trials=enforce_sequential_optimization,
                seed=random_seed,
                max_parallelism=sobol_parallelism,
                should_deduplicate=should_deduplicate,
            ),
            _make_botorch_step(
                model=suggested_model,
                winsorize=winsorize_botorch_model,
                winsorization_limits=winsorization_limits,
                max_parallelism=bo_parallelism,
                should_deduplicate=should_deduplicate,
                verbose=verbose,
            ),
        ])
        logger.info(
            f"Using Bayesian Optimization generation strategy: {gs}. Iterations "
            f"after {num_initialization_trials} will take longer to generate due "
            "to model-fitting."
        )
    else:
        if verbose is not None:
            logger.warning(
                f"Ignoring `verbose = {verbose}` for `generation_strategy` "
                "without a GP step.")
        gs = GenerationStrategy(steps=[
            _make_sobol_step(seed=random_seed,
                             should_deduplicate=should_deduplicate)
        ])
        logger.info("Using Sobol generation strategy.")
    if experiment:
        gs.experiment = experiment
    return gs
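
A hedged usage sketch using only arguments from the signature above; `experiment` is an assumption standing in for an existing Ax experiment with a configured search space:

# Assumption: `experiment` is an existing Ax Experiment.
generation_strategy = choose_generation_strategy(
    search_space=experiment.search_space,
    num_trials=30,
    max_parallelism_cap=3,
    experiment=experiment,
)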
Example #30
    def _get_frontier_evaluator(self) -> TFrontierEvaluator:
        return (
            # pyre-ignore [16]: `TorchModel` has no attribute `frontier_evaluator`.
            not_none(self.model).frontier_evaluator
            if hasattr(self.model, "frontier_evaluator")
            else get_default_frontier_evaluator()
        )