Example #1
    def should_stop_trials_early(
        self,
        trial_indices: Set[int],
        experiment: Experiment,
        **kwargs: Dict[str, Any],
    ) -> Dict[int, Optional[str]]:
        # Make sure that we can look up data for the trial,
        # even though we won't use it in this dummy strategy.
        data = experiment.lookup_data(trial_indices=trial_indices)
        if data.df.empty:
            raise Exception(
                f"No data found for trials {trial_indices}; "
                "can't determine whether or not to stop early.")
        return {idx: None for idx in trial_indices if idx % 2 == 1}
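A minimal sketch, not part of the original example, of how a dummy decision rule like the one above could be packaged as a reusable strategy. The BaseEarlyStoppingStrategy base class import and the Scheduler wiring in the trailing comment are assumptions about the Ax APIs used throughout these examples and may differ between Ax versions.

from typing import Any, Dict, Optional, Set

from ax.core.experiment import Experiment
from ax.early_stopping.strategies import BaseEarlyStoppingStrategy


class StopOddTrialsStrategy(BaseEarlyStoppingStrategy):
    """Toy strategy: suggest stopping every odd-indexed trial."""

    def should_stop_trials_early(
        self,
        trial_indices: Set[int],
        experiment: Experiment,
        **kwargs: Dict[str, Any],
    ) -> Dict[int, Optional[str]]:
        # Mirror the data check from the example above before deciding.
        data = experiment.lookup_data(trial_indices=trial_indices)
        if data.df.empty:
            raise Exception(f"No data found for trials {trial_indices}.")
        return {idx: "odd trial index" for idx in trial_indices if idx % 2 == 1}


# Hypothetical wiring (assumed API; `experiment` and `generation_strategy`
# must already exist):
# scheduler = Scheduler(
#     experiment=experiment,
#     generation_strategy=generation_strategy,
#     options=SchedulerOptions(early_stopping_strategy=StopOddTrialsStrategy()),
# )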
Example #2
    def _check_validity_and_get_data(
            self, experiment: Experiment) -> Optional[MapData]:
        """Validity checks and returns the `MapData` used for early stopping."""
        if experiment.optimization_config is None:
            raise UnsupportedError(  # pragma: no cover
                "Experiment must have an optimization config in order to use an "
                "early stopping strategy.")

        optimization_config = not_none(experiment.optimization_config)
        objective_name = optimization_config.objective.metric.name

        data = experiment.lookup_data()
        if data.df.empty:
            logger.info(f"{self.__class__.__name__} received empty data. "
                        "Not stopping any trials.")
            return None
        if objective_name not in set(data.df["metric_name"]):
            logger.info(f"{self.__class__.__name__} did not receive data "
                        "from the objective metric. Not stopping any trials.")
            return None

        if not isinstance(data, MapData):
            logger.info(
                f"{self.__class__.__name__} expects MapData, but the "
                f"data attached to experiment is of type {type(data)}. "
                "Not stopping any trials.")
            return None

        data = checked_cast(MapData, data)
        map_keys = data.map_keys
        if len(list(map_keys)) > 1:
            logger.info(
                f"{self.__class__.__name__} expects MapData with a single "
                "map key, but the data attached to the experiment has multiple: "
                f"{data.map_keys}. Not stopping any trials.")
            return None
        return data
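The helper above is typically called at the top of a strategy's decision method. The sketch below is not from the source; it only illustrates the bail-out pattern, assuming a `strategy` object that exposes `_check_validity_and_get_data` as in the example.

from typing import Dict, Optional

from ax.core.experiment import Experiment
from ax.core.map_data import MapData


def decide(strategy, experiment: Experiment) -> Dict[int, Optional[str]]:
    """Toy caller; assumes `strategy` exposes `_check_validity_and_get_data`."""
    map_data: Optional[MapData] = strategy._check_validity_and_get_data(
        experiment=experiment)
    if map_data is None:
        # Validity checks failed (empty data, wrong type, or multiple map
        # keys): suggest no early stops.
        return {}
    map_key = next(iter(map_data.map_keys))  # single map key per the checks
    # Strategy-specific logic over `map_data.df` and `map_key` would go here;
    # this toy caller stops nothing.
    return {}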
Example #3
File: best_point.py Project: kjanoudi/Ax
def get_pareto_optimal_parameters(
    experiment: Experiment,
    generation_strategy: GenerationStrategy,
    use_model_predictions: bool = True,
) -> Optional[Dict[int, Tuple[TParameterization, TModelPredictArm]]]:
    """Identifies the best parameterizations tried in the experiment so far,
    using model predictions if ``use_model_predictions`` is true and using
    observed values from the experiment otherwise. By default, uses model
    predictions to account for observation noise.

    NOTE: The format of this method's output is as follows:
    { trial_index --> (parameterization, (means, covariances)) }, where means
    are a dictionary of form { metric_name --> metric_mean } and covariances
    are a nested dictionary of form
    { one_metric_name --> { another_metric_name: covariance } }.

    Args:
        experiment: Experiment, from which to find Pareto-optimal arms.
        generation_strategy: Generation strategy containing the modelbridge.
        use_model_predictions: Whether to extract the Pareto frontier using
            model predictions or directly observed values. If ``True``,
            the metric means and covariances in this method's output will
            also be based on model predictions and may differ from the
            observed values.

    Returns:
        ``None`` if it was not possible to extract the Pareto frontier,
        otherwise a mapping from trial index to the tuple of:
        - the parameterization of the arm in that trial,
        - two-item tuple of metric means dictionary and covariance matrix
            (model-predicted if ``use_model_predictions=True`` and observed
            otherwise).
    """
    # Validate aspects of the experiment: that it is a MOO experiment and
    # that the current model can be used to produce the Pareto frontier.
    if not not_none(experiment.optimization_config).is_moo_problem:
        raise UnsupportedError(
            "Please use `get_best_parameters` for single-objective problems.")

    moo_optimization_config = checked_cast(MultiObjectiveOptimizationConfig,
                                           experiment.optimization_config)
    if moo_optimization_config.outcome_constraints:
        # TODO[drfreund]: Test this flow and remove error.
        raise NotImplementedError(
            "Support for outcome constraints is currently under development.")

    # Extract or instantiate modelbridge to use for Pareto frontier extraction.
    mb = generation_strategy.model
    if mb is None or not isinstance(mb, MultiObjectiveTorchModelBridge):
        logger.info(
            "Can only extract a Pareto frontier using a multi-objective model bridge"
            f", but currently used model bridge is: {mb} of type {type(mb)}. Will "
            "use `Models.MOO` instead to extract Pareto frontier.")
        mb = checked_cast(
            MultiObjectiveTorchModelBridge,
            Models.MOO(experiment=experiment,
                       data=checked_cast(Data, experiment.lookup_data())),
        )
    else:
        # Make sure the model is up-to-date with the most recent data.
        generation_strategy._set_or_update_current_model(data=None)

    # If objective thresholds are not specified in optimization config, extract
    # the inferred ones if possible or infer them anew if not.
    objective_thresholds_override = None
    if not moo_optimization_config.objective_thresholds:
        lgr = generation_strategy.last_generator_run
        if lgr and lgr.gen_metadata and "objective_thresholds" in lgr.gen_metadata:
            objective_thresholds_override = lgr.gen_metadata[
                "objective_thresholds"]
        else:
            objective_thresholds_override = mb.infer_objective_thresholds(
                search_space=experiment.search_space,
                optimization_config=experiment.optimization_config,
                fixed_features=None,
            )
        logger.info(
            f"Using inferred objective thresholds: {objective_thresholds_override}, "
            "as objective thresholds were not specified as part of the optimization "
            "configuration on the experiment.")

    # Extract the Pareto frontier and format it as follows:
    # { trial_index --> (parameterization, (means, covariances)) }
    pareto_util = predicted_pareto if use_model_predictions else observed_pareto
    pareto_optimal_observations = pareto_util(
        modelbridge=mb, objective_thresholds=objective_thresholds_override)
    return {
        int(not_none(obs.features.trial_index)): (
            obs.features.parameters,
            (obs.data.means_dict, obs.data.covariance_matrix),
        )
        for obs in pareto_optimal_observations
    }
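A hedged usage sketch for the function above. It is not from the source: the import path (ax.service.utils.best_point) matches the file name shown, and the `experiment` and `generation_strategy` objects are assumed to come from an already-running multi-objective Ax optimization.

from ax.service.utils.best_point import get_pareto_optimal_parameters

# `experiment` and `generation_strategy` are assumed to already exist.
pareto = get_pareto_optimal_parameters(
    experiment=experiment,
    generation_strategy=generation_strategy,
    use_model_predictions=True,
)
if pareto is not None:
    for trial_index, (parameters, (means, covariances)) in pareto.items():
        print(f"Trial {trial_index}: {parameters} -> predicted means {means}")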
Example #4
File: best_point.py Project: kjanoudi/Ax
def get_best_from_model_predictions_with_trial_index(
    experiment: Experiment,
) -> Optional[Tuple[int, TParameterization, Optional[TModelPredictArm]]]:
    """Given an experiment, returns the best predicted parameterization and corresponding
    prediction based on the most recent Trial with predictions. If no trials have
    predictions returns None.

    Only some models return predictions. For instance, GPEI does, while Sobol
    does not.

    TModelPredictArm is of the form:
        ({metric_name: mean}, {metric_name_1: {metric_name_2: cov_1_2}})

    Args:
        experiment: Experiment, on which to identify best raw objective arm.

    Returns:
        Tuple of parameterization and model predictions for it.
    """
    # pyre-ignore [16]
    if isinstance(experiment.optimization_config.objective, MultiObjective):
        logger.warning(
            "get_best_from_model_predictions is deprecated for multi-objective "
            "optimization configs. This method will return an arbitrary point on "
            "the pareto frontier.")
    for idx, trial in sorted(experiment.trials.items(),
                             key=lambda x: x[0],
                             reverse=True):
        gr = None
        if isinstance(trial, Trial):
            gr = trial.generator_run
        elif isinstance(trial, BatchTrial):
            if len(trial.generator_run_structs) > 0:
                # In theory a batch trial can have more than one generator
                # run; grab the first.
                gr = trial.generator_run_structs[0].generator_run

        if gr is not None and gr.best_arm_predictions is not None:  # pragma: no cover
            data = experiment.lookup_data()
            if not isinstance(data, Data):
                return _gr_to_prediction_with_trial_index(idx, gr)

            model = get_model_from_generator_run(generator_run=gr,
                                                 experiment=experiment,
                                                 data=data)

            # If the model is not an ArrayModelBridge, just use the best arm
            # from the last good generator run.
            if not isinstance(model, ArrayModelBridge):
                return _gr_to_prediction_with_trial_index(idx, gr)

            # Check to see if the model is worth using
            cv_results = cross_validate(model=model)
            diagnostics = compute_diagnostics(result=cv_results)
            assess_model_fit_results = assess_model_fit(
                diagnostics=diagnostics)
            objective_name = experiment.optimization_config.objective.metric.name
            # If model fit is bad use raw results
            if (objective_name in
                    assess_model_fit_results.bad_fit_metrics_to_fisher_score):
                logger.warning(
                    "Model fit is poor; falling back on raw data for best point."
                )

                if not _is_all_noiseless(df=data.df,
                                         metric_name=objective_name):
                    logger.warning(
                        "Model fit is poor and data on objective metric " +
                        f"{objective_name} is noisy; interpret best points " +
                        "results carefully.")

                return _get_best_poor_model_fit(experiment=experiment)

            res = model.model_best_point()
            if res is None:
                return _gr_to_prediction_with_trial_index(idx, gr)

            best_arm, best_arm_predictions = res

            return idx, not_none(best_arm).parameters, best_arm_predictions

    return None
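A usage sketch, not from the source, that unpacks the return value while guarding against the `None` cases described in the docstring. The import path is an assumption based on the file name above, and `experiment` is assumed to be an existing single-objective Ax experiment.

from ax.service.utils.best_point import (
    get_best_from_model_predictions_with_trial_index,
)

result = get_best_from_model_predictions_with_trial_index(experiment=experiment)
if result is None:
    print("No trial has model predictions yet.")
else:
    trial_index, parameters, predictions = result
    means, covariances = predictions if predictions is not None else ({}, {})
    print(f"Best trial {trial_index}: {parameters}, predicted means {means}")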
Example #5
    def should_stop_trials_early(
        self,
        trial_indices: Set[int],
        experiment: Experiment,
        **kwargs: Dict[str, Any],
    ) -> Dict[int, Optional[str]]:
        """Stop a trial if its performance is in the bottom `percentile_threshold`
        of the trials at the same step.

        Args:
            trial_indices: Indices of candidate trials to consider for early stopping.
            experiment: Experiment that contains the trials and other contextual data.

        Returns:
            A dictionary mapping trial indices that should be early stopped to
            (optional) messages with the associated reason. An empty dictionary
            means no suggested updates to any trial's status.
        """
        if experiment.optimization_config is None:
            raise UnsupportedError(  # pragma: no cover
                "Experiment must have an optimization config in order to use an "
                "early stopping strategy."
            )

        optimization_config = not_none(experiment.optimization_config)
        objective_name = optimization_config.objective.metric.name
        minimize = optimization_config.objective.minimize

        data = experiment.lookup_data(keep_latest_map_values_only=False)
        if data.df.empty:
            logger.info(
                "PercentileEarlyStoppingStrategy received empty data. "
                "Not stopping any trials."
            )
            return {}

        if not isinstance(data, MapData):
            raise ValueError(
                "PercentileEarlyStoppingStrategy expects MapData, but the "
                f"data attached to experiment is of type {type(data)}."
            )

        map_keys = data.map_keys
        if len(map_keys) > 1:
            raise ValueError(  # pragma: no cover
                "PercentileEarlyStoppingStrategy expects MapData with a single "
                "map key, but the data attached to the experiment has multiple: "
                f"{data.map_keys}."
            )
        map_key = map_keys[0]

        df = data.df
        metric_to_aligned_means, _ = align_partial_results(
            df=df,
            progr_key=map_key,
            metrics=[objective_name],
        )
        aligned_means = metric_to_aligned_means[objective_name]
        decisions = {
            trial_index: self.should_stop_trial_early(
                trial_index=trial_index,
                experiment=experiment,
                df=aligned_means,
                percentile_threshold=self.percentile_threshold,
                map_key=map_key,
                minimize=minimize,
            )
            for trial_index in trial_indices
        }
        return {
            trial_index: reason
            for trial_index, (should_stop, reason) in decisions.items()
            if should_stop
        }
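A usage sketch, not from the source. The `percentile_threshold` constructor argument is an assumption about PercentileEarlyStoppingStrategy's API and may differ between Ax versions; `experiment` is assumed to already contain MapData with a single map key, and in practice only RUNNING trials would be passed.

from ax.early_stopping.strategies import PercentileEarlyStoppingStrategy

# Passing every trial index is purely for illustration; real callers pass the
# indices of trials that are still running.
strategy = PercentileEarlyStoppingStrategy(percentile_threshold=75.0)
decisions = strategy.should_stop_trials_early(
    trial_indices=set(experiment.trials.keys()),
    experiment=experiment,
)
for trial_index, reason in decisions.items():
    print(f"Suggest stopping trial {trial_index}: {reason}")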
Example #6
def get_standard_plots(
    experiment: Experiment,
    model: Optional[ModelBridge],
    data: Optional[Union[Data, MapData]] = None,
    model_transitions: Optional[List[int]] = None,
) -> List[go.Figure]:
    """Extract standard plots for single-objective optimization.

    Extracts a list of plots from an ``Experiment`` and ``ModelBridge`` of general
    interest to an Ax user. Currently not supported are
    - TODO: multi-objective optimization
    - TODO: ChoiceParameter plots

    Args:
        - experiment: The ``Experiment`` from which to obtain standard plots.
        - model: The ``ModelBridge`` used to suggest trial parameters.
        - data: If specified, data, to which to fit the model before generating plots.
        - model_transitions: The arm numbers at which shifts in generation_strategy
            occur.

    Returns:
        - a plot of objective value vs. trial index, to show experiment progression
        - a plot of objective value vs. range parameter values, only included if the
          model associated with generation_strategy can create predictions. This
          consists of:

            - a plot_slice plot if the search space contains one range parameter
            - an interact_contour plot if the search space contains multiple
              range parameters

    """
    objective = not_none(experiment.optimization_config).objective
    if isinstance(objective, ScalarizedObjective):
        logger.warning(
            "get_standard_plots does not currently support ScalarizedObjective "
            "optimization experiments. Returning an empty list.")
        return []

    if data is None:
        data = experiment.lookup_data()
        if isinstance(data, MapData):
            data = data.deduplicate_data()

    if data.df.empty:
        logger.info(
            f"Experiment {experiment} does not yet have data, nothing to plot."
        )
        return []

    output_plot_list = []
    output_plot_list.append(
        _get_objective_trace_plot(
            experiment=experiment,
            data=checked_cast(Data, data)
            if isinstance(data, Data) else checked_cast(MapData, data),
            model_transitions=model_transitions
            if model_transitions is not None else [],
        ))

    # Objective vs. parameter plot requires a `Model`, so add it only if model
    # is already available. In cases where custom trials are attached initially,
    # model might not yet be set on the generation strategy.
    if model:
        # TODO: Check if model can predict in favor of try/catch.
        try:
            output_plot_list.extend(
                _get_objective_v_param_plots(
                    experiment=experiment,
                    model=model,
                ))
            output_plot_list.extend(_get_cross_validation_plots(model=model))
            feature_importance_plot = plot_feature_importance_by_feature_plotly(
                model=model,
                relative=False,
                caption=feature_importance_caption)
            feature_importance_plot.layout.title = "[ADVANCED] " + str(
                # pyre-fixme[16]: go.Figure has no attribute `layout`
                feature_importance_plot.layout.title.text)
            output_plot_list.append(feature_importance_plot)
        except NotImplementedError:
            # Model does not implement `predict` method.
            pass

    return [plot for plot in output_plot_list if plot is not None]
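A usage sketch, not from the source, for rendering the returned figures. The report_utils import path and the pre-existing `experiment` and `generation_strategy` objects are assumptions.

from ax.service.utils.report_utils import get_standard_plots

# `generation_strategy.model` may be None early on, in which case only the
# objective-trace plot is produced.
figures = get_standard_plots(
    experiment=experiment,
    model=generation_strategy.model,
)
for fig in figures:
    fig.show()  # each entry is a plotly `go.Figure`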