Code Example #1
def make_search_space(
    parameters: List[TParameterRepresentation],
    parameter_constraints: List[str],
) -> SearchSpace:
    typed_parameters = [parameter_from_json(p) for p in parameters]
    is_hss = any(p.is_hierarchical for p in typed_parameters)
    search_space_cls = HierarchicalSearchSpace if is_hss else SearchSpace

    parameter_map = {p.name: p for p in typed_parameters}

    typed_parameter_constraints = [
        constraint_from_str(c, parameter_map) for c in parameter_constraints
    ]

    if any(
            any(
                isinstance(parameter_map[parameter], ChoiceParameter)
                for parameter in constraint.constraint_dict)
            for constraint in typed_parameter_constraints):
        raise UnsupportedError(
            "Constraints on ChoiceParameters are not allowed. Try absorbing "
            "this constraint into the associated range parameter's bounds.")

    if any(
            any(
                isinstance(parameter_map[parameter], FixedParameter)
                for parameter in constraint.constraint_dict)
            for constraint in typed_parameter_constraints):
        raise UnsupportedError(
            "Constraints on FixedParameters are not allowed. Try absorbing "
            "this constraint into the associated range parameter's bounds.")

    ss = search_space_cls(
        parameters=typed_parameters,
        parameter_constraints=typed_parameter_constraints,
    )

    logger.info(f"Created search space: {ss}.")
    if is_hss:
        hss = checked_cast(HierarchicalSearchSpace, ss)
        logger.info(
            "Hieararchical structure of the search space: \n"
            f"{hss.hierarchical_structure_str(parameter_names_only=True)}")

    return ss
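To illustrate the expected inputs, here is a minimal usage sketch; it assumes `make_search_space` is importable from Ax's instantiation utilities and that the parameter dictionaries follow the JSON-style format documented in the `make_experiment` examples further below (the names x1/x2/x3 are made up for illustration):

# Hypothetical usage sketch for make_search_space.
parameters = [
    {"name": "x1", "type": "range", "bounds": [0.0, 1.0]},
    {"name": "x2", "type": "range", "bounds": [0.0, 1.0]},
    {"name": "x3", "type": "choice", "values": ["a", "b", "c"]},
]
search_space = make_search_space(
    parameters=parameters,
    # Constraints may only reference range parameters (see the checks above).
    parameter_constraints=["x1 <= x2"],
)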
Code Example #2
    def search_space(self, search_space: SearchSpace) -> None:
        # Allow all modifications when no trials present.
        if not hasattr(self, "_search_space") or len(self.trials) < 1:
            self._search_space = search_space
            return

        # At least 1 trial is present.
        if self.immutable_search_space_and_opt_config:
            raise UnsupportedError(
                "Modifications of search space are disabled by the "
                f"`{Keys.IMMUTABLE_SEARCH_SPACE_AND_OPT_CONF.value}` "
                "property that is set to `True` on this experiment.")
        if len(search_space.parameters) < len(self._search_space.parameters):
            raise ValueError(
                "New search space must contain all of the parameters in the "
                "existing search space."
            )
        for param_name, parameter in search_space.parameters.items():
            if param_name not in self._search_space.parameters:
                raise ValueError(
                    f"Cannot add new parameter `{param_name}` because "
                    "it is not defined in the existing search space.")
            elif (parameter.parameter_type !=
                  self._search_space.parameters[param_name].parameter_type):
                raise ValueError(
                    f"Expected parameter `{param_name}` to be of type "
                    f"{self._search_space.parameters[param_name].parameter_type}, "
                    f"got {parameter.parameter_type}.")
        self._search_space = search_space
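A short sketch of how this setter might be exercised once trials exist: the replacement search space must keep the same parameter names and types (for example, only widening bounds). The imports, parameter names, and bounds below are assumptions for illustration.

# Assumes an `experiment` whose existing search space has float range
# parameters named "x1" and "x2"; only their bounds are changed here.
from ax.core.parameter import ParameterType, RangeParameter
from ax.core.search_space import SearchSpace

experiment.search_space = SearchSpace(parameters=[
    RangeParameter(name="x1", parameter_type=ParameterType.FLOAT, lower=0.0, upper=2.0),
    RangeParameter(name="x2", parameter_type=ParameterType.FLOAT, lower=0.0, upper=2.0),
])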
Code Example #3
File: moo_acquisition.py Project: jeffersonp317/Ax
    def __init__(
        self,
        surrogate: Surrogate,
        search_space_digest: SearchSpaceDigest,
        objective_weights: Tensor,
        objective_thresholds: Optional[Tensor],
        botorch_acqf_class: Optional[Type[AcquisitionFunction]] = None,
        options: Optional[Dict[str, Any]] = None,
        pending_observations: Optional[List[Tensor]] = None,
        outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
        linear_constraints: Optional[Tuple[Tensor, Tensor]] = None,
        fixed_features: Optional[Dict[int, float]] = None,
    ) -> None:
        botorch_acqf_class = not_none(
            botorch_acqf_class or self.default_botorch_acqf_class
        )
        if not issubclass(botorch_acqf_class, qExpectedHypervolumeImprovement):
            raise UnsupportedError(
                "Only qExpectedHypervolumeImprovement is currently supported as "
                f"a MOOAcquisition botorch_acqf_class. Got: {botorch_acqf_class}."
            )

        super().__init__(
            surrogate=surrogate,
            botorch_acqf_class=botorch_acqf_class,
            search_space_digest=search_space_digest,
            objective_weights=objective_weights,
            objective_thresholds=objective_thresholds,
            outcome_constraints=outcome_constraints,
            linear_constraints=linear_constraints,
            fixed_features=fixed_features,
            pending_observations=pending_observations,
            options=options,
        )
Code Example #4
def _get_outcome_constraints_from_config(
        optimization_config: OptimizationConfig,
        metric_name: str) -> List[OutcomeConstraint]:
    """Get all outcome constraints (non-scalarized) for a given metric."""
    # Check for scalarized outcome constraints for the given metric
    if any(
            isinstance(oc, ScalarizedOutcomeConstraint)
            and metric_name in [metric.name for metric in oc.metrics]
            for oc in optimization_config.outcome_constraints):
        warnings.warn(
            "Automatic winsorization isn't supported for a "
            "`ScalarizedOutcomeConstraint`. Specify the winsorization settings "
            f"manually if you want to winsorize metric {metric_name}.")
    # Filter scalarized outcome constraints
    outcome_constraints = [
        oc for oc in optimization_config.outcome_constraints
        if not isinstance(oc, ScalarizedOutcomeConstraint)
        and oc.metric.name == metric_name
    ]
    # Raise an error if there are relative constraints
    if any(oc.relative for oc in outcome_constraints):
        raise UnsupportedError(
            "Automatic winsorization doesn't support relative outcome constraints. "
            "Make sure a `Derelativize` transform is applied first.")
    return outcome_constraints
Code Example #5
    def __init__(
        self,
        surrogate: Surrogate,
        bounds: List[Tuple[float, float]],
        objective_weights: Tensor,
        objective_thresholds: Optional[Tensor],
        botorch_acqf_class: Optional[Type[AcquisitionFunction]] = None,
        options: Optional[Dict[str, Any]] = None,
        pending_observations: Optional[List[Tensor]] = None,
        outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
        linear_constraints: Optional[Tuple[Tensor, Tensor]] = None,
        fixed_features: Optional[Dict[int, float]] = None,
        target_fidelities: Optional[Dict[int, float]] = None,
    ) -> None:
        botorch_acqf_class = not_none(botorch_acqf_class
                                      or self.default_botorch_acqf_class)
        if not issubclass(botorch_acqf_class, qExpectedHypervolumeImprovement):
            raise UnsupportedError(
                "Only qExpectedHypervolumeImprovement is currently supported as "
                f"a MOOAcquisition botorch_acqf_class. Got: {botorch_acqf_class}."
            )

        # Calculate `Y` and inject into options.
        # NOTE: Ideally we would do this in `compute_model_dependencies` and not need a
        # separate `__init__` for `MOOAcquisition`, but the obstacle is currently that
        # in that case `Y` would not be `subset` along with the model. This should be
        # revisited in the future.
        trd = self._extract_training_data(surrogate=surrogate)
        Ys = [trd.Y] if isinstance(
            trd, TrainingData) else [i.Y for i in trd.values()]
        options = options or {}

        # subset model only to the outcomes we need for the optimization
        if options.get(Keys.SUBSET_MODEL, True):
            _, _, _, Ys = subset_model(
                model=surrogate.model,
                objective_weights=objective_weights,
                Ys=Ys,
            )

        # pyre-ignore [6]: pyre wrong that `Ys` is not optional
        Y = torch.stack(Ys).transpose(0, 1).squeeze()
        options["Y"] = Y

        super().__init__(
            surrogate=surrogate,
            botorch_acqf_class=botorch_acqf_class,
            bounds=bounds,
            objective_weights=objective_weights,
            objective_thresholds=objective_thresholds,
            outcome_constraints=outcome_constraints,
            linear_constraints=linear_constraints,
            fixed_features=fixed_features,
            pending_observations=pending_observations,
            target_fidelities=target_fidelities,
            options=options,
        )
Code Example #6
    def _maybe_save_experiment_and_generation_strategy(
        self, experiment: Experiment, generation_strategy: GenerationStrategy
    ) -> Tuple[bool, bool]:
        """If DB settings are set on this `WithDBSettingsBase` instance, checks
        whether given experiment and generation strategy are already saved and
        saves them, if not.

        Returns:
            Tuple of two booleans: whether experiment was saved in the course of
                this function's execution and whether generation strategy was
                saved.
        """
        saved_exp, saved_gs = False, False
        if self.db_settings_set:
            if experiment._name is None:
                raise ValueError(
                    "Experiment must specify a name to use storage functionality."
                )
            exp_name = not_none(experiment.name)
            exp_id, gs_id = self._get_experiment_and_generation_strategy_db_id(
                experiment_name=exp_name
            )
            if exp_id:  # Experiment in DB.
                # TODO: Switch to just updating experiment when selective-field
                # update is available.
                logger.info(f"Experiment {exp_name} is in DB, updating it.")
                self._save_experiment_to_db_if_possible(experiment=experiment)
                saved_exp = True
            else:  # Experiment not yet in DB.
                logger.info(f"Experiment {exp_name} is not yet in DB, storing it.")
                self._save_experiment_to_db_if_possible(experiment=experiment)
                saved_exp = True
            if gs_id and generation_strategy._db_id != gs_id:
                raise UnsupportedError(
                    "Experiment was associated with generation strategy in DB, "
                    f"but a new generation strategy {generation_strategy.name} "
                    "was provided. To use the generation strategy currently in DB,"
                    " instantiate scheduler via: `Scheduler.with_stored_experiment`."
                )
            if not gs_id or generation_strategy._db_id is None:
                # Either there is no generation strategy associated with the
                # experiment in the DB, or the generation strategy passed in
                # has not yet been saved to the DB.
                logger.info(
                    f"Generation strategy {generation_strategy.name} is not yet in DB, "
                    "storing it."
                )
                # If generation strategy does not yet have an experiment attached,
                # attach the current experiment to it, as otherwise it will not be
                # possible to retrieve by experiment name.
                if generation_strategy._experiment is None:
                    generation_strategy.experiment = experiment
                self._save_generation_strategy_to_db_if_possible(
                    generation_strategy=generation_strategy
                )
                saved_gs = True
        return saved_exp, saved_gs
Code Example #7
    def optimization_config(self,
                            optimization_config: OptimizationConfig) -> None:
        if (getattr(self, "_optimization_config", None) is not None
                and self.immutable_search_space_and_opt_config):
            raise UnsupportedError(
                "Modifications of optimization config are disabled by the "
                f"`{Keys.IMMUTABLE_SEARCH_SPACE_AND_OPT_CONF.value}` "
                "property that is set to `True` on this experiment.")
        for metric_name in optimization_config.metrics.keys():
            if metric_name in self._tracking_metrics:
                self.remove_tracking_metric(metric_name)
        self._optimization_config = optimization_config
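A sketch of assigning a new optimization config through this setter; it assumes `Metric`, `Objective`, and `OptimizationConfig` are imported from `ax.core` and that `experiment` already exists.

from ax.core.metric import Metric
from ax.core.objective import Objective
from ax.core.optimization_config import OptimizationConfig

# Any metric of the new config that was previously a tracking metric is
# removed from the tracking metrics by the setter above.
experiment.optimization_config = OptimizationConfig(
    objective=Objective(metric=Metric(name="m1", lower_is_better=True), minimize=True),
)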
Code Example #8
    def __init__(
        self,
        df: Optional[pd.DataFrame] = None,
        map_key_infos: Optional[Iterable[MapKeyInfo]] = None,
        description: Optional[str] = None,
    ) -> None:
        if map_key_infos is None and df is not None:
            raise ValueError("map_key_infos may be `None` iff `df` is None.")

        self._map_key_infos = map_key_infos or []

        if df is None:  # If df is None create an empty dataframe with appropriate cols
            self._map_df = pd.DataFrame(
                columns=self.required_columns().union(self.map_keys))
        else:
            columns = set(df.columns)
            missing_columns = self.required_columns() - columns
            if missing_columns:
                raise UnsupportedError(
                    f"Dataframe must contain required columns {missing_columns}."
                )
            extra_columns = columns - self.supported_columns(
                extra_column_names=self.map_keys)
            if extra_columns:
                raise UnsupportedError(
                    f"Columns {[mki.key for mki in extra_columns]} are not supported."
                )
            df = df.dropna(axis=0, how="all").reset_index(drop=True)
            df = self._safecast_df(df=df,
                                   extra_column_types=self.map_key_to_type)

            col_order = [
                c for c in self.column_data_types(self.map_key_to_type)
                if c in df.columns
            ]
            self._map_df = df[col_order]

        self.description = description

        self._memo_df = None
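A construction sketch for `MapData`; it assumes `MapData` and `MapKeyInfo` live in `ax.core.map_data` and that the column names below match Ax's usual Data schema (treat both as assumptions).

import pandas as pd
from ax.core.map_data import MapData, MapKeyInfo

df = pd.DataFrame({
    "trial_index": [0, 0],
    "arm_name": ["0_0", "0_0"],
    "metric_name": ["loss", "loss"],
    "mean": [0.91, 0.73],
    "sem": [0.01, 0.01],
    "epoch": [1, 2],  # column holding the map key values
})
map_data = MapData(
    df=df,
    map_key_infos=[MapKeyInfo(key="epoch", default_value=0.0)],
)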
Code Example #9
def _get_objective_threshold_from_moo_config(
        optimization_config: MultiObjectiveOptimizationConfig,
        metric_name: str) -> List[ObjectiveThreshold]:
    """Get the non-relative objective threshold for a given metric."""
    objective_thresholds = [
        ot for ot in optimization_config.objective_thresholds
        if ot.metric.name == metric_name
    ]
    if any(oc.relative for oc in objective_thresholds):
        raise UnsupportedError(
            "Automatic winsorization doesn't support relative objective thresholds. "
            "Make sure a `Derelevatize` transform is applied first.")
    return objective_thresholds
Code Example #10
File: experiment.py Project: kjanoudi/Ax
    def optimization_config(self,
                            optimization_config: OptimizationConfig) -> None:
        if (len(self.trials) > 0
                and getattr(self, "_optimization_config", None) is not None
                and self.immutable_search_space_and_opt_config):
            raise UnsupportedError(
                "Modifications of optimization config are disabled by the "
                f"`{Keys.IMMUTABLE_SEARCH_SPACE_AND_OPT_CONF.value}` "
                "property that is set to `True` on this experiment.")
        for metric_name in optimization_config.metrics.keys():
            if metric_name in self._tracking_metrics:
                self.remove_tracking_metric(metric_name)
        self._optimization_config = optimization_config
        if (isinstance(optimization_config.objective.metrics[0], MapMetric)
                and self._default_data_type is not DataType.MAP_DATA):
            self._default_data_type = DataType.MAP_DATA
Code Example #11
    def _check_validity_and_get_data(
            self, experiment: Experiment) -> Optional[MapData]:
        """Validity checks and returns the `MapData` used for early stopping."""
        if experiment.optimization_config is None:
            raise UnsupportedError(  # pragma: no cover
                "Experiment must have an optimization config in order to use an "
                "early stopping strategy.")

        optimization_config = not_none(experiment.optimization_config)
        objective_name = optimization_config.objective.metric.name

        data = experiment.lookup_data()
        if data.df.empty:
            logger.info(f"{self.__class__.__name__} received empty data. "
                        "Not stopping any trials.")
            return None
        if objective_name not in set(data.df["metric_name"]):
            logger.info(f"{self.__class__.__name__} did not receive data "
                        "from the objective metric. Not stopping any trials.")
            return None

        if not isinstance(data, MapData):
            logger.info(
                f"{self.__class__.__name__} expects MapData, but the "
                f"data attached to experiment is of type {type(data)}. "
                "Not stopping any trials.")
            return None

        data = checked_cast(MapData, data)
        map_keys = data.map_keys
        if len(list(map_keys)) > 1:
            logger.info(
                f"{self.__class__.__name__} expects MapData with a single "
                "map key, but the data attached to the experiment has multiple: "
                f"{data.map_keys}. Not stopping any trials.")
            return None
        return data
Code Example #12
File: best_point.py Project: kjanoudi/Ax
def get_pareto_optimal_parameters(
    experiment: Experiment,
    generation_strategy: GenerationStrategy,
    use_model_predictions: bool = True,
) -> Optional[Dict[int, Tuple[TParameterization, TModelPredictArm]]]:
    """Identifies the best parameterizations tried in the experiment so far,
    using model predictions if ``use_model_predictions`` is true and using
    observed values from the experiment otherwise. By default, uses model
    predictions to account for observation noise.

    NOTE: The format of this method's output is as follows:
    { trial_index --> (parameterization, (means, covariances)) }, where means
    are a dictionary of form { metric_name --> metric_mean } and covariances
    are a nested dictionary of form
    { one_metric_name --> { another_metric_name: covariance } }.

    Args:
        experiment: Experiment, from which to find Pareto-optimal arms.
        generation_strategy: Generation strategy containing the modelbridge.
        use_model_predictions: Whether to extract the Pareto frontier using
            model predictions or directly observed values. If ``True``,
            the metric means and covariances in this method's output will
            also be based on model predictions and may differ from the
            observed values.

    Returns:
        ``None`` if it was not possible to extract the Pareto frontier,
        otherwise a mapping from trial index to the tuple of:
        - the parameterization of the arm in that trial,
        - two-item tuple of metric means dictionary and covariance matrix
            (model-predicted if ``use_model_predictions=True`` and observed
            otherwise).
    """
    # Validate aspects of the experiment: that it is a MOO experiment and
    # that the current model can be used to produce the Pareto frontier.
    if not not_none(experiment.optimization_config).is_moo_problem:
        raise UnsupportedError(
            "Please use `get_best_parameters` for single-objective problems.")

    moo_optimization_config = checked_cast(MultiObjectiveOptimizationConfig,
                                           experiment.optimization_config)
    if moo_optimization_config.outcome_constraints:
        # TODO[drfreund]: Test this flow and remove error.
        raise NotImplementedError(
            "Support for outcome constraints is currently under development.")

    # Extract or instantiate modelbridge to use for Pareto frontier extraction.
    mb = generation_strategy.model
    if mb is None or not isinstance(mb, MultiObjectiveTorchModelBridge):
        logger.info(
            "Can only extract a Pareto frontier using a multi-objective model bridge"
            f", but currently used model bridge is: {mb} of type {type(mb)}. Will "
            "use `Models.MOO` instead to extract Pareto frontier.")
        mb = checked_cast(
            MultiObjectiveTorchModelBridge,
            Models.MOO(experiment=experiment,
                       data=checked_cast(Data, experiment.lookup_data())),
        )
    else:
        # Make sure the model is up-to-date with the most recent data.
        generation_strategy._set_or_update_current_model(data=None)

    # If objective thresholds are not specified in optimization config, extract
    # the inferred ones if possible or infer them anew if not.
    objective_thresholds_override = None
    if not moo_optimization_config.objective_thresholds:
        lgr = generation_strategy.last_generator_run
        if lgr and lgr.gen_metadata and "objective_thresholds" in lgr.gen_metadata:
            objective_thresholds_override = lgr.gen_metadata[
                "objective_thresholds"]
        else:
            objective_thresholds_override = mb.infer_objective_thresholds(
                search_space=experiment.search_space,
                optimization_config=experiment.optimization_config,
                fixed_features=None,
            )
        logger.info(
            f"Using inferred objective thresholds: {objective_thresholds_override}, "
            "as objective thresholds were not specified as part of the optimization "
            "configuration on the experiment.")

    # Extract the Pareto frontier and format it as follows:
    # { trial_index --> (parameterization, (means, covariances)) }
    pareto_util = predicted_pareto if use_model_predictions else observed_pareto
    pareto_optimal_observations = pareto_util(
        modelbridge=mb, objective_thresholds=objective_thresholds_override)
    return {
        int(not_none(obs.features.trial_index)): (
            obs.features.parameters,
            (obs.data.means_dict, obs.data.covariance_matrix),
        )
        for obs in pareto_optimal_observations
    }
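A consumption sketch that follows the return format documented in the docstring above; `experiment` and `generation_strategy` are assumed to already be set up.

pareto = get_pareto_optimal_parameters(
    experiment=experiment,
    generation_strategy=generation_strategy,
    use_model_predictions=True,
)
if pareto is not None:
    # { trial_index -> (parameterization, (means, covariances)) }
    for trial_index, (parameterization, (means, covariances)) in pareto.items():
        print(trial_index, parameterization, means)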
Code Example #13
def compute_pareto_frontier(
    experiment: Experiment,
    primary_objective: Metric,
    secondary_objective: Metric,
    data: Optional[Data] = None,
    outcome_constraints: Optional[List[OutcomeConstraint]] = None,
    absolute_metrics: Optional[List[str]] = None,
    num_points: int = 10,
    trial_index: Optional[int] = None,
    chebyshev: bool = True,
) -> ParetoFrontierResults:
    """Compute the Pareto frontier between two objectives. For experiments
    with batch trials, a trial index or data object must be provided.

    Args:
        experiment: The experiment to compute a pareto frontier for.
        primary_objective: The primary objective to optimize.
        secondary_objective: The secondary objective against which
            to trade off the primary objective.
        outcome_constraints: Outcome
            constraints to be respected by the optimization. Can only contain
            constraints on metrics that are not primary or secondary objectives.
        absolute_metrics: List of outcome metrics that
            should NOT be relativized w.r.t. the status quo (all other outcomes
            will be in % relative to status_quo).
        num_points: The number of points to compute on the
            Pareto frontier.
        chebyshev: Whether to use augmented_chebyshev_scalarization
            when computing Pareto Frontier points.

    Returns:
        ParetoFrontierResults: A NamedTuple with the following fields:
            - param_dicts: The parameter dicts of the
                points generated on the Pareto Frontier.
            - means: The posterior mean predictions of
                the model for each metric (same order as the param dicts).
            - sems: The posterior sem predictions of
                the model for each metric (same order as the param dicts).
            - primary_metric: The name of the primary metric.
            - secondary_metric: The name of the secondary metric.
            - absolute_metrics: List of outcome metrics that
                are NOT be relativized w.r.t. the status quo (all other metrics
                are in % relative to status_quo).
    """
    # TODO(jej): Implement using MultiObjectiveTorchModelBridge's _pareto_frontier
    model_gen_options = {
        "acquisition_function_kwargs": {
            "chebyshev_scalarization": chebyshev
        }
    }

    if (trial_index is None and data is None and any(
            isinstance(t, BatchTrial) for t in experiment.trials.values())):
        raise UnsupportedError(
            "Must specify trial index or data for experiment with batch trials"
        )
    absolute_metrics = [] if absolute_metrics is None else absolute_metrics
    for metric in absolute_metrics:
        if metric not in experiment.metrics:
            raise ValueError(f"Model was not fit on metric `{metric}`")

    if outcome_constraints is None:
        outcome_constraints = []
    else:
        # ensure we don't constrain an objective
        _validate_outcome_constraints(
            outcome_constraints=outcome_constraints,
            primary_objective=primary_objective,
            secondary_objective=secondary_objective,
        )

    # build posterior mean model
    if not data:
        try:
            data = (experiment.trials[trial_index].fetch_data()
                    if trial_index is not None else experiment.fetch_data())
        except Exception as e:
            logger.info(f"Could not fetch data from experiment or trial: {e}")

    oc = _build_new_optimization_config(
        weights=np.array([0.5, 0.5]),
        primary_objective=primary_objective,
        secondary_objective=secondary_objective,
        outcome_constraints=outcome_constraints,
    )
    model = Models.MOO(
        experiment=experiment,
        data=data,
        acqf_constructor=get_PosteriorMean,
        optimization_config=oc,
    )

    status_quo = experiment.status_quo
    if status_quo:
        try:
            status_quo_prediction = model.predict([
                ObservationFeatures(
                    parameters=status_quo.parameters,
                    # pyre-fixme [6]: Expected `Optional[np.int64]` for trial_index
                    trial_index=trial_index,
                )
            ])
        except ValueError as e:
            logger.warning(f"Could not predict OOD status_quo outcomes: {e}")
            status_quo = None
            status_quo_prediction = None
    else:
        status_quo_prediction = None

    param_dicts: List[TParameterization] = []

    # Construct weightings with linear angular spacing.
    # TODO: Verify whether 0, 1 weights cause problems because of subset_model.
    alpha = np.linspace(0 + 0.01, np.pi / 2 - 0.01, num_points)
    primary_weight = (-1 if primary_objective.lower_is_better else
                      1) * np.cos(alpha)
    secondary_weight = (-1 if secondary_objective.lower_is_better else
                        1) * np.sin(alpha)
    weights_list = np.stack([primary_weight, secondary_weight]).transpose()
    for weights in weights_list:
        oc = _build_new_optimization_config(
            weights=weights,
            primary_objective=primary_objective,
            secondary_objective=secondary_objective,
            outcome_constraints=outcome_constraints,
        )
        # TODO: (jej) T64002590 Let this serve as a starting point for optimization.
        # ex. Add global spacing criterion. Implement on BoTorch side.
        # pyre-fixme [6]: Expected different type for model_gen_options
        run = model.gen(1,
                        model_gen_options=model_gen_options,
                        optimization_config=oc)
        param_dicts.append(run.arms[0].parameters)

    # Call predict on points to get their decomposed metrics.
    means, cov = model.predict(
        [ObservationFeatures(parameters) for parameters in param_dicts])

    return _extract_pareto_frontier_results(
        param_dicts=param_dicts,
        means=means,
        variances=cov,
        primary_metric=primary_objective.name,
        secondary_metric=secondary_objective.name,
        absolute_metrics=absolute_metrics,
        outcome_constraints=outcome_constraints,
        status_quo_prediction=status_quo_prediction,
    )
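A usage sketch under the assumption that `experiment` has metrics named "m1" and "m2" and that `Metric` is `ax.core.metric.Metric`; the field accesses mirror the `ParetoFrontierResults` fields listed in the docstring.

frontier = compute_pareto_frontier(
    experiment=experiment,
    primary_objective=Metric(name="m1", lower_is_better=False),
    secondary_objective=Metric(name="m2", lower_is_better=True),
    absolute_metrics=["m1", "m2"],
    num_points=20,
)
print(frontier.primary_metric, frontier.secondary_metric)
print(frontier.param_dicts[:3])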
Code Example #14
    def map_df(self, df: pd.DataFrame):
        raise UnsupportedError(
            "MapData's underlying DataFrame is immutable; create a new "
            "MapData via `__init__` or `from_multiple_data`.")
Code Example #15
    def should_stop_trials_early(
        self,
        trial_indices: Set[int],
        experiment: Experiment,
        **kwargs: Dict[str, Any],
    ) -> Dict[int, Optional[str]]:
        """Stop a trial if its performance is in the bottom `percentile_threshold`
        of the trials at the same step.

        Args:
            trial_indices: Indices of candidate trials to consider for early stopping.
            experiment: Experiment that contains the trials and other contextual data.

        Returns:
            A dictionary mapping trial indices that should be early stopped to
            (optional) messages with the associated reason. An empty dictionary
            means no suggested updates to any trial's status.
        """
        if experiment.optimization_config is None:
            raise UnsupportedError(  # pragma: no cover
                "Experiment must have an optimization config in order to use an "
                "early stopping strategy."
            )

        optimization_config = not_none(experiment.optimization_config)
        objective_name = optimization_config.objective.metric.name
        minimize = optimization_config.objective.minimize

        data = experiment.lookup_data(keep_latest_map_values_only=False)
        if data.df.empty:
            logger.info(
                "PercentileEarlyStoppingStrategy received empty data. "
                "Not stopping any trials."
            )
            return {}

        if not isinstance(data, MapData):
            raise ValueError(
                "PercentileEarlyStoppingStrategy expects MapData, but the "
                f"data attached to experiment is of type {type(data)}."
            )

        map_keys = data.map_keys
        if len(map_keys) > 1:
            raise ValueError(  # pragma: no cover
                "PercentileEarlyStoppingStrategy expects MapData with a single "
                "map key, but the data attached to the experiment has multiple: "
                f"{data.map_keys}."
            )
        map_key = map_keys[0]

        df = data.df
        metric_to_aligned_means, _ = align_partial_results(
            df=df,
            progr_key=map_key,
            metrics=[objective_name],
        )
        aligned_means = metric_to_aligned_means[objective_name]
        decisions = {
            trial_index: self.should_stop_trial_early(
                trial_index=trial_index,
                experiment=experiment,
                df=aligned_means,
                percentile_threshold=self.percentile_threshold,
                map_key=map_key,
                minimize=minimize,
            )
            for trial_index in trial_indices
        }
        return {
            trial_index: reason
            for trial_index, (should_stop, reason) in decisions.items()
            if should_stop
        }
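A usage sketch; it assumes `PercentileEarlyStoppingStrategy` accepts a `percentile_threshold` constructor argument (suggested by `self.percentile_threshold` above) and that `experiment` has `MapData` attached for the objective metric.

strategy = PercentileEarlyStoppingStrategy(percentile_threshold=25)
stop_decisions = strategy.should_stop_trials_early(
    trial_indices={0, 1, 2},
    experiment=experiment,
)
for trial_index, reason in stop_decisions.items():
    print(f"Trial {trial_index} should stop: {reason}")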
Code Example #16
File: instantiation.py Project: bletham/Ax
def make_experiment(
    parameters: List[TParameterRepresentation],
    name: Optional[str] = None,
    parameter_constraints: Optional[List[str]] = None,
    outcome_constraints: Optional[List[str]] = None,
    status_quo: Optional[TParameterization] = None,
    experiment_type: Optional[str] = None,
    # Single-objective optimization arguments:
    objective_name: Optional[str] = None,
    minimize: bool = False,
    # Multi-objective optimization arguments:
    objectives: Optional[Dict[str, str]] = None,
    objective_thresholds: Optional[List[str]] = None,
) -> Experiment:
    """Instantiation wrapper that allows for Ax `Experiment` creation
    without importing or instantiating any Ax classes.

    Args:
        parameters: List of dictionaries representing parameters in the
            experiment search space.
            Required elements in the dictionaries are:
            1. "name" (name of parameter, string),
            2. "type" (type of parameter: "range", "fixed", or "choice", string),
            and one of the following:
            3a. "bounds" for range parameters (list of two values, lower bound
            first),
            3b. "values" for choice parameters (list of values), or
            3c. "value" for fixed parameters (single value).
            Optional elements are:
            1. "log_scale" (for float-valued range parameters, bool),
            2. "value_type" (to specify type that values of this parameter should
            take; expects "float", "int", "bool" or "str"),
            3. "is_fidelity" (bool) and "target_value" (float) for fidelity
            parameters,
            4. "is_ordered" (bool) for choice parameters,
            5. "is_task" (bool) for task parameters, and
            6. "digits" (int) for float-valued range parameters.
        name: Name of the experiment to be created.
        parameter_constraints: List of string representation of parameter
            constraints, such as "x3 >= x4" or "-x3 + 2*x4 - 3.5*x5 >= 2". For
            the latter constraints, any number of arguments is accepted, and
            acceptable operators are "<=" and ">=".
        outcome_constraints: List of string representation of outcome
            constraints of form "metric_name >= bound", like "m1 <= 3."
        status_quo: Parameterization of the current state of the system.
            If set, this will be added to each trial to be evaluated alongside
            test configurations.
        experiment_type: String indicating type of the experiment (e.g. name of
            a product in which it is used), if any.
        objective_name: Name of the metric used as objective in this experiment,
            if experiment is single-objective optimization.
        minimize: Whether this experiment represents a minimization problem, if
            experiment is a single-objective optimization.
        objectives: Mapping from an objective name to "minimize" or "maximize"
            representing the direction for that objective. Used only for
            multi-objective optimization experiments.
        objective_thresholds: A list of objective threshold constraints for multi-
            objective optimization, in the same string format as `outcome_constraints`
            argument.
    """
    if objective_name is not None and (objectives is not None
                                       or objective_thresholds is not None):
        raise UnsupportedError(
            "Ambiguous objective definition: for single-objective optimization "
            "`objective_name` and `minimize` arguments expected. For multi-objective "
            "optimization `objectives` and `objective_thresholds` arguments expected."
        )

    status_quo_arm = None if status_quo is None else Arm(parameters=status_quo)

    if objectives is None:
        optimization_config = OptimizationConfig(
            objective=Objective(
                metric=Metric(
                    name=objective_name or DEFAULT_OBJECTIVE_NAME,
                    lower_is_better=minimize,
                ),
                minimize=minimize,
            ),
            outcome_constraints=make_outcome_constraints(
                outcome_constraints or [], status_quo_arm is not None),
        )
    else:
        optimization_config = make_optimization_config(
            objectives,
            objective_thresholds or [],
            outcome_constraints or [],
            status_quo_arm is not None,
        )

    return Experiment(
        name=name,
        search_space=make_search_space(parameters, parameter_constraints
                                       or []),
        optimization_config=optimization_config,
        status_quo=status_quo_arm,
        experiment_type=experiment_type,
    )
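A single-objective usage sketch built from the parameter-dictionary format described in the docstring above; the parameter and metric names are made up for illustration.

experiment = make_experiment(
    name="my_experiment",
    parameters=[
        {"name": "lr", "type": "range", "bounds": [1e-4, 1e-1], "log_scale": True},
        {"name": "batch_size", "type": "choice", "values": [32, 64, 128]},
    ],
    objective_name="accuracy",
    minimize=False,
    outcome_constraints=["latency <= 100"],
)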
Code Example #17
File: instantiation.py Project: proteanblank/Ax
def make_experiment(
    parameters: List[TParameterRepresentation],
    name: Optional[str] = None,
    parameter_constraints: Optional[List[str]] = None,
    outcome_constraints: Optional[List[str]] = None,
    status_quo: Optional[TParameterization] = None,
    experiment_type: Optional[str] = None,
    tracking_metric_names: Optional[List[str]] = None,
    # Single-objective optimization arguments:
    objective_name: Optional[str] = None,
    minimize: bool = False,
    # Multi-objective optimization arguments:
    objectives: Optional[Dict[str, str]] = None,
    objective_thresholds: Optional[List[str]] = None,
    support_intermediate_data: bool = False,
    immutable_search_space_and_opt_config: bool = True,
    is_test: bool = False,
) -> Experiment:
    """Instantiation wrapper that allows for Ax `Experiment` creation
    without importing or instantiating any Ax classes.

    Args:
        parameters: List of dictionaries representing parameters in the
            experiment search space.
            Required elements in the dictionaries are:
            1. "name" (name of parameter, string),
            2. "type" (type of parameter: "range", "fixed", or "choice", string),
            and one of the following:
            3a. "bounds" for range parameters (list of two values, lower bound
            first),
            3b. "values" for choice parameters (list of values), or
            3c. "value" for fixed parameters (single value).
            Optional elements are:
            1. "log_scale" (for float-valued range parameters, bool),
            2. "value_type" (to specify type that values of this parameter should
            take; expects "float", "int", "bool" or "str"),
            3. "is_fidelity" (bool) and "target_value" (float) for fidelity
            parameters,
            4. "is_ordered" (bool) for choice parameters,
            5. "is_task" (bool) for task parameters, and
            6. "digits" (int) for float-valued range parameters.
        name: Name of the experiment to be created.
        parameter_constraints: List of string representation of parameter
            constraints, such as "x3 >= x4" or "-x3 + 2*x4 - 3.5*x5 >= 2". For
            the latter constraints, any number of arguments is accepted, and
            acceptable operators are "<=" and ">=".
        outcome_constraints: List of string representation of outcome
            constraints of form "metric_name >= bound", like "m1 <= 3."
        status_quo: Parameterization of the current state of the system.
            If set, this will be added to each trial to be evaluated alongside
            test configurations.
        experiment_type: String indicating type of the experiment (e.g. name of
            a product in which it is used), if any.
        tracking_metric_names: Names of additional tracking metrics not used for
            optimization.
        objective_name: Name of the metric used as objective in this experiment,
            if experiment is single-objective optimization.
        minimize: Whether this experiment represents a minimization problem, if
            experiment is a single-objective optimization.
        objectives: Mapping from an objective name to "minimize" or "maximize"
            representing the direction for that objective. Used only for
            multi-objective optimization experiments.
        objective_thresholds: A list of objective threshold constraints for multi-
            objective optimization, in the same string format as `outcome_constraints`
            argument.
        support_intermediate_data: Whether trials may report metrics results for
            incomplete runs.
        immutable_search_space_and_opt_config: Whether the search space and
            optimization config on this experiment should be immutable, i.e. not
            updatable after creation. Defaults to True. If set to True, we won't
            store or load copies of the search space and optimization config on
            each generator run, which will improve storage performance.
        is_test: Whether this experiment will be a test experiment (useful for
            marking test experiments in storage etc). Defaults to False.
    """
    if objective_name is not None and (
        objectives is not None or objective_thresholds is not None
    ):
        raise UnsupportedError(
            "Ambiguous objective definition: for single-objective optimization "
            "`objective_name` and `minimize` arguments expected. For multi-objective "
            "optimization `objectives` and `objective_thresholds` arguments expected."
        )

    status_quo_arm = None if status_quo is None else Arm(parameters=status_quo)

    # TODO(jej): Needs to be decided per-metric when supporting heterogenous data.
    metric_cls = MapMetric if support_intermediate_data else Metric
    if objectives is None:
        optimization_config = OptimizationConfig(
            objective=Objective(
                metric=metric_cls(
                    name=objective_name or DEFAULT_OBJECTIVE_NAME,
                    lower_is_better=minimize,
                ),
                minimize=minimize,
            ),
            outcome_constraints=make_outcome_constraints(
                outcome_constraints or [], status_quo_arm is not None
            ),
        )
    else:
        optimization_config = make_optimization_config(
            objectives,
            objective_thresholds or [],
            outcome_constraints or [],
            status_quo_arm is not None,
        )

    tracking_metrics = (
        None
        if tracking_metric_names is None
        else [Metric(name=metric_name) for metric_name in tracking_metric_names]
    )

    default_data_type = (
        DataType.MAP_DATA if support_intermediate_data else DataType.DATA
    )

    immutable_ss_and_oc = immutable_search_space_and_opt_config
    properties = (
        {}
        if not immutable_search_space_and_opt_config
        else {Keys.IMMUTABLE_SEARCH_SPACE_AND_OPT_CONF.value: immutable_ss_and_oc}
    )

    return Experiment(
        name=name,
        search_space=make_search_space(parameters, parameter_constraints or []),
        optimization_config=optimization_config,
        status_quo=status_quo_arm,
        experiment_type=experiment_type,
        tracking_metrics=tracking_metrics,
        default_data_type=default_data_type,
        properties=properties,
        is_test=is_test,
    )
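A multi-objective usage sketch for this variant of `make_experiment`: `objectives` maps each objective name to "minimize" or "maximize", and `objective_thresholds` use the same string format as `outcome_constraints`, per the docstring above. Metric and parameter names are illustrative.

experiment = make_experiment(
    name="moo_experiment",
    parameters=[
        {"name": "x1", "type": "range", "bounds": [0.0, 1.0]},
        {"name": "x2", "type": "range", "bounds": [0.0, 1.0]},
    ],
    objectives={"accuracy": "maximize", "latency": "minimize"},
    objective_thresholds=["accuracy >= 0.8", "latency <= 100"],
    tracking_metric_names=["memory_usage"],
    support_intermediate_data=True,
)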
Code Example #18
File: experiment.py Project: Balandat/Ax
    def attach_data(
        self,
        data: Data,
        combine_with_last_data: bool = False,
        overwrite_existing_data: bool = False,
    ) -> int:
        """Attach data to experiment. Stores data in `experiment._data_by_trial`,
        to be looked up via `experiment.lookup_data_for_trial`.

        Args:
            data: Data object to store.
            combine_with_last_data: By default, when attaching data, it's identified
                by its timestamp, and `experiment.lookup_data_for_trial` returns
                data by most recent timestamp. Sometimes, however, we want to combine
                the data from multiple calls to `attach_data` into one dataframe.
                This might be because:
                    - We attached data for some metrics at one point and data for
                    the rest of the metrics later on.
                    - We attached data for some fidelity at one point and data for
                    another fidelity later on.
                To achieve that goal, set `combine_with_last_data` to `True`.
                In this case, we will take the most recent previously attached
                data, append the newly attached data to it, attach a new
                Data object with the merged result, and delete the old one.
                Afterwards, calls to `lookup_data_for_trial` will return this
                new combined data object. This operation will also validate that the
                newly added data does not contain observations for metrics that
                already have observations at the same fidelity in the most recent data.
            overwrite_existing_data: By default, we keep around all data that has
                ever been attached to the experiment. However, if we know that
                the incoming data contains all the information we need for a given
                trial, we can replace the existing data for that trial, thereby
                reducing the amount we need to store in the database.

        Returns:
            Timestamp of storage in millis.
        """
        if combine_with_last_data and overwrite_existing_data:
            raise UnsupportedError(
                "Cannot set both combine_with_last_data=True and "
                "overwrite_existing_data=True. Data can either be "
                "combined, or overwritten, or neither.")
        data_type = type(data)
        data_init_args = data.serialize_init_args(data)
        if data.df.empty:
            raise ValueError("Data to attach is empty.")
        metrics_not_on_exp = set(data.true_df["metric_name"].values) - set(
            self.metrics.keys())
        if metrics_not_on_exp:
            logger.info(
                f"Attached data has some metrics ({metrics_not_on_exp}) that are "
                "not among the metrics on this experiment. Note that attaching data "
                "will not automatically add those metrics to the experiment. "
                "For these metrics to be automatically fetched by `experiment."
                "fetch_data`, add them via `experiment.add_tracking_metric` or update "
                "the experiment's optimization config.")
        cur_time_millis = current_timestamp_in_millis()
        for trial_index, trial_df in data.true_df.groupby(
                data.true_df["trial_index"]):
            current_trial_data = (self._data_by_trial[trial_index]
                                  if trial_index in self._data_by_trial else
                                  OrderedDict())
            if combine_with_last_data and len(current_trial_data) > 0:
                last_ts, last_data = list(current_trial_data.items())[-1]
                last_data_type = type(last_data)
                merge_keys = ["trial_index", "metric_name", "arm_name"
                              ] + (last_data.map_keys if issubclass(
                                  last_data_type, MapData) else [])
                merged = pd.merge(
                    last_data.true_df,
                    trial_df,
                    on=merge_keys,
                    how="inner",
                )
                if not merged.empty:
                    raise ValueError(
                        f"Last data for trial {trial_index} already contained an "
                        f"observation for metric {merged.head()['metric_name']}."
                    )
                del current_trial_data[last_ts]
                current_trial_data[
                    cur_time_millis] = last_data_type.from_multiple_data([
                        last_data,
                        last_data_type(trial_df, **data_init_args),
                    ])
            elif overwrite_existing_data:
                if len(current_trial_data) > 0:
                    _, last_data = list(current_trial_data.items())[-1]
                    last_data_metrics = set(last_data.df["metric_name"])
                    new_data_metrics = set(trial_df["metric_name"])
                    if last_data_metrics.difference(new_data_metrics):
                        raise ValueError(
                            "overwrite_trial_data is True, but the new data contains "
                            "only a subset of the metrics that are present in the "
                            "previous data.")
                current_trial_data = OrderedDict(
                    {cur_time_millis: data_type(trial_df, **data_init_args)})
            else:
                current_trial_data[cur_time_millis] = data_type(
                    trial_df, **data_init_args)
            self._data_by_trial[trial_index] = current_trial_data

        return cur_time_millis
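A sketch of attaching data in two passes and merging them, following the `combine_with_last_data` behavior described in the docstring; it assumes `Data` is `ax.core.data.Data` and that the columns below match Ax's usual schema.

import pandas as pd
from ax.core.data import Data

# First, attach results for one metric of trial 0.
experiment.attach_data(Data(df=pd.DataFrame({
    "trial_index": [0],
    "arm_name": ["0_0"],
    "metric_name": ["accuracy"],
    "mean": [0.82],
    "sem": [0.01],
})))

# Later, attach a different metric for the same trial and merge it with the
# previously attached data (no overlapping metric/arm rows, so the merge
# validation above passes).
experiment.attach_data(
    Data(df=pd.DataFrame({
        "trial_index": [0],
        "arm_name": ["0_0"],
        "metric_name": ["latency"],
        "mean": [42.0],
        "sem": [0.5],
    })),
    combine_with_last_data=True,
)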