Example 1
    def trials_as_df(self) -> Optional[pd.DataFrame]:
        """Puts information on individual trials into a data frame for easy
        viewing. For example:
        Gen. Step | Model | Trial Index | Trial Status | Arm Parameterizations
        0         | Sobol | 0           | RUNNING      | {"0_0":{"x":9.17...}}
        """
        logger.info(
            "Note that parameter values in dataframe are rounded to 2 decimal "
            "places; the values in the dataframe are thus not the exact ones "
            "suggested by Ax in trials.")
        if self._experiment is None or all(
                len(trials) == 0
                for trials in self.trial_indices_by_step.values()):
            return None
        records = [{
            "Generation Step": step_idx,
            "Generation Model": self._steps[step_idx].model_name,
            "Trial Index": trial_idx,
            "Trial Status": self.experiment.trials[trial_idx].status.name,
            "Arm Parameterizations": {
                arm.name: _round_floats_for_logging(arm.parameters)
                for arm in self.experiment.trials[trial_idx].arms
            },
        } for step_idx, trials in self.trial_indices_by_step.items()
          for trial_idx in trials]
        return pd.DataFrame.from_records(records).reindex(columns=[
            "Generation Step",
            "Generation Model",
            "Trial Index",
            "Trial Status",
            "Arm Parameterizations",
        ])
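
A minimal usage sketch for the snippet above (assuming `gs` is an Ax `GenerationStrategy` that has already generated trials for an experiment; the variable name is illustrative):

# Hypothetical usage: render per-trial bookkeeping as a pandas DataFrame.
df = gs.trials_as_df()
if df is not None:  # None is returned when no trials exist yet
    print(df.to_string(index=False))
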
Example 2
    def update_trial_data(
        self,
        trial_index: int,
        raw_data: TEvaluationOutcome,
        metadata: Optional[Dict[str, Union[str, int]]] = None,
        sample_size: Optional[int] = None,
    ) -> None:
        """
        Attaches additional data for a completed trial (for example, if the
        trial was completed with data for only one of the required metrics and
        more data needs to be attached).

        Args:
            trial_index: Index of trial within the experiment.
            raw_data: Evaluation data for the trial. Can be a mapping from
                metric name to a tuple of mean and SEM, just a tuple of mean and
                SEM if there is only one metric in the optimization, or just the
                mean if there is no SEM. Can also be a list of (fidelities,
                mapping from metric name to a tuple of mean and SEM).
            metadata: Additional metadata to track about this run.
            sample_size: Number of samples collected for the underlying arm,
                optional.
        """
        assert isinstance(
            trial_index, int
        ), f"Trial index must be an int, got: {trial_index}."  # pragma: no cover
        trial = self._get_trial(trial_index=trial_index)
        if not trial.status.is_completed:
            raise ValueError(
                f"Trial {trial.index} has not yet been completed with data."
                "To complete it, use `ax_client.complete_trial`.")
        sample_sizes = {
            not_none(trial.arm).name: sample_size
        } if sample_size else {}
        evaluations, data = self._make_evaluations_and_data(
            trial=trial,
            raw_data=raw_data,
            metadata=metadata,
            sample_sizes=sample_sizes)
        trial._run_metadata.update(metadata or {})
        # Registering the trial data update is needed for generation strategies
        # that leverage the `update` functionality of the model-and-bridge setup
        # and therefore need to be aware of new data added to the experiment.
        # Usually this happens seamlessly, by looking at newly completed trials,
        # but in this case the trial status does not change, so we manually
        # register the new data.
        # Currently this call will only result in a `NotImplementedError` if the
        # generation strategy uses `update` (`GenerationStep.use_update` is False
        # by default).
        self.generation_strategy._register_trial_data_update(
            trial=trial, data=data)
        self.experiment.attach_data(data, combine_with_last_data=True)
        data_for_logging = _round_floats_for_logging(
            item=evaluations[next(iter(evaluations.keys()))])
        logger.info(
            f"Added data: {data_for_logging} to trial {trial.index}.")
        self._save_experiment_to_db_if_possible(
            experiment=self.experiment,
            suppress_all_errors=self._suppress_storage_errors,
        )
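
To show where `update_trial_data` fits relative to `complete_trial`, a hedged sketch against an `AxClient` instance (the metric names and values are made up):

# Complete the trial with data for one metric first...
ax_client.complete_trial(trial_index=0, raw_data={"latency": (12.3, 0.4)})
# ...then attach the remaining metric's data to the already-completed trial.
ax_client.update_trial_data(trial_index=0, raw_data={"accuracy": (0.91, 0.01)})
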
Example 3
    def complete_trial(
        self,
        trial_index: int,
        raw_data: TEvaluationOutcome,
        metadata: Optional[Dict[str, Union[str, int]]] = None,
        sample_size: Optional[int] = None,
    ) -> None:
        """
        Completes the trial with given metric values and adds optional metadata
        to it.

        Args:
            trial_index: Index of trial within the experiment.
            raw_data: Evaluation data for the trial. Can be a mapping from
                metric name to a tuple of mean and SEM, just a tuple of mean and
                SEM if there is only one metric in the optimization, or just the
                mean if there is no SEM. Can also be a list of (fidelities,
                mapping from metric name to a tuple of mean and SEM).
            metadata: Additional metadata to track about this run.
            sample_size: Number of samples collected for the underlying arm,
                optional.
        """
        # Validate that trial can be completed.
        if not isinstance(trial_index, int):  # pragma: no cover
            raise ValueError(
                f"Trial index must be an int, got: {trial_index}.")
        trial = self._get_trial(trial_index=trial_index)
        self._validate_can_complete_trial(trial=trial)

        # Format the data to save.
        sample_sizes = {
            not_none(trial.arm).name: sample_size
        } if sample_size else {}
        evaluations, data = self._make_evaluations_and_data(
            trial=trial,
            raw_data=raw_data,
            metadata=metadata,
            sample_sizes=sample_sizes)
        trial._run_metadata = metadata or {}
        for metric_name in data.df["metric_name"].values:
            if metric_name not in self.experiment.metrics:
                logger.info(
                    f"Data was logged for metric {metric_name} that was not yet "
                    "tracked on the experiment. Adding it as tracking metric.")
                self.experiment.add_tracking_metric(Metric(name=metric_name))
        self.experiment.attach_data(data=data)
        trial.mark_completed()
        data_for_logging = _round_floats_for_logging(
            item=evaluations[next(iter(evaluations.keys()))])
        logger.info(f"Completed trial {trial_index} with data: "
                    f"{_round_floats_for_logging(item=data_for_logging)}.")
        self._save_updated_trial_to_db_if_possible(
            experiment=self.experiment,
            trial=trial,
            suppress_all_errors=self._suppress_storage_errors,
        )
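
The `raw_data` argument documented above accepts several shapes; a sketch of each (metric names and values are illustrative):

ax_client.complete_trial(trial_index=0, raw_data={"m": (0.5, 0.05)})  # metric -> (mean, SEM)
ax_client.complete_trial(trial_index=1, raw_data=(0.5, 0.05))  # single metric: (mean, SEM)
ax_client.complete_trial(trial_index=2, raw_data=0.5)  # single metric: mean only
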
Example 4
    def complete_trial(
        self,
        trial_index: int,
        raw_data: TEvaluationOutcome,
        metadata: Optional[Dict[str, Union[str, int]]] = None,
        sample_size: Optional[int] = None,
    ) -> None:
        """
        Completes the trial with given metric values and adds optional metadata
        to it.

        Args:
            trial_index: Index of trial within the experiment.
            raw_data: Evaluation data for the trial. Can be a mapping from
                metric name to a tuple of mean and SEM, just a tuple of mean and
                SEM if there is only one metric in the optimization, or just the
                mean if there is no SEM. Can also be a list of (fidelities,
                mapping from metric name to a tuple of mean and SEM).
            metadata: Additional metadata to track about this run.
            sample_size: Number of samples collected for the underlying arm,
                optional.
        """
        assert isinstance(
            trial_index, int
        ), f"Trial index must be an int, got: {trial_index}."  # pragma: no cover
        trial = self._get_trial(trial_index=trial_index)
        if metadata is not None:
            trial._run_metadata = metadata

        arm_name = not_none(trial.arm).name
        evaluations = {
            arm_name:
            raw_data_to_evaluation(raw_data=raw_data,
                                   objective_name=self.objective_name)
        }
        sample_sizes = {arm_name: sample_size} if sample_size else {}
        data = data_from_evaluations(
            evaluations=evaluations,
            trial_index=trial.index,
            sample_sizes=sample_sizes,
            start_time=(checked_cast_optional(int, metadata.get("start_time"))
                        if metadata is not None else None),
            end_time=(checked_cast_optional(int, metadata.get("end_time"))
                      if metadata is not None else None),
        )
        # In service API, a trial may be completed multiple times (for multiple
        # metrics, for example).
        trial.mark_completed(allow_repeat_completion=True)
        self.experiment.attach_data(data)
        data_for_logging = _round_floats_for_logging(
            item=evaluations[next(iter(evaluations.keys()))])
        logger.info(f"Completed trial {trial_index} with data: "
                    f"{_round_floats_for_logging(item=data_for_logging)}.")
        self._updated_trials.append(trial_index)
        self._save_experiment_and_generation_strategy_to_db_if_possible()
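
Since this variant reads optional `start_time` and `end_time` integers out of `metadata` (via the checked casts above), a hedged call sketch (the timestamp values, and their unit, are assumptions for illustration):

ax_client.complete_trial(
    trial_index=0,
    raw_data=(0.5, 0.0),
    metadata={"start_time": 1609459200, "end_time": 1609462800},  # ints, per the checked casts
)
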
Example 5
    def update_trial_data(
        self,
        trial_index: int,
        raw_data: TEvaluationOutcome,
        metadata: Optional[Dict[str, Union[str, int]]] = None,
        sample_size: Optional[int] = None,
    ) -> None:
        """
        Attaches additional data for a completed trial (for example, if the
        trial was completed with data for only one of the required metrics and
        more data needs to be attached).

        Args:
            trial_index: Index of trial within the experiment.
            raw_data: Evaluation data for the trial. Can be a mapping from
                metric name to a tuple of mean and SEM, just a tuple of mean and
                SEM if there is only one metric in the optimization, or just the
                mean if there is no SEM. Can also be a list of (fidelities,
                mapping from metric name to a tuple of mean and SEM).
            metadata: Additional metadata to track about this run.
            sample_size: Number of samples collected for the underlying arm,
                optional.
        """
        assert isinstance(
            trial_index, int
        ), f"Trial index must be an int, got: {trial_index}."  # pragma: no cover
        trial = self._get_trial(trial_index=trial_index)
        if not trial.status.is_completed:
            raise ValueError(
                f"Trial {trial.index} has not yet been completed with data."
                "To complete it, use `ax_client.complete_trial`.")
        sample_sizes = {
            not_none(trial.arm).name: sample_size
        } if sample_size else {}
        evaluations, data = self._make_evaluations_and_data(
            trial=trial,
            raw_data=raw_data,
            metadata=metadata,
            sample_sizes=sample_sizes)
        trial._run_metadata.update(metadata or {})
        self.experiment.attach_data(data, combine_with_last_data=True)
        data_for_logging = _round_floats_for_logging(
            item=evaluations[next(iter(evaluations.keys()))])
        logger.info(
            f"Added data: {data_for_logging} to trial {trial.index}.")
        self._save_experiment_and_maybe_generation_strategy_to_db_if_possible(
            save_generation_strategy=False,  # No changes made to gen. strategy.
            suppress_all_errors=self._suppress_storage_errors,
        )
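
A hedged sketch of the `sample_size` path, which this method records under the trial's arm name (the metric and numbers are illustrative):

ax_client.update_trial_data(
    trial_index=3,
    raw_data={"ctr": (0.021, 0.004)},
    sample_size=10000,  # stored per arm via `sample_sizes` above
)
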
Example 6
    def complete_trial(
        self,
        trial_index: int,
        raw_data: TEvaluationOutcome,
        metadata: Optional[Dict[str, Union[str, int]]] = None,
        sample_size: Optional[int] = None,
    ) -> None:
        """
        Completes the trial with given metric values and adds optional metadata
        to it.

        NOTE: When ``raw_data`` does not specify SEM for a given metric, Ax
        will default to the assumption that the data is noisy (specifically,
        corrupted by additive zero-mean Gaussian noise) and that the
        level of noise should be inferred by the optimization model. To
        indicate that the data is noiseless, set SEM to 0.0, for example:

        .. code-block:: python

          ax_client.complete_trial(
              trial_index=0,
              raw_data={"my_objective": (objective_mean_value, 0.0)}
          )

        Args:
            trial_index: Index of trial within the experiment.
            raw_data: Evaluation data for the trial. Can be a mapping from
                metric name to a tuple of mean and SEM, just a tuple of mean and
                SEM if there is only one metric in the optimization, or just the
                mean if the SEM is unknown (then Ax will infer the observation
                noise level). Can also be a list of (fidelities, mapping from
                metric name to a tuple of mean and SEM).
            metadata: Additional metadata to track about this run.
            sample_size: Number of samples collected for the underlying arm,
                optional.
        """
        # Validate that trial can be completed.
        if not isinstance(trial_index, int):  # pragma: no cover
            raise ValueError(f"Trial index must be an int, got: {trial_index}.")
        trial = self._get_trial(trial_index=trial_index)
        self._validate_can_complete_trial(trial=trial)

        # Format the data to save.
        sample_sizes = {not_none(trial.arm).name: sample_size} if sample_size else {}
        evaluations, data = self._make_evaluations_and_data(
            trial=trial, raw_data=raw_data, metadata=metadata, sample_sizes=sample_sizes
        )
        self._validate_trial_data(trial=trial, data=data)
        trial._run_metadata = metadata or {}

        self.experiment.attach_data(data=data)
        trial.mark_completed()
        data_for_logging = _round_floats_for_logging(
            item=evaluations[next(iter(evaluations.keys()))]
        )
        logger.info(
            f"Completed trial {trial_index} with data: {data_for_logging}."
        )
        self._save_updated_trial_to_db_if_possible(
            experiment=self.experiment,
            trial=trial,
            suppress_all_errors=self._suppress_storage_errors,
        )
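
Complementing the noiseless example in the docstring, a sketch of the noisy case, where the SEM is omitted so that Ax infers the observation noise level (names follow the docstring's own example):

ax_client.complete_trial(
    trial_index=0,
    raw_data={"my_objective": objective_mean_value},  # mean only; noise inferred
)
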