Example #1
    def gen(
        self,
        experiment: Experiment,
        new_data: Optional[Data] = None,  # Take in just the new data.
        n: int = 1,
        **kwargs: Any,
    ) -> GeneratorRun:
        """Produce the next points in the experiment."""
        self._set_experiment(experiment=experiment)

        # Get arm signatures for each entry in new_data that is indeed new.
        new_arms = self._get_new_arm_signatures(experiment=experiment,
                                                new_data=new_data)
        enough_observed = (len(self._observed) +
                           len(new_arms)) >= self._curr.min_arms_observed
        unlimited_arms = self._curr.num_arms == -1
        enough_generated = (not unlimited_arms
                            and len(self._generated) >= self._curr.num_arms)
        remaining_arms = self._curr.num_arms - len(self._generated)

        # Check that the minimum number of observed arms is satisfied, if enforced.
        if self._curr.enforce_num_arms and enough_generated and not enough_observed:
            raise ValueError(
                "All trials for the current model have been generated, but not "
                "enough data has been observed to fit the next model. Try again "
                "when more data are available.")
            # TODO[Lena, T44021164]: take into account failed trials. Potentially
            # reduce `_generated` count when a trial mentioned in new data failed.
        if (self._curr.enforce_num_arms and not unlimited_arms
                and 0 < remaining_arms < n):
            raise ValueError(
                f"Cannot generate {n} new arms as there are only {remaining_arms} "
                "remaining arms to generate using the current model.")

        all_data = (Data.from_multiple_data(
            data=[self._data, new_data]) if new_data else self._data)

        if self._model is None:
            # Instantiate the first model.
            self._set_current_model(experiment=experiment, data=all_data)
        elif enough_generated and enough_observed:
            # Change to the next model.
            self._change_model(experiment=experiment, data=all_data)
        elif new_data is not None:
            # We're sticking with the current model, but should update it
            # with new data.
            # pyre-fixme[16]: `Optional` has no attribute `update`.
            self._model.update(experiment=experiment, data=new_data)

        kwargs = consolidate_kwargs(
            kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
            keywords=get_function_argument_names(not_none(self._model).gen),
        )
        gen_run = not_none(self._model).gen(n=n, **kwargs)

        # If nothing failed, update known data, _generated, and _observed.
        self._data = all_data
        self._generated.extend([arm.signature for arm in gen_run.arms])
        self._observed.extend(new_arms)
        self._generator_runs.append(gen_run)
        return gen_run
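
A minimal driver sketch for `gen`, assembled only from calls that appear in the other examples here (`new_trial` in Example #23, `.run()` in Example #26, `fetch_trials_data` in Example #4); `exp` and `strategy` are assumed to be an existing `Experiment` and `GenerationStrategy`:

    # Hypothetical loop: generate a candidate, run it, and feed back only
    # the data that is new since the previous `gen` call.
    new_data = None
    for _ in range(3):
        generator_run = strategy.gen(experiment=exp, new_data=new_data, n=1)
        trial = exp.new_trial(generator_run=generator_run).run()
        new_data = exp.fetch_trials_data(trial_indices=[trial.index])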
Example #2
    def _fetch_trial_data(self,
                          trial_index: int,
                          metrics: Optional[List[Metric]] = None,
                          **kwargs: Any) -> Data:
        if not self.metrics and not metrics:
            raise ValueError(
                "No metrics to fetch data for, as no metrics are defined for "
                "this experiment, and none were passed in to `fetch_trial_data`."
            )
        trial = self.trials[trial_index]

        if (trial.status == TrialStatus.CANDIDATE
                or trial.status == TrialStatus.DISPATCHED):
            return self.lookup_data_for_trial(trial_index=trial_index)
        elif not trial.status.expecting_data:
            return Data()

        try:
            return Data.from_multiple_data([
                metric_cls.fetch_trial_data_multi(trial, metric_list, **kwargs)
                for metric_cls, metric_list in self._metrics_by_class(
                    metrics=metrics).items()
            ])
        except NotImplementedError:
            # If some of the metrics do not implement data fetching, we should
            # fall back to data that has been attached.
            return self.lookup_data_for_trial(trial_index=trial_index)
Example #3
 def _lookup_or_fetch_trials_data(
     self,
     trials: List[BaseTrial],
     metrics: Optional[Iterable[Metric]] = None,
     **kwargs: Any,
 ) -> Data:
     if not self.metrics and not metrics:
         raise ValueError(
             "No metrics to fetch data for, as no metrics are defined for "
             "this experiment, and none were passed in to `fetch_data`."
         )
     if not any(t.status.expecting_data for t in trials):
         return Data()
     metrics_to_fetch = list(metrics or self.metrics.values())
     metrics_by_class = self._metrics_by_class(metrics=metrics_to_fetch)
     data_list = []
     for metric_cls in metrics_by_class:
         data_list.append(
             metric_cls.lookup_or_fetch_experiment_data_multi(
                 experiment=self,
                 metrics=metrics_by_class[metric_cls],
                 trials=trials,
                 **kwargs,
             )
         )
     return Data.from_multiple_data(data=data_list)
Example #4
 def testFetchTrialsData(self):
     exp = self._setupBraninExperiment(n=5)
     batch_0 = exp.trials[0]
     batch_1 = exp.trials[1]
     batch_0_data = exp.fetch_trials_data(trial_indices=[0])
     self.assertEqual(set(batch_0_data.df["trial_index"].values), {0})
     self.assertEqual(
         set(batch_0_data.df["arm_name"].values), {a.name for a in batch_0.arms}
     )
     batch_1_data = exp.fetch_trials_data(trial_indices=[1])
     self.assertEqual(set(batch_1_data.df["trial_index"].values), {1})
     self.assertEqual(
         set(batch_1_data.df["arm_name"].values), {a.name for a in batch_1.arms}
     )
     self.assertEqual(
         exp.fetch_trials_data(trial_indices=[0, 1]),
         Data.from_multiple_data([batch_0_data, batch_1_data]),
     )
     with self.assertRaisesRegex(ValueError, ".* not associated .*"):
         exp.fetch_trials_data(trial_indices=[2])
     # Try to fetch data when there are only metrics and no attached data.
     exp.remove_tracking_metric(metric_name="b")  # Remove implemented metric.
     exp.add_tracking_metric(Metric(name="dummy"))  # Add unimplemented metric.
     self.assertTrue(exp.fetch_trials_data(trial_indices=[0]).df.empty)
     # Try fetching attached data.
     exp.attach_data(batch_0_data)
     exp.attach_data(batch_1_data)
     self.assertEqual(exp.fetch_trials_data(trial_indices=[0]), batch_0_data)
     self.assertEqual(exp.fetch_trials_data(trial_indices=[1]), batch_1_data)
     self.assertEqual(set(batch_0_data.df["trial_index"].values), {0})
     self.assertEqual(
         set(batch_0_data.df["arm_name"].values), {a.name for a in batch_0.arms}
     )
Example #5
    def testFromMultipleDataReturnSubclass(self):
        CustomData = custom_data_class(column_data_types={"metadata": str},
                                       required_columns={"metadata"})
        data = [
            CustomData(df=pd.DataFrame([
                {
                    "arm_name": "0_1",
                    "mean": 3.7,
                    "sem": 0.5,
                    "metric_name": "b",
                    "metadata": "42",
                },
                {
                    "arm_name": "0_2",
                    "mean": 3.7,
                    "sem": 1.5,
                    "metric_name": "x",
                    "metadata": "43",
                },
            ])),
            CustomData(df=pd.DataFrame([{
                "arm_name": "0_3",
                "mean": 2.4,
                "sem": 0.1,
                "metric_name": "a",
                "metadata": "42",
            }])),
        ]

        returned_data_object = Data.from_multiple_data(data)

        self.assertIsInstance(returned_data_object, CustomData)
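
The subclass-preserving return is what keeps custom columns usable downstream; a short continuation of the test body above (same `data` list):

    # Since `from_multiple_data` returned `CustomData`, the custom required
    # column survives concatenation.
    combined = Data.from_multiple_data(data)
    self.assertIn("metadata", combined.df.columns)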
Example #6
 def fetch_data(self,
                metrics: Optional[List[Metric]] = None,
                **kwargs: Any) -> Data:
     return Data.from_multiple_data([
         trial.fetch_data(**kwargs, metrics=metrics)
         if trial.status.expecting_data else Data()
         for trial in self.trials.values()
     ])
Example #7
    def gen(
        self,
        experiment: Experiment,
        new_data: Optional[Data] = None,  # Take in just the new data.
        n: int = 1,
        **kwargs: Any,
    ) -> GeneratorRun:
        """Produce the next points in the experiment."""
        # Get arm signatures for each entry in new_data that is indeed new.
        new_arms = self._get_new_arm_signatures(experiment=experiment,
                                                new_data=new_data)

        enough_observed = (len(self._observed) +
                           len(new_arms)) >= self._curr.min_arms_observed
        unlimited_arms = self._curr.num_arms == -1
        enough_generated = (not unlimited_arms
                            and len(self._generated) >= self._curr.num_arms)
        remaining_arms = self._curr.num_arms - len(self._generated)

        # Check that the minimum number of observed arms is satisfied, if enforced.
        if self._curr.enforce_num_arms and enough_generated and not enough_observed:
            raise ValueError(
                "All trials for the current model have been generated, but not "
                "enough data has been observed to fit the next model. Try again "
                "when more data are available.")

        if (self._curr.enforce_num_arms and not unlimited_arms
                and 0 < remaining_arms < n):
            raise ValueError(
                f"Cannot generate {n} new arms as there are only {remaining_arms} "
                "remaining arms to generate using the current model.")

        all_data = (Data.from_multiple_data(
            data=[self._data, new_data]) if new_data else self._data)

        if self._model is None:
            # Instantiate the first model.
            self._set_current_model(experiment=experiment,
                                    data=all_data,
                                    **kwargs)
        elif enough_generated and enough_observed:
            # Change to the next model.
            self._change_model(experiment=experiment, data=all_data, **kwargs)
        elif new_data is not None:
            # We're sticking with the current model, but should update it
            # with new data.
            self._model.update(experiment=experiment, data=new_data)

        gen_run = not_none(self._model).gen(n=n,
                                            **(self._curr.model_gen_kwargs
                                               or {}))

        # If nothing failed, update known data, _generated, and _observed.
        self._data = all_data
        self._observed.extend(new_arms)
        self._generated.extend(a.signature for a in gen_run.arms)
        return gen_run
Example #8
    def _get_new_data(self) -> Data:
        """
        Returns new data since the last run of the generator.

        Returns:
            Latest data.
        """
        return Data.from_multiple_data(
            [self.experiment.lookup_data_for_trial(idx) for idx in self._updated_trials]
        )
Example #9
    def testFromMultipleDataMismatchedTypes(self):
        # create two custom data types
        CustomDataA = custom_data_class(
            column_data_types={"metadata": str, "created_time": pd.Timestamp},
            required_columns={"metadata"},
        )

        CustomDataB = custom_data_class(column_data_types={"year": pd.Timestamp})

        # Test data of multiple empty custom types raises a value error
        with self.assertRaises(ValueError):
            Data.from_multiple_data([CustomDataA(), CustomDataB()])

        # Test data of multiple non-empty types raises a value error
        with self.assertRaises(ValueError):
            data_elt_A = CustomDataA(
                df=pd.DataFrame(
                    [
                        {
                            "arm_name": "0_1",
                            "mean": 3.7,
                            "sem": 0.5,
                            "metric_name": "b",
                            "metadata": "42",
                            "created_time": "2018-09-20",
                        }
                    ]
                )
            )
            data_elt_B = CustomDataB(
                df=pd.DataFrame(
                    [
                        {
                            "arm_name": "0_1",
                            "mean": 3.7,
                            "sem": 0.5,
                            "metric_name": "b",
                            "year": "2018-09-20",
                        }
                    ]
                )
            )
            Data.from_multiple_data([data_elt_A, data_elt_B])
Example #10
    def fetch_trial_data_multi(cls, trial: core.base_trial.BaseTrial,
                               metrics: Iterable[Metric],
                               **kwargs: Any) -> Data:
        """Fetch multiple metrics data for one trial.

        Default behavior calls `fetch_trial_data` for each metric.
        Subclasses should override this to batch trial data computation
        for multiple metrics.
        """
        return Data.from_multiple_data(
            [metric.fetch_trial_data(trial, **kwargs) for metric in metrics])
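
For contrast with the per-metric default above, a subclass that can fetch several metrics in one call might override the hook roughly as follows (a sketch: `query_backend` is a hypothetical helper, and `Data`, `Metric`, and `pd` are assumed imported as elsewhere in Ax):

    class BatchedMetric(Metric):
        @classmethod
        def fetch_trial_data_multi(cls, trial, metrics, **kwargs):
            # Hypothetical: one backend query covering all metric names,
            # instead of one `fetch_trial_data` round trip per metric.
            rows = query_backend(trial.index, [m.name for m in metrics])
            return Data(df=pd.DataFrame(rows))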
Example #11
    def eval(self) -> Data:
        """
        Evaluate all arms in the experiment with the evaluation
        function passed as an argument to this SimpleExperiment.
        """

        return Data.from_multiple_data([
            self.eval_trial(trial) for trial in self.trials.values()
            if trial.status != TrialStatus.FAILED
        ])
Example #12
    def _fetch_trials_data(
        self,
        trials: List[BaseTrial],
        metrics: Optional[Iterable[Metric]] = None,
        **kwargs: Any,
    ) -> Data:
        if not self.metrics and not metrics:
            raise ValueError(
                "No metrics to fetch data for, as no metrics are defined for "
                "this experiment, and none were passed in to `fetch_data`.")
        metrics = list(metrics or self.metrics.values())
        if all(type(m) is Metric for m in metrics):
            # All metrics are 'dummy' base `Metric` class metrics, which do not
            # implement actual data-fetching logic, so we should look up
            # attached data instead of trying to fetch it via metric logic.
            return Data.from_multiple_data([
                self.lookup_data_for_trial(trial_index=t.index)[0]
                for t in trials
            ])
        elif all(
                isinstance(m, Metric) and type(m) is not Metric
                for m in metrics):
            # All metrics are subclasses of `Metric`, which should implement fetching.
            data_list = [
                metric_cls.fetch_experiment_data_multi(experiment=self,
                                                       metrics=metric_list,
                                                       trials=trials,
                                                       **kwargs)
                for metric_cls, metric_list in self._metrics_by_class(
                    metrics=metrics).items()
            ]
            # For trials in candidate phase, append any attached data
            for trial in trials:
                if trial.status == TrialStatus.CANDIDATE:
                    trial_data, _ = self.lookup_data_for_trial(
                        trial_index=trial.index)
                    if not trial_data.df.empty:
                        data_list.append(trial_data)

            return Data.from_multiple_data(data_list)

        raise ValueError(UNEXPECTED_METRIC_COMBINATION)
Example #13
    def fetch_data(self,
                   metrics: Optional[List[Metric]] = None,
                   **kwargs: Any) -> Data:
        """Fetches data for all metrics and trials on this experiment.

        Args:
            metrics: If provided, fetch data for these metrics instead of the ones
                defined on the experiment.
            kwargs: keyword args to pass to underlying metrics' fetch data functions.

        Returns:
            Data for the experiment.
        """
        if not self.metrics and not metrics:
            raise ValueError(
                "No metrics to fetch data for, as no metrics are defined for "
                "this experiment, and none were passed in to `fetch_data`.")
        try:
            data_list = [
                metric_cls.fetch_experiment_data_multi(self, metric_list,
                                                       **kwargs)
                for metric_cls, metric_list in self._metrics_by_class(
                    metrics=metrics).items()
            ]

            # For trials in candidate phase, append any attached data
            for trial in self.trials.values():
                if trial.status == TrialStatus.CANDIDATE:
                    trial_data = self.lookup_data_for_trial(
                        trial_index=trial.index)
                    if not trial_data.df.empty:
                        data_list.append(trial_data)

            return Data.from_multiple_data(data_list)
        except NotImplementedError:
            # If some of the metrics do not implement data fetching, we should
            # fall back to data that has been attached.
            return Data.from_multiple_data([
                self.lookup_data_for_trial(trial_index=idx)
                for idx in self.trials
            ])
Example #14
    def fetch_experiment_data(self, experiment: core.experiment.Experiment,
                              **kwargs: Any) -> Data:
        """Fetch this metric's data for an experiment.

        Default behavior is to fetch data from all trials expecting data
        and concatenate the results.
        """
        return Data.from_multiple_data([
            self.fetch_trial_data(trial, **kwargs)
            if trial.status.expecting_data else Data()
            for trial in experiment.trials.values()
        ])
Example #15
 def _fetch_trial_data_no_lookup(self, trial_index: int,
                                 metrics: Optional[List[Metric]],
                                 **kwargs: Any) -> Data:
     """Fetches data explicitly from metric logic, does not look up attached
     data on experiment.
     """
     return Data.from_multiple_data([
         metric_cls.fetch_trial_data_multi(self.trials[trial_index],
                                           metric_list, **kwargs)
         for metric_cls, metric_list in self._metrics_by_class(
             metrics=metrics).items()
     ])
Example #16
    def attach_data(self, data: Data, combine_with_last_data: bool = False) -> int:
        """Attach data to experiment. Stores data in `experiment._data_by_trial`,
        to be looked up via `experiment.lookup_data_by_trial`.

        Args:
            data: Data object to store.
            combine_with_last_data: By default, attached data is identified by
                its timestamp, and `experiment.lookup_data_by_trial` returns
                the data with the most recent timestamp. In some cases,
                however, the goal is to combine all data attached for a trial
                into a single `Data` object. To achieve that, every call to
                `attach_data` after the first for a given trial should set
                this flag to `True`. The newly attached data will then be
                appended to the existing data rather than stored as a separate
                object, and `lookup_data_by_trial` will return the combined
                data object instead of just the most recently added data. This
                also validates that the newly added data contains no
                observations for metrics that already have observations in the
                most recently stored data.

        Returns:
            Timestamp of storage in millis.
        """
        if data.df.empty:
            raise ValueError("Data to attach is empty.")
        cur_time_millis = current_timestamp_in_millis()
        for trial_index, trial_df in data.df.groupby(data.df["trial_index"]):
            current_trial_data = (
                self._data_by_trial[trial_index]
                if trial_index in self._data_by_trial
                else OrderedDict()
            )
            if combine_with_last_data and len(current_trial_data) > 0:
                last_ts, last_data = list(current_trial_data.items())[-1]
                merged = pd.merge(
                    last_data.df,
                    trial_df,
                    on=["trial_index", "metric_name", "arm_name"],
                    how="inner",
                )
                if not merged.empty:
                    raise ValueError(
                        f"Last data for trial {trial_index} already contained "
                        f"observations for metrics "
                        f"{set(merged['metric_name'])}."
                    )
                current_trial_data[cur_time_millis] = Data.from_multiple_data(
                    [last_data, Data(trial_df)]
                )
            else:
                current_trial_data[cur_time_millis] = Data(trial_df)
            self._data_by_trial[trial_index] = current_trial_data

        return cur_time_millis
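
A usage sketch of the `combine_with_last_data` behavior documented above, assuming `exp` is an `Experiment` and the two `Data` objects (hypothetical names) cover disjoint metrics for the same trial:

    # The first attachment is stored under its own timestamp.
    exp.attach_data(data_for_metric_a)
    # Later attachments merge into the prior object instead of being stored
    # separately; overlapping (trial, arm, metric) rows raise a ValueError.
    exp.attach_data(data_for_metric_b, combine_with_last_data=True)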
Example #17
    def eval(self) -> Data:
        """
        Evaluate all arms in the experiment with the evaluation
        function passed as an argument to this SimpleExperiment.
        """

        # TODO(jej)[T87591836] Support non-`Data` data types.
        return Data.from_multiple_data(
            [  # pyre-fixme [6]: Incompatible paramtype: Expected `Data`
                #   but got `AbstractDataFrameData`.
                self.eval_trial(trial) for trial in self.trials.values()
                if trial.status != TrialStatus.FAILED
            ])
Example #18
    def fetch_data(self,
                   metrics: Optional[List[Metric]] = None,
                   **kwargs: Any) -> Data:
        if metrics is not None:
            raise ValueError(  # pragma: no cover
                "`metrics` argument is not supported for "
                "`MultiTypeExperiment.fetch_data`.")

        return Data.from_multiple_data([
            trial.fetch_data(**kwargs)
            if trial.status.expecting_data else Data()
            for trial in self.trials.values()
        ])
Example #19
    def fetch_experiment_data_multi(
        cls,
        experiment: "core.experiment.Experiment",
        metrics: Iterable["Metric"],
        **kwargs: Any,
    ) -> Data:
        """Fetch multiple metrics data for an experiment.

        Default behavior calls `fetch_experiment_data` for each metric.
        Subclasses should override this to batch data computation for multiple metrics.
        """
        return Data.from_multiple_data([
            metric.fetch_experiment_data(experiment, **kwargs)
            for metric in metrics
        ])
Example #20
    def _fetch_trial_data(self,
                          trial_index: int,
                          metrics: Optional[List[Metric]] = None,
                          **kwargs: Any) -> Data:
        if metrics is not None:
            raise ValueError(  # pragma: no cover
                "`metrics` argument is not supported for "
                "`MultiTypeExperiment._fetch_trial_data`.")

        trial = self.trials[trial_index]
        return Data.from_multiple_data([
            metric.fetch_trial_data(trial, **kwargs)
            if trial.trial_type == self.metric_to_trial_type[metric.name]
            else Data()
            for metric in self.metrics.values()
        ])
Example #21
File: metric.py Project: xiecong/Ax
    def fetch_experiment_data_multi(
        cls,
        experiment: "core.experiment.Experiment",
        metrics: Iterable["Metric"],
        **kwargs: Any,
    ) -> Data:
        """Fetch multiple metrics data for an experiment.

        Default behavior calls `fetch_trial_data_multi` for each trial.
        Subclasses should override to batch data computation across trials + metrics.
        """
        return Data.from_multiple_data([
            cls.fetch_trial_data_multi(trial, metrics, **kwargs)
            if trial.status.expecting_data else Data()
            for trial in experiment.trials.values()
        ])
Example #22
    def fetch_experiment_data_multi(
        cls,
        experiment: core.experiment.Experiment,
        metrics: Iterable[Metric],
        trials: Optional[Iterable[core.base_trial.BaseTrial]] = None,
        **kwargs: Any,
    ) -> Data:
        """Fetch multiple metrics data for an experiment.

        Default behavior calls `fetch_trial_data_multi` for each trial.
        Subclasses should override to batch data computation across trials + metrics.
        """
        return Data.from_multiple_data([
            cls.fetch_trial_data_multi(trial, metrics, **kwargs)
            if trial.status.expecting_data else Data()
            for trial in (experiment.trials.values()
                          if trials is None else trials)
        ])
Example #23
    def _suggest_new_trial(self) -> Trial:
        """
        Suggest new candidate for this experiment.

        Args:
            n: Number of candidates to generate.

        Returns:
            Trial with candidate.
        """
        new_data = Data.from_multiple_data([
            self.experiment.lookup_data_for_trial(idx)
            for idx in self._updated_trials
        ])
        generator_run = not_none(self.generation_strategy).gen(
            experiment=self.experiment, new_data=new_data)
        return self.experiment.new_trial(generator_run=generator_run)
Example #24
    def testFetchTrialsData(self):
        exp = self._setupBraninExperiment(n=5)
        batch_0 = exp.trials[0]
        batch_1 = exp.trials[1]
        batch_0.mark_completed()
        batch_1.mark_completed()
        batch_0_data = exp.fetch_trials_data(trial_indices=[0])
        self.assertEqual(set(batch_0_data.df["trial_index"].values), {0})
        self.assertEqual(set(batch_0_data.df["arm_name"].values),
                         {a.name
                          for a in batch_0.arms})
        batch_1_data = exp.fetch_trials_data(trial_indices=[1])
        self.assertEqual(set(batch_1_data.df["trial_index"].values), {1})
        self.assertEqual(set(batch_1_data.df["arm_name"].values),
                         {a.name
                          for a in batch_1.arms})
        self.assertEqual(
            exp.fetch_trials_data(trial_indices=[0, 1]),
            Data.from_multiple_data([batch_0_data, batch_1_data]),
        )

        # Since NoisyFunction metric has overwrite_existing_data = False,
        # we should have two dfs per trial now
        self.assertEqual(len(exp.data_by_trial[0]), 2)

        with self.assertRaisesRegex(ValueError, ".* not associated .*"):
            exp.fetch_trials_data(trial_indices=[2])
        # Try to fetch data when there are only metrics and no attached data.
        exp.remove_tracking_metric(
            metric_name="b")  # Remove implemented metric.
        exp.add_tracking_metric(Metric(name="b"))  # Add unimplemented metric.
        self.assertEqual(len(exp.fetch_trials_data(trial_indices=[0]).df), 5)
        # Try fetching attached data.
        exp.attach_data(batch_0_data)
        exp.attach_data(batch_1_data)
        self.assertEqual(exp.fetch_trials_data(trial_indices=[0]),
                         batch_0_data)
        self.assertEqual(exp.fetch_trials_data(trial_indices=[1]),
                         batch_1_data)
        self.assertEqual(set(batch_0_data.df["trial_index"].values), {0})
        self.assertEqual(set(batch_0_data.df["arm_name"].values),
                         {a.name
                          for a in batch_0.arms})
Example #25
    def lookup_data_for_ts(self, timestamp: int) -> Data:
        """Collect data for all trials stored at this timestamp.

        Useful when data for many trials was fetched and stored simultaneously
        and the user wants to retrieve the same collection of data later.

        Can also be used to look up specific data for a single trial
        when the storage time is known.

        Args:
            timestamp: Timestamp in millis at which data was stored.

        Returns:
            Data object with all data stored at the timestamp.
        """
        trial_datas = []
        for _trial_index, ts_to_data in self._data_by_trial.items():
            if timestamp in ts_to_data:
                trial_datas.append(ts_to_data[timestamp])

        return Data.from_multiple_data(trial_datas)
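
Since `attach_data` (Example #16) returns the storage timestamp, the two methods pair naturally; a sketch assuming `exp` and `shared_data` exist:

    # Store data for several trials in one call; all rows share a timestamp.
    ts = exp.attach_data(shared_data)
    # Later, recover exactly that snapshot across all affected trials.
    snapshot = exp.lookup_data_for_ts(timestamp=ts)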
Example #26
 def run_benchmark_run(
         self, setup: BenchmarkSetup,
         generation_strategy: GenerationStrategy) -> BenchmarkSetup:
     remaining_iterations = setup.total_iterations
     updated_trials = []
     while remaining_iterations > 0:
         num_suggestions = min(remaining_iterations, setup.batch_size)
         generator_run = generation_strategy.gen(
             experiment=setup,
             new_data=Data.from_multiple_data(
                 [setup._fetch_trial_data(idx) for idx in updated_trials]),
             n=setup.batch_size,
         )
         updated_trials = []
         if setup.batch_size > 1:  # pragma: no cover
             trial = setup.new_batch_trial().add_generator_run(
                 generator_run).run()
         else:
             trial = setup.new_trial(generator_run=generator_run).run()
         updated_trials.append(trial.index)
         remaining_iterations -= num_suggestions
     return setup
Example #27
 def testEmptyData(self):
     df = Data().df
     self.assertTrue(df.empty)
      self.assertEqual(set(df.columns), set(REQUIRED_COLUMNS))
     self.assertTrue(Data.from_multiple_data([]).df.empty)
Example #28
    def lookup_or_fetch_experiment_data_multi(
        cls,
        experiment: core.experiment.Experiment,
        metrics: Iterable[Metric],
        trials: Optional[Iterable[core.base_trial.BaseTrial]] = None,
        **kwargs: Any,
    ) -> Data:
        """Fetch or lookup (with fallback to fetching) data for given metrics,
        depending on whether they are available while running.

        If a metric is available while the trial is running, its data can
        change (and therefore we should always re-fetch it). If a metric is
        available only upon trial completion, its data does not change, so we
        can look that data up on the experiment and fetch only the data that
        is not already attached to the experiment.

        NOTE: When fetching data for a metric class that is only available
        upon trial completion, data fetched in this function (data that was
        not yet available on the experiment) will be attached to the
        experiment.
        """
        # If this metric is available while the trial is running, just default
        # to `fetch_experiment_data_multi`.
        if cls.is_available_while_running():
            return cls.fetch_experiment_data_multi(experiment=experiment,
                                                   metrics=metrics,
                                                   trials=trials,
                                                   **kwargs)

        # If this metric is available only upon trial completion, look up data
        # on experiment and only fetch data that is not already cached.
        if trials is None:
            completed_trials = experiment.trials_by_status[
                core.base_trial.TrialStatus.COMPLETED]
        else:
            completed_trials = [t for t in trials if t.status.is_completed]

        if not completed_trials:
            return Data()

        trials_data = []
        for trial in completed_trials:
            cached_trial_data = experiment.lookup_data_for_trial(
                trial_index=trial.index)[0]

            cached_metric_names = cached_trial_data.metric_names
            metrics_to_fetch = [
                m for m in metrics if m.name not in cached_metric_names
            ]
            if not metrics_to_fetch:
                # If all needed data was fetched from the cache, there is no
                # need to fetch any other data for this trial.
                trials_data.append(cached_trial_data)
                continue

            try:
                fetched_trial_data = cls.fetch_experiment_data_multi(
                    experiment=experiment,
                    metrics=metrics_to_fetch,
                    trials=[trial],
                    **kwargs,
                )

            except NotImplementedError:
                # Metric does not implement fetching logic and only uses lookup.
                fetched_trial_data = Data()

            final_data = Data.from_multiple_data(
                [cached_trial_data, fetched_trial_data])
            if not final_data.df.empty:
                experiment.attach_data(final_data)
            trials_data.append(final_data)

        return Data.from_multiple_data(
            trials_data, subset_metrics=[m.name for m in metrics])
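
The caching branch above keys off the class-level `is_available_while_running` hook; a metric whose values are final at completion might opt in to cached lookups roughly like this (a sketch: only the hook's name comes from the code above):

    class FinalOnlyMetric(Metric):
        @classmethod
        def is_available_while_running(cls) -> bool:
            # Values never change once the trial completes, so
            # `lookup_or_fetch_experiment_data_multi` may safely reuse
            # data already attached to the experiment.
            return False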
Example #29
 def update_data(self, new_data):
     """Upadates data in an experiment and after a trial"""
     self.data = (Data.from_multiple_data(
         data=[self.data, new_data]) if new_data else self.data)
     return new_data