def test_get_pending_observation_features(self):
    # Pending observations should be none if there aren't any.
    self.assertIsNone(get_pending_observation_features(self.experiment))
    self.trial.mark_running(no_runner_required=True)
    # Now that the trial is deployed, it should become a pending trial on the
    # experiment and appear as pending for all metrics.
    self.assertEqual(
        get_pending_observation_features(self.experiment),
        {"tracking": [self.obs_feat], "m2": [self.obs_feat], "m1": [self.obs_feat]},
    )
    # With `fetch_data` on trial returning data for metric "m2", that metric
    # should no longer have pending observation features.
    with patch.object(
        self.trial,
        "fetch_data",
        return_value=Data.from_evaluations(
            {self.trial.arm.name: {"m2": (1, 0)}}, trial_index=self.trial.index
        ),
    ):
        self.assertEqual(
            get_pending_observation_features(self.experiment),
            {"tracking": [self.obs_feat], "m2": [], "m1": [self.obs_feat]},
        )
    # When a trial is marked failed, it should no longer appear in pending...
    self.trial.mark_failed()
    self.assertIsNone(get_pending_observation_features(self.experiment))
    # ... unless specified to include failed trials in pending observations.
    self.assertEqual(
        get_pending_observation_features(
            self.experiment, include_failed_as_pending=True
        ),
        {"tracking": [self.obs_feat], "m2": [self.obs_feat], "m1": [self.obs_feat]},
    )
    # When a trial is abandoned, it should appear in pending features whether
    # or not there is data for it.
    self.trial._status = TrialStatus.ABANDONED  # Cannot re-mark a failed trial.
    self.assertEqual(
        get_pending_observation_features(
            self.experiment, include_failed_as_pending=True
        ),
        {"tracking": [self.obs_feat], "m2": [self.obs_feat], "m1": [self.obs_feat]},
    )
    # Checking with data for all metrics.
    with patch.object(
        self.trial,
        "fetch_data",
        return_value=Data.from_evaluations(
            {self.trial.arm.name: {"m1": (1, 0), "m2": (1, 0), "tracking": (1, 0)}},
            trial_index=self.trial.index,
        ),
    ):
        self.assertEqual(
            get_pending_observation_features(self.experiment),
            {
                "tracking": [self.obs_feat],
                "m2": [self.obs_feat],
                "m1": [self.obs_feat],
            },
        )
def test_pending_observations_as_array(self):
    # Mark a trial dispatched so that there are pending observations.
    self.trial.mark_running(no_runner_required=True)
    # If outcome names are respected, unlisted metrics should be filtered out.
    self.assertEqual(
        [
            x.tolist()
            for x in pending_observations_as_array(
                pending_observations=get_pending_observation_features(
                    self.experiment
                ),
                outcome_names=["m2", "m1"],
                param_names=["x", "y", "z", "w"],
            )
        ],
        [[["1", "foo", "True", "4"]], [["1", "foo", "True", "4"]]],
    )
    self.experiment.attach_data(
        Data.from_evaluations(
            {self.trial.arm.name: {"m2": (1, 0)}}, trial_index=self.trial.index
        )
    )
    # There should be no pending observations for metric m2 now, since the
    # only existing trial has been updated with data for it.
    self.assertEqual(
        [
            x.tolist()
            for x in pending_observations_as_array(
                pending_observations=get_pending_observation_features(
                    self.experiment
                ),
                outcome_names=["m2", "m1"],
                param_names=["x", "y", "z", "w"],
            )
        ],
        [[], [["1", "foo", "True", "4"]]],
    )
def test_get_pending_observation_features(self):
    # Pending observations should be none if there aren't any.
    self.assertIsNone(get_pending_observation_features(self.experiment))
    self.trial.mark_dispatched()
    # Now that the trial is deployed, it should become a pending trial on the
    # experiment and appear as pending for all metrics.
    self.assertEqual(
        get_pending_observation_features(self.experiment),
        {"tracking": [self.obs_feat], "m2": [self.obs_feat], "m1": [self.obs_feat]},
    )
    self.experiment.attach_data(
        Data.from_evaluations(
            {self.trial.arm.name: {"m2": (1, 0)}}, trial_index=self.trial.index
        )
    )
    # Now m2 should have empty pending features, since the trial was updated
    # with data for m2.
    self.assertEqual(
        get_pending_observation_features(self.experiment),
        {"tracking": [self.obs_feat], "m2": [], "m1": [self.obs_feat]},
    )
    # When a trial is marked failed, it should no longer appear in pending...
    self.trial.mark_failed()
    self.assertIsNone(get_pending_observation_features(self.experiment))
    # ... unless specified to include failed trials in pending observations.
    self.assertEqual(
        get_pending_observation_features(
            self.experiment, include_failed_as_pending=True
        ),
        {"tracking": [self.obs_feat], "m2": [self.obs_feat], "m1": [self.obs_feat]},
    )
def complete_trial(
    self,
    trial_index: int,
    raw_data: TEvaluationOutcome,
    metadata: Optional[Dict[str, str]] = None,
    sample_size: Optional[int] = None,
) -> None:
    """
    Completes the trial with given metric values and adds optional metadata
    to it.

    Args:
        trial_index: Index of trial within the experiment.
        raw_data: Evaluation data for the trial. Can be a mapping from
            metric name to a tuple of mean and SEM, just a tuple of mean and
            SEM if only one metric in optimization, or just the mean if there
            is no SEM. Can also be a list of (fidelities, mapping from
            metric name to a tuple of mean and SEM).
        metadata: Additional metadata to track about this run.
        sample_size: Optional number of samples collected for the arm in
            this trial.
    """
    assert isinstance(
        trial_index, int
    ), f"Trial index must be an int, got: {trial_index}."  # pragma: no cover
    trial = self.experiment.trials[trial_index]
    if not isinstance(trial, Trial):
        raise NotImplementedError(
            "Batch trial functionality is not yet available through Service API."
        )
    if metadata is not None:
        trial._run_metadata = metadata

    arm_name = not_none(trial.arm).name
    objective_name = self.experiment.optimization_config.objective.metric.name
    evaluations = {
        arm_name: raw_data_to_evaluation(
            raw_data=raw_data, objective_name=objective_name
        )
    }
    sample_sizes = {arm_name: sample_size} if sample_size else {}
    # evaluations[arm_name] is either a trial evaluation
    # {metric_name -> (mean, SEM)} or a fidelity trial evaluation
    # [(fidelities, {metric_name -> (mean, SEM)})].
    if isinstance(evaluations[arm_name], dict):
        data = Data.from_evaluations(
            evaluations=cast(Dict[str, TTrialEvaluation], evaluations),
            trial_index=trial.index,
            sample_sizes=sample_sizes,
        )
    else:
        data = Data.from_fidelity_evaluations(
            evaluations=cast(Dict[str, TFidelityTrialEvaluation], evaluations),
            trial_index=trial.index,
            sample_sizes=sample_sizes,
        )
    trial.mark_completed()
    self.experiment.attach_data(data)
    self._updated_trials.append(trial_index)
    self._save_experiment_and_generation_strategy_if_possible()
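# Usage illustration (not part of the source): a minimal sketch of reporting
# results through the Service API `complete_trial` method above. The experiment
# name, the parameter "x", and the metric name "m1" are made up, and the exact
# `create_experiment` arguments may differ between Ax versions.
from ax.service.ax_client import AxClient

ax_client = AxClient()
ax_client.create_experiment(
    name="example_experiment",
    parameters=[{"name": "x", "type": "range", "bounds": [0.0, 1.0]}],
    objective_name="m1",
)
parameters, trial_index = ax_client.get_next_trial()

# Report a mapping from metric name to (mean, SEM); `sample_size` is optional.
ax_client.complete_trial(
    trial_index=trial_index,
    raw_data={"m1": (0.42, 0.05)},
    sample_size=100,
)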
def testFromEvaluations(self):
    data = Data.from_evaluations(
        evaluations={"0_1": {"b": (3.7, 0.5)}},
        trial_index=0,
        sample_sizes={"0_1": 2},
    )
    self.assertEqual(len(data.df), 1)
def data_and_evaluations_from_raw_data(
    raw_data: Dict[str, TEvaluationOutcome],
    metric_names: List[str],
    trial_index: int,
    sample_sizes: Dict[str, int],
    start_time: Optional[int] = None,
    end_time: Optional[int] = None,
) -> Tuple[Dict[str, TEvaluationOutcome], AbstractDataFrameData]:
    """Transforms evaluations into Ax Data.

    Each evaluation is either a trial evaluation: {metric_name -> (mean, SEM)}
    or a fidelity trial evaluation for multi-fidelity optimizations:
    [(fidelities, {metric_name -> (mean, SEM)})].

    Args:
        raw_data: Mapping from arm name to raw_data.
        metric_names: Names of metrics used to transform raw data to evaluations.
        trial_index: Index of the trial, for which the evaluations are.
        sample_sizes: Number of samples collected for each arm, may be empty
            if unavailable.
        start_time: Optional start time of run of the trial that produced this
            data, in milliseconds.
        end_time: Optional end time of run of the trial that produced this
            data, in milliseconds.
    """
    evaluations = {
        arm_name: raw_data_to_evaluation(
            raw_data=raw_data[arm_name],
            metric_names=metric_names,
            start_time=start_time,
            end_time=end_time,
        )
        for arm_name in raw_data
    }
    if all(isinstance(evaluations[x], dict) for x in evaluations.keys()):
        # All evaluations are no-fidelity evaluations.
        data = Data.from_evaluations(
            evaluations=cast(Dict[str, TTrialEvaluation], evaluations),
            trial_index=trial_index,
            sample_sizes=sample_sizes,
            start_time=start_time,
            end_time=end_time,
        )
    elif all(isinstance(evaluations[x], list) for x in evaluations.keys()):
        # All evaluations are map evaluations.
        data = MapData.from_map_evaluations(
            evaluations=cast(Dict[str, TMapTrialEvaluation], evaluations),
            trial_index=trial_index,
        )
    else:
        raise ValueError(  # pragma: no cover
            "Evaluations included a mixture of no-fidelity and with-fidelity "
            "evaluations, which is not currently supported."
        )
    return evaluations, data
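# Shape illustration (not part of the source): a sketch of the two evaluation
# formats the function above dispatches on. Arm and metric names and the
# "epoch" map key are made up, and the import paths are assumptions.
from ax.core.data import Data
from ax.core.map_data import MapData

# Plain trial evaluations: {arm_name -> {metric_name -> (mean, SEM)}}.
plain = {"0_0": {"m1": (1.2, 0.1), "m2": (3.4, 0.2)}}
plain_data = Data.from_evaluations(evaluations=plain, trial_index=0)

# Map evaluations: {arm_name -> [(map_key_values, {metric_name -> (mean, SEM)})]},
# e.g. one entry per training epoch.
mapped = {
    "0_0": [
        ({"epoch": 1}, {"m1": (0.9, 0.1)}),
        ({"epoch": 2}, {"m1": (1.1, 0.1)}),
    ]
}
map_data = MapData.from_map_evaluations(evaluations=mapped, trial_index=0)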
def testFromEvaluations(self):
    data = Data.from_evaluations(
        evaluations={"0_1": {"b": (3.7, 0.5)}},
        trial_index=0,
        sample_sizes={"0_1": 2},
        start_time=current_timestamp_in_millis(),
        end_time=current_timestamp_in_millis(),
    )
    self.assertEqual(len(data.df), 1)
    self.assertNotEqual(data, Data(self.df))
    self.assertIn("start_time", data.df)
    self.assertIn("end_time", data.df)
def data_from_evaluations(
    evaluations: Dict[str, TEvaluationOutcome],
    trial_index: int,
    sample_sizes: Dict[str, int],
    start_time: Optional[int] = None,
    end_time: Optional[int] = None,
) -> Data:
    """Transforms evaluations into Ax Data.

    Each evaluation is either a trial evaluation: {metric_name -> (mean, SEM)}
    or a fidelity trial evaluation for multi-fidelity optimizations:
    [(fidelities, {metric_name -> (mean, SEM)})].

    Args:
        evaluations: Mapping from arm name to evaluation.
        trial_index: Index of the trial, for which the evaluations are.
        sample_sizes: Number of samples collected for each arm, may be empty
            if unavailable.
        start_time: Optional start time of run of the trial that produced this
            data, in milliseconds.
        end_time: Optional end time of run of the trial that produced this
            data, in milliseconds.
    """
    if all(isinstance(evaluations[x], dict) for x in evaluations.keys()):
        # All evaluations are no-fidelity evaluations.
        data = Data.from_evaluations(
            evaluations=cast(Dict[str, TTrialEvaluation], evaluations),
            trial_index=trial_index,
            sample_sizes=sample_sizes,
            start_time=start_time,
            end_time=end_time,
        )
    elif all(isinstance(evaluations[x], list) for x in evaluations.keys()):
        # All evaluations are with-fidelity evaluations.
        data = Data.from_fidelity_evaluations(
            evaluations=cast(Dict[str, TFidelityTrialEvaluation], evaluations),
            trial_index=trial_index,
            sample_sizes=sample_sizes,
            start_time=start_time,
            end_time=end_time,
        )
    else:
        raise ValueError(  # pragma: no cover
            "Evaluations included a mixture of no-fidelity and with-fidelity "
            "evaluations, which is not currently supported."
        )
    return data
def eval_trial(self, trial: BaseTrial) -> AbstractDataFrameData:
    """
    Evaluate trial arms with the evaluation function of this experiment.

    Args:
        trial: Trial whose arms to evaluate.
    """
    cached_data = self.lookup_data_for_trial(trial.index)[0]
    if not cached_data.df.empty:
        return cached_data

    evaluations = {}
    if not self.has_evaluation_function:
        raise ValueError(  # pragma: no cover
            f"Cannot evaluate trial {trial.index} as no attached data was "
            "found and no evaluation function is set on this `SimpleExperiment`. "
            "`SimpleExperiment` is geared to synchronous and sequential cases "
            "where each trial is evaluated before more trials are created. "
            "For all other cases, use `Experiment`."
        )
    if isinstance(trial, Trial):
        if not trial.arm:
            return Data()  # pragma: no cover
        trial.mark_running()
        evaluations[not_none(trial.arm).name] = self.evaluation_function_outer(
            not_none(trial.arm).parameters, None
        )
    elif isinstance(trial, BatchTrial):
        if not trial.arms:
            return Data()  # pragma: no cover
        trial.mark_running()
        for arm, weight in trial.normalized_arm_weights().items():
            arm_parameters: TParameterization = arm.parameters
            evaluations[arm.name] = self.evaluation_function_outer(
                arm_parameters, weight
            )
    trial.mark_completed()
    data = Data.from_evaluations(evaluations, trial.index)
    self.attach_data(data)
    return data
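# Usage illustration (not part of the source): a minimal `SimpleExperiment`
# sketch showing the evaluation-function contract that `eval_trial` relies on.
# The Booth-style objective, parameter names, and metric name "objective" are
# made up, and constructor signatures may differ between Ax versions.
from ax import Arm, ParameterType, RangeParameter, SearchSpace, SimpleExperiment


def booth(parameterization, weight=None):
    # Maps a parameterization (and, for batch trials, the normalized arm
    # weight) to {metric_name -> (mean, SEM)}.
    x, y = parameterization["x"], parameterization["y"]
    return {"objective": ((x + 2 * y - 7) ** 2 + (2 * x + y - 5) ** 2, 0.0)}


exp = SimpleExperiment(
    name="booth_experiment",
    search_space=SearchSpace(
        parameters=[
            RangeParameter(
                name="x", parameter_type=ParameterType.FLOAT, lower=-10.0, upper=10.0
            ),
            RangeParameter(
                name="y", parameter_type=ParameterType.FLOAT, lower=-10.0, upper=10.0
            ),
        ]
    ),
    evaluation_function=booth,
    objective_name="objective",
)

# Attach a single-arm trial and evaluate it: `eval_trial` runs the evaluation
# function, marks the trial completed, and attaches the resulting data.
trial = exp.new_trial()
trial.add_arm(Arm(parameters={"x": 1.0, "y": 3.0}))
data = exp.eval_trial(trial)
print(data.df)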
def complete_trial(
    self,
    trial_index: int,
    # acceptable `raw_data` argument formats:
    # 1) {metric_name -> (mean, standard error)}
    # 2) (mean, standard error) and we assume metric name == objective name
    # 3) only the mean, and we assume metric name == objective name and
    #    standard error == 0
    raw_data: TEvaluationOutcome,
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Completes the trial with given metric values and adds optional metadata
    to it.

    Args:
        trial_index: Index of trial within the experiment.
        raw_data: Evaluation data for the trial. Can be a mapping from
            metric name to a tuple of mean and SEM, just a tuple of mean and
            SEM if only one metric in optimization, or just the mean if there
            is no SEM.
        metadata: Additional metadata to track about this run.
    """
    assert isinstance(
        trial_index, int
    ), f"Trial index must be an int, got: {trial_index}."  # pragma: no cover
    trial = self.experiment.trials[trial_index]
    if not isinstance(trial, Trial):
        raise NotImplementedError(
            "Batch trial functionality is not yet available through Service API."
        )
    trial._status = TrialStatus.COMPLETED
    if metadata is not None:
        trial._run_metadata = metadata

    if isinstance(raw_data, dict):
        evaluations = {not_none(trial.arm).name: raw_data}
    elif isinstance(raw_data, tuple):
        evaluations = {
            not_none(trial.arm).name: {
                self.experiment.optimization_config.objective.metric.name: raw_data
            }
        }
    elif isinstance(raw_data, float) or isinstance(raw_data, int):
        evaluations = {
            not_none(trial.arm).name: {
                self.experiment.optimization_config.objective.metric.name: (
                    raw_data,
                    0.0,
                )
            }
        }
    else:
        raise ValueError(
            "Raw data has an invalid type. The data must either be in the form "
            "of a dictionary of metric names to mean, sem tuples, "
            "or a single mean, sem tuple, or a single mean."
        )
    data = Data.from_evaluations(evaluations, trial.index)
    self.experiment.attach_data(data)
    self._updated_trials.append(trial_index)
    self._save_experiment_if_possible()
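# Normalization illustration (not part of the source): how the tuple and
# bare-mean `raw_data` formats above end up keyed under the objective metric
# before being turned into `Data`. The arm and metric names are placeholders
# and the import path is an assumption.
from ax.core.data import Data

arm_name, objective_name = "0_0", "objective"

# Format 2: a (mean, SEM) tuple is keyed under the objective metric.
evaluations_from_tuple = {arm_name: {objective_name: (0.42, 0.05)}}

# Format 3: a bare mean is paired with an SEM of 0.0.
evaluations_from_mean = {arm_name: {objective_name: (0.42, 0.0)}}

data = Data.from_evaluations(evaluations_from_tuple, trial_index=0)
print(data.df)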
def test_get_pending_observation_features_hss(self):
    # Pending observations should be none if there aren't any.
    self.assertIsNone(get_pending_observation_features(self.hss_exp))
    self.hss_trial.mark_running(no_runner_required=True)
    # Now that the trial is deployed, it should become a pending trial on the
    # experiment and appear as pending for all metrics.
    pending = get_pending_observation_features(self.hss_exp)
    self.assertEqual(
        pending,
        {
            "m1": [self.hss_obs_feat],
            "m2": [self.hss_obs_feat],
        },
    )
    # Check that transforming observation features works correctly: since the
    # `Cast` transform is applied, it should inject the full parameterization
    # into the resulting observation features. Therefore, transforming the
    # extracted pending features and observation features made from the full
    # parameterization should give the same result.
    self.assertEqual(
        self.hss_sobol._transform_data(
            obs_feats=pending["m1"],
            obs_data=[],
            search_space=self.hss_exp.search_space,
            transforms=self.hss_sobol._raw_transforms,
            transform_configs=None,
        ),
        self.hss_sobol._transform_data(
            obs_feats=[self.hss_obs_feat_all_params.clone()],
            obs_data=[],
            search_space=self.hss_exp.search_space,
            transforms=self.hss_sobol._raw_transforms,
            transform_configs=None,
        ),
    )
    # With `lookup_data` on the trial returning data for metric "m2", that
    # metric should no longer have pending observation features.
    with patch.object(
        self.hss_trial,
        "lookup_data",
        return_value=Data.from_evaluations(
            {self.hss_trial.arm.name: {"m2": (1, 0)}},
            trial_index=self.hss_trial.index,
        ),
    ):
        self.assertEqual(
            get_pending_observation_features(self.hss_exp),
            {"m2": [], "m1": [self.hss_obs_feat]},
        )
    # When a trial is marked failed, it should no longer appear in pending...
    self.hss_trial.mark_failed()
    self.assertIsNone(get_pending_observation_features(self.hss_exp))
    # ... unless specified to include failed trials in pending observations.
    self.assertEqual(
        get_pending_observation_features(
            self.hss_exp, include_failed_as_pending=True
        ),
        {
            "m1": [self.hss_obs_feat],
            "m2": [self.hss_obs_feat],
        },
    )
    # When an arm is abandoned, it should appear in pending features whether
    # or not there is data for it.
    hss_exp = get_hierarchical_search_space_experiment()
    hss_batch_trial = hss_exp.new_batch_trial(generator_run=self.hss_gr)
    hss_batch_trial.mark_arm_abandoned(hss_batch_trial.arms[0].name)
    # Checking with data for all metrics.
    with patch.object(
        hss_batch_trial,
        "fetch_data",
        return_value=Data.from_evaluations(
            {
                hss_batch_trial.arms[0].name: {
                    "m1": (1, 0),
                    "m2": (1, 0),
                }
            },
            trial_index=hss_batch_trial.index,
        ),
    ):
        pending = get_pending_observation_features(
            hss_exp, include_failed_as_pending=True
        )
        self.assertEqual(
            pending,
            {
                "m1": [self.hss_obs_feat],
                "m2": [self.hss_obs_feat],
            },
        )
        # Check that candidate metadata is properly propagated for the
        # abandoned arm.
        self.assertEqual(
            self.hss_sobol._transform_data(
                obs_feats=pending["m1"],
                obs_data=[],
                search_space=hss_exp.search_space,
                transforms=self.hss_sobol._raw_transforms,
                transform_configs=None,
            ),
            self.hss_sobol._transform_data(
                obs_feats=[self.hss_obs_feat_all_params.clone()],
                obs_data=[],
                search_space=hss_exp.search_space,
                transforms=self.hss_sobol._raw_transforms,
                transform_configs=None,
            ),
        )
    # Checking with data for all metrics.
    with patch.object(
        hss_batch_trial,
        "fetch_data",
        return_value=Data.from_evaluations(
            {
                hss_batch_trial.arms[0].name: {
                    "m1": (1, 0),
                    "m2": (1, 0),
                }
            },
            trial_index=hss_batch_trial.index,
        ),
    ):
        self.assertEqual(
            get_pending_observation_features(hss_exp),
            {
                "m2": [self.hss_obs_feat],
                "m1": [self.hss_obs_feat],
            },
        )