def _evaluate_early_stopping_with_df(
    early_stopping_strategy: PercentileEarlyStoppingStrategy,
    experiment: Experiment,
    df: pd.DataFrame,
):
    """Test helper: run a percentile early-stopping strategy on a given frame.

    The frame is handed to ``align_partial_results`` with ``"timestamp"`` as
    the progression key and ``"branin_map"`` as the only metric. The strategy
    is then queried once for every trial index on ``experiment``.

    Args:
        early_stopping_strategy: Strategy whose ``should_stop_trial_early``
            is called for each trial.
        experiment: Experiment supplying the trial indices and the
            objective's ``minimize`` flag.
        df: Arbitrary (MapData-style) frame to evaluate the strategy on.

    Returns:
        A dict mapping the index of each trial the strategy would stop to the
        reason it reported. Trials that should keep running are omitted.
    """
    aligned_by_metric, _ = align_partial_results(
        df=df,
        progr_key="timestamp",
        metrics=["branin_map"],
    )
    branin_means = aligned_by_metric["branin_map"]

    # First pass: ask the strategy about every trial on the experiment.
    verdicts = {}
    for idx in set(experiment.trials.keys()):
        verdicts[idx] = early_stopping_strategy.should_stop_trial_early(
            trial_index=idx,
            experiment=experiment,
            df=branin_means,
            minimize=experiment.optimization_config.objective.minimize,
        )

    # Second pass: keep only the trials flagged for stopping.
    stopped = {}
    for idx, (stop, why) in verdicts.items():
        if stop:
            stopped[idx] = why
    return stopped
def should_stop_trials_early(
    self,
    trial_indices: Set[int],
    experiment: Experiment,
    **kwargs: Dict[str, Any],
) -> Dict[int, Optional[str]]:
    """Decide which of the candidate trials should be stopped early.

    Data is obtained through ``_check_validity_and_get_data``, aligned on the
    first map key for the objective metric, and each candidate trial is then
    judged individually by ``should_stop_trial_early`` (which holds the
    per-trial stopping rule).

    Args:
        trial_indices: Indices of candidate trials to consider for early
            stopping.
        experiment: Experiment that contains the trials and other contextual
            data.

    Returns:
        A dictionary mapping trial indices that should be early stopped to
        (optional) messages with the associated reason. An empty dictionary
        means no trial should be stopped; that is also the result when no
        data is available or when aligning the data raises.
    """
    data = self._check_validity_and_get_data(experiment=experiment)
    if data is None:
        # Nothing to base a decision on, so leave every trial running.
        return {}

    opt_config = not_none(experiment.optimization_config)
    objective_name = opt_config.objective.metric.name
    map_key = next(iter(data.map_keys))
    minimize = opt_config.objective.minimize
    map_frame = data.map_df

    try:
        aligned_by_metric, _ = align_partial_results(
            df=map_frame,
            progr_key=map_key,
            metrics=[objective_name],
        )
    except Exception as e:
        # Best effort: an alignment failure must not stop any trial.
        logger.warning(
            f"Encountered exception while aligning data: {e}. "
            "Not early stopping any trials."
        )
        return {}

    objective_means = aligned_by_metric[objective_name]

    # Collect a verdict for every candidate before filtering.
    verdicts = {}
    for idx in trial_indices:
        verdicts[idx] = self.should_stop_trial_early(
            trial_index=idx,
            experiment=experiment,
            df=objective_means,
            minimize=minimize,
        )

    to_stop = {}
    for idx, (stop, why) in verdicts.items():
        if stop:
            to_stop[idx] = why
    return to_stop
def should_stop_trials_early(
    self,
    trial_indices: Set[int],
    experiment: Experiment,
    **kwargs: Dict[str, Any],
) -> Dict[int, Optional[str]]:
    """Decide which of the candidate trials should be stopped early.

    The experiment's full map data is looked up, validated (it must be
    ``MapData`` with a single map key), aligned for the objective metric, and
    each candidate trial is then judged by ``should_stop_trial_early`` using
    this strategy's ``percentile_threshold``.

    Args:
        trial_indices: Indices of candidate trials to consider for early
            stopping.
        experiment: Experiment that contains the trials and other contextual
            data.

    Returns:
        A dictionary mapping trial indices that should be early stopped to
        (optional) messages with the associated reason. An empty dictionary
        means no trial should be stopped; that is also the result when the
        looked-up data is empty.

    Raises:
        UnsupportedError: If the experiment has no optimization config.
        ValueError: If the attached data is not ``MapData`` or carries more
            than one map key.
    """
    if experiment.optimization_config is None:
        raise UnsupportedError(  # pragma: no cover
            "Experiment must have an optimization config in order to use an "
            "early stopping strategy."
        )

    opt_config = not_none(experiment.optimization_config)
    objective_name = opt_config.objective.metric.name
    minimize = opt_config.objective.minimize

    data = experiment.lookup_data(keep_latest_map_values_only=False)
    if data.df.empty:
        logger.info(
            "PercentileEarlyStoppingStrategy received empty data. "
            "Not stopping any trials."
        )
        return {}

    if not isinstance(data, MapData):
        raise ValueError(
            "PercentileEarlyStoppingStrategy expects MapData, but the "
            f"data attached to experiment is of type {type(data)}."
        )

    keys = data.map_keys
    if len(keys) > 1:
        raise ValueError(  # pragma: no cover
            "PercentileEarlyStoppingStrategy expects MapData with a single "
            "map key, but the data attached to the experiment has multiple: "
            f"{data.map_keys}."
        )
    map_key = keys[0]

    frame = data.df
    aligned_by_metric, _ = align_partial_results(
        df=frame,
        progr_key=map_key,
        metrics=[objective_name],
    )
    objective_means = aligned_by_metric[objective_name]

    # Collect a verdict for every candidate before filtering.
    verdicts = {}
    for idx in trial_indices:
        verdicts[idx] = self.should_stop_trial_early(
            trial_index=idx,
            experiment=experiment,
            df=objective_means,
            percentile_threshold=self.percentile_threshold,
            map_key=map_key,
            minimize=minimize,
        )

    to_stop = {}
    for idx, (stop, why) in verdicts.items():
        if stop:
            to_stop[idx] = why
    return to_stop