def _set_or_update_model(self, data: Optional[Data]) -> None:
    """Prepare the model to generate from, switching steps when one is finished.

    A step whose ``num_trials`` is ``-1`` is treated as unlimited and is always
    kept. Otherwise the step is considered finished once
    ``num_can_complete_this_step`` has reached ``num_trials`` and
    ``num_completed_this_step`` has reached ``min_trials_observed``; the
    strategy then moves to the following step and sets that step's model
    from scratch.

    Args:
        data: Data forwarded unchanged to the model-setting helpers.

    Raises:
        DataRequiredError: If the current step enforces its trial count, the
            generation quota is met, and the observation quota is not.
        GenerationStrategyCompleted: If the current step is finished and it
            is the last one in ``self._steps``.
    """
    if self._curr.num_trials == -1:
        # No trial limit on this step, so there is never a hand-over.
        self._set_or_update_current_model(data=data)
        return

    generation_quota_met = (
        self.num_can_complete_this_step >= self._curr.num_trials
    )
    observation_quota_met = (
        self.num_completed_this_step >= self._curr.min_trials_observed
    )

    # With enforcement on, refuse to over-generate while data is still missing.
    if (
        self._curr.enforce_num_trials
        and generation_quota_met
        and not observation_quota_met
    ):
        raise DataRequiredError(
            "All trials for current model have been generated, but not enough "
            "data has been observed to fit next model. Try again when more data "
            "are available."
        )

    if not (generation_quota_met and observation_quota_met):
        # Step not finished yet: keep generating from the current model.
        self._set_or_update_current_model(data=data)
        return

    # Step finished: hand over to the following step, if there is one.
    next_index = self._curr.index + 1
    if len(self._steps) == next_index:
        raise GenerationStrategyCompleted(
            f"Generation strategy {self} generated all the trials as "
            "specified in its steps."
        )
    self._curr = self._steps[next_index]
    # The new step's model has never been set, so it is initialized with all
    # the data rather than updated, even if the step has `use_update` on.
    self._set_current_model(data=data)
def _set_or_update_model(self, data: Optional[Data]) -> None:
    """Prepare the model to generate from, switching steps when one is finished.

    The model state recorded on the last generator run (if any) is passed
    along to whichever model-setting helper ends up being called.

    A step whose ``num_trials`` is ``-1`` is treated as unlimited and is always
    kept. Otherwise, trials of the current step are counted by status: a step
    is finished once the number of its trials that are neither ``FAILED`` nor
    ``ABANDONED`` reaches ``num_trials`` and the number of its ``COMPLETED``
    trials reaches ``min_trials_observed``.

    Args:
        data: Data forwarded unchanged to the model-setting helpers.

    Raises:
        DataRequiredError: If the current step enforces its trial count, enough
            trials were generated, and not enough were completed.
        GenerationStrategyCompleted: If the current step is finished and it
            is the last one in ``self._steps``.
    """
    last_run = self.last_generator_run
    model_state = {}
    if last_run is not None and last_run._model_state_after_gen is not None:
        model_state = not_none(last_run._model_state_after_gen)

    if self._curr.num_trials == -1:
        # No trial limit on this step, so there is never a hand-over.
        self._set_or_update_current_model(data=data, model_state=model_state)
        return

    step_trials = self.trial_indices_by_step[self._curr.index]
    by_status = self.experiment.trial_indices_by_status

    completed_in_step = step_trials.intersection(by_status[TrialStatus.COMPLETED])
    # Trials that ended unsuccessfully must not count towards the number of
    # trials generated for this step, so they are subtracted below.
    unsuccessful = by_status[TrialStatus.FAILED].union(
        by_status[TrialStatus.ABANDONED]
    )
    unsuccessful_in_step = step_trials.intersection(unsuccessful)

    enough_observed = len(completed_in_step) >= self._curr.min_trials_observed
    effective_generated = len(step_trials) - len(unsuccessful_in_step)
    enough_generated = effective_generated >= self._curr.num_trials

    # With enforcement on, refuse to over-generate while data is still missing.
    if self._curr.enforce_num_trials and enough_generated and not enough_observed:
        raise DataRequiredError(
            "All trials for current model have been generated, but not enough "
            "data has been observed to fit next model. Try again when more data "
            "are available."
        )

    if not (enough_generated and enough_observed):
        # Step not finished yet: keep generating from the current model.
        self._set_or_update_current_model(data=data, model_state=model_state)
        return

    # Step finished: hand over to the following step, if there is one.
    next_index = self._curr.index + 1
    if len(self._steps) == next_index:
        raise GenerationStrategyCompleted(
            f"Generation strategy {self} generated all the trials as "
            "specified in its steps."
        )
    self._curr = self._steps[next_index]
    # The new step's model has never been set, so it is initialized with all
    # the data rather than updated, even if the step has `use_update` on.
    self._set_current_model(data=data, model_state=model_state)
def _set_model(self, experiment: Experiment, data: Data) -> None:
    """Set the model to generate from, switching steps when one is finished.

    The model state recorded on the last generator run (if any) is expanded
    into keyword arguments when the current step's model is kept. It is not
    passed when the strategy moves on to the next step.

    A step whose ``num_trials`` is ``-1`` is treated as unlimited and is always
    kept. Otherwise a step is finished once the number of its trials not
    flagged ``did_not_complete`` reaches ``num_trials`` and the number flagged
    ``completed_successfully`` reaches ``min_trials_observed``.

    Args:
        experiment: Experiment whose ``trials`` are inspected; also forwarded
            to ``_set_current_model``.
        data: Data forwarded unchanged to ``_set_current_model``.

    Raises:
        DataRequiredError: If the current step enforces its trial count, enough
            trials were generated, and not enough were completed.
        ValueError: If the current step is finished and it is the last one
            in ``self._steps``.
    """
    last_run = self.last_generator_run
    model_state = {}
    if last_run is not None and last_run._model_state_after_gen is not None:
        model_state = not_none(last_run._model_state_after_gen)

    if self._curr.num_trials == -1:
        # No trial limit on this step, so there is never a hand-over.
        self._set_current_model(experiment=experiment, data=data, **model_state)
        return

    step_trials = self.trial_indices_by_step[self._curr.index]
    trials = experiment.trials
    num_completed = len(
        [idx for idx in step_trials if trials[idx].completed_successfully]
    )
    num_did_not_complete = len(
        [idx for idx in step_trials if trials[idx].did_not_complete]
    )

    enough_observed = num_completed >= self._curr.min_trials_observed
    # Trials that did not complete are excluded from the generated count.
    effective_generated = len(step_trials) - num_did_not_complete
    enough_generated = effective_generated >= self._curr.num_trials

    # With enforcement on, refuse to over-generate while data is still missing.
    if self._curr.enforce_num_trials and enough_generated and not enough_observed:
        raise DataRequiredError(
            "All trials for current model have been generated, but not enough "
            "data has been observed to fit next model. Try again when more data "
            "are available."
        )

    if not (enough_generated and enough_observed):
        # Step not finished yet: keep generating from the current model.
        self._set_current_model(experiment=experiment, data=data, **model_state)
        return

    # Step finished: hand over to the following step, if there is one.
    next_index = self._curr.index + 1
    if len(self._steps) == next_index:
        raise ValueError(f"Generation strategy {self.name} is completed.")
    self._curr = self._steps[next_index]
    self._set_current_model(experiment=experiment, data=data)
def _maybe_move_to_next_step(self, raise_data_required_error: bool = True ) -> bool: """Moves this generation strategy to next step if conditions for moving are met. This method is safe to use both when generating candidates or simply checking how many generator runs (to be made into trials) can currently be produced. Conditions for moving to next step: 1. ``num_trials`` in current generation step have been generated (generation strategy produced that many generator runs, which were then attached to trials), 2. ``min_trials_observed`` in current generation step have been completed, 3. current step is not the last in this generation strategy. NOTE: this method raises ``GenerationStrategyCompleted`` error if conditions 1 and 2 above are met, but the current step is the last in generation strategy. It also raises ``DataRequiredError`` if all conditions below are true: 1. ``raise_data_required_error`` argument is ``True``, 2. ``num_trials`` in current generation step have been generated, 3. ``min_trials_observed`` in current generation step have not been completed, 4. ``enforce_num_trials`` in current generation step is ``True``. Args: raise_data_required_error: Whether to raise ``DataRequiredError`` in the case detailed above. Not raising the error is useful if just looking to check how many generator runs (to be made into trials) can be produced, but not actually producing them yet. Returns: Whether generation strategy moved to the next step. """ to_gen, to_complete = self._num_trials_to_gen_and_complete_in_curr_step( ) if to_gen == to_complete == -1: # Unlimited trials, never moving to next step. return False enforcing_num_trials = self._curr.enforce_num_trials trials_left_to_gen = to_gen > 0 trials_left_to_complete = to_complete > 0 # If there is something left to gen or complete, we don't move to next step. if trials_left_to_gen or trials_left_to_complete: # Check that minimum observed_trials is satisfied if it's enforced. 
raise_error = raise_data_required_error if raise_error and enforcing_num_trials and not trials_left_to_gen: raise DataRequiredError( "All trials for current model have been generated, but not enough " "data has been observed to fit next model. Try again when more data" " are available.") return False # If nothing left to gen or complete, move to next step if one is available. if len(self._steps) == self._curr.index + 1: raise GenerationStrategyCompleted( f"Generation strategy {self} generated all the trials as " "specified in its steps.") self._curr = self._steps[self._curr.index + 1] # Moving to the next step also entails unsetting this GS's model (since # new step's model will be initialized for the first time, so we don't # try to `update` it but rather initialize with all the data even if # `use_update` is true for the new generation step; this is done in # `self._set_or_update_current_model). self._model = None return True
def gen(
    self,
    experiment: Experiment,
    data: Optional[Data] = None,
    n: int = 1,
    **kwargs: Any,
) -> GeneratorRun:
    """Produce the next points in the experiment.

    Works out which rows of ``data`` have not been seen by this strategy yet,
    uses them to decide whether the current step has observed enough arms,
    switches to the next model or re-sets the current one accordingly, and
    finally asks the model for a new generator run.

    Args:
        experiment: Experiment to generate for; registered through
            ``_set_experiment`` and used to look up arms and trials.
        data: Data to fit on. When falsy, ``experiment.fetch_data()`` is used.
        n: Passed to the model's ``gen`` as ``n``.
        **kwargs: Extra arguments for the model's ``gen``, combined with the
            current step's ``model_gen_kwargs`` through ``consolidate_kwargs``.

    Returns:
        The generator run produced by the model; it is also appended to
        ``self._generator_runs``.

    Raises:
        DataRequiredError: If the current step enforces its arm count, enough
            arms were generated, and not enough were observed.
    """
    self._set_experiment(experiment=experiment)
    if not data:
        data = experiment.fetch_data()

    fresh_signatures = set()
    if data is not None and not data.df.empty:
        if self._data.df.empty:
            unseen_rows = data.df
        else:
            # Left-merge against the data already known to the strategy; rows
            # present only on the left are the ones added since the last call.
            merged = data.df.merge(
                self._data.df,
                on=["arm_name", "trial_index", "metric_name", "mean", "sem"],
                how="left",
                indicator=True,
            )
            unseen_rows = merged[merged["_merge"] == "left_only"]

        # Collect signatures of arms mentioned in unseen rows, skipping arms
        # unknown to the experiment and rows belonging to failed trials.
        for _, row in unseen_rows.iterrows():
            if row["arm_name"] not in experiment.arms_by_name:
                continue
            trial = not_none(experiment.trials.get(row["trial_index"]))
            if trial.status.is_failed:
                continue
            arm = not_none(experiment.arms_by_name.get(row["arm_name"]))
            fresh_signatures.add(arm.signature)

    observed_count = len(self._observed) + len(fresh_signatures)
    enough_observed = observed_count >= self._curr.min_arms_observed
    if self._curr.num_arms == -1:
        # An unlimited step can never have "generated enough".
        enough_generated = False
    else:
        enough_generated = len(self._generated) >= self._curr.num_arms

    # With enforcement on, refuse to over-generate while data is still missing.
    if self._curr.enforce_num_arms and enough_generated and not enough_observed:
        raise DataRequiredError(
            "All trials for current model have been generated, but not enough "
            "data has been observed to fit next model. Try again when more data "
            "are available."
        )

    # TODO[Lena, T44021164]: take into account failed trials. Potentially
    # reduce `_generated` count when a trial mentioned in new data failed.
    last_run = self.last_generator_run
    if enough_generated and enough_observed:
        # Current step is finished: change to the next model.
        self._change_model(experiment=experiment, data=data)
    else:
        # Keep the current model, restoring the state saved after the last
        # generation when one was recorded.
        model_state = {}
        if last_run is not None and last_run._model_state_after_gen is not None:
            model_state = not_none(last_run._model_state_after_gen)
        self._set_current_model(experiment=experiment, data=data, **model_state)

    model = not_none(self._model)
    gen_kwargs = consolidate_kwargs(
        kwargs_iterable=[self._curr.model_gen_kwargs, kwargs],
        keywords=get_function_argument_names(model.gen),
    )
    gen_run = model.gen(n=n, **gen_kwargs)

    # Bookkeeping happens last so a failure above leaves the strategy untouched.
    self._data = data
    self._generated.extend([arm.signature for arm in gen_run.arms])
    self._observed.extend(fresh_signatures)
    self._generator_runs.append(gen_run)
    return gen_run