def test_suggest_unique():
    """Check that one batch of random-search suggestions holds no repeated value."""
    search_space = Space()
    search_space.register(Integer('yolo1', 'uniform', -3, 6))
    algo = Random(search_space)

    expected_count = 6
    suggestions = algo.suggest(expected_count)
    # Concatenate every suggested point onto an empty tuple to get one flat
    # tuple of sampled values.
    flat_values = sum(suggestions, ())

    assert len(flat_values) == expected_count
    # Converting to a set would shrink the collection if any value repeated.
    assert len(set(flat_values)) == expected_count
def test_seeding(space):
    """Check that re-seeding the RNG replays the same first suggestion."""
    algo = Random(space)

    algo.seed_rng(1)
    first_point = algo.suggest(1)[0]
    # Without re-seeding, the next draw is expected to differ from the first.
    next_point = algo.suggest(1)[0]
    assert not numpy.allclose(first_point, next_point)

    # Re-seeding with the same value must reproduce the first draw.
    algo.seed_rng(1)
    replayed_point = algo.suggest(1)[0]
    assert numpy.allclose(first_point, replayed_point)
def test_set_state(space):
    """Check that restoring a saved state replays the same first suggestion."""
    algo = Random(space)
    algo.seed_rng(1)

    # Snapshot taken right after seeding, before any point is drawn.
    snapshot = algo.state_dict

    first_point = algo.suggest(1)[0]
    # Without restoring, the next draw is expected to differ from the first.
    next_point = algo.suggest(1)[0]
    assert not numpy.allclose(first_point, next_point)

    # Restoring the snapshot must reproduce the first draw.
    algo.set_state(snapshot)
    replayed_point = algo.suggest(1)[0]
    assert numpy.allclose(first_point, replayed_point)
def test_suggest_unique_history():
    """Check that new suggestions avoid values that were already observed."""
    search_space = Space()
    search_space.register(Integer('yolo1', 'uniform', -3, 6))
    algo = Random(search_space)

    batch_size = 3

    # First batch: values must be unique among themselves.
    first_batch = sum(algo.suggest(batch_size), ())
    assert len(first_batch) == batch_size
    assert len(set(first_batch)) == batch_size

    # Report the first batch back, one single-value point per sample, each
    # paired with the same result of 1.
    observed_points = [[sampled] for sampled in first_batch]
    algo.observe(observed_points, [1] * batch_size)

    # Second batch: unique among themselves...
    second_batch = sum(algo.suggest(batch_size), ())
    assert len(second_batch) == batch_size
    assert len(set(second_batch)) == batch_size
    # ...and sharing no value with the batch observed above.
    assert set(second_batch).isdisjoint(set(first_batch))
class PBT(BaseAlgorithm):
    """Population Based Training algorithm

    Population based training is an evolutionary algorithm that evolves trials
    from low fidelity levels to high fidelity levels (ex: number of epochs).
    For a population of size `m`, it first samples `m` trials at lowest fidelity level.
    When trials are completed, it decides based on the ``exploit`` configuration whether
    the trial should be promoted to next fidelity level or whether another trial
    should be selected instead and forked. When a trial is forked, new hyperparameters are
    selected based on the trial's hyperparameters and the ``explore`` configuration.
    The original trial's working_dir is then copied over to the new trial's working_dir
    so that the user script can resume execution from model parameters of original trial.

    It is important that the weights of models trained for each trial are saved in the
    corresponding directory at path ``trial.working_dir``. The file name does not matter.
    The entire directory is copied to a new ``trial.working_dir`` when PBT selects a good
    model and explores new hyperparameters. The new trial can be resumed by the user by
    loading the weights found in the freshly copied ``new_trial.working_dir``, and saved
    back at the same path at end of trial execution. To access ``trial.working_dir`` from
    Oríon's commandline API, see documentation at
    https://orion.readthedocs.io/en/stable/user/script.html#command-line-templating.
    To access ``trial.working_dir`` from Oríon's Python API, set argument
    ``trial_arg="trial"`` when executing method
    :py:meth:`orion.client.experiment.ExperimentClient.workon`.

    The number of fidelity levels is determined by the argument ``generations``. The lowest
    and highest fidelity levels, and the distribution, are determined by the search space's
    dimension that will have a prior ``fidelity(low, high, base)``, where ``base`` is the
    logarithm base of the dimension. Original PBT algorithm uses a base of 1.

    PBT will try to return as many trials as possible when calling ``suggest(num)``, up to
    ``num``. When ``population_size`` trials are sampled and more trials are requested,
    it will try to generate new trials by promoting or forking existing trials in a queue.
    This queue will get filled when calling ``observe(trials)`` on completed or broken
    trials.

    If trials are broken at lowest fidelity level, they are ignored and will not count
    in population size so that PBT can sample additional trials to reach ``population_size``
    completed trials at lowest fidelity. If a trial is broken at higher fidelity, the
    original trial leading to the broken trial is examined again for ``exploit`` and
    ``explore``. If the broken trial was the result of a fork, then we backtrack to the
    trial that was dropped during ``exploit`` in favor of the forked trial. If the broken
    trial was a promotion, then we backtrack to the original trial that was promoted.

    For more information on the algorithm,
    see original paper at https://arxiv.org/abs/1711.09846.

    Jaderberg, Max, et al. "Population based training of neural networks."
    arXiv preprint, arXiv:1711.09846 (2017).

    Notes
    -----
    It is important that the experiment using this algorithm has a working directory properly
    set. The experiment's working dir serves as the base for the trial's working directories.

    The trial's working directory is ``trial.working_dir``. This is where the weights of the
    model should be saved. Using ``trial.hash_params`` to determine a unique working dir for
    the trial will result in working on a different directory than the one copied by PBT,
    hence missing the copied model parameters.

    Parameters
    ----------
    space: `orion.algo.space.Space`
        Optimisation space with priors for each dimension.
    seed: None, int or sequence of int
        Seed for the random number generator used to sample new trials.
        Default: ``None``
    population_size: int, optional
        Size of the population. No trial will be continued until there are
        `population_size` trials executed until lowest fidelity. If a trial is broken
        during execution at lowest fidelity, the algorithm will sample a new trial,
        keeping the population of *non-broken* trials at `population_size`.
        For efficiency it is better to have less workers running than population_size.
        Default: 50.
    generations: int, optional
        Number of generations, from lowest fidelity to highest one. This will determine
        how many branchings occur during the execution of PBT. Default: 10
    exploit: dict or None, optional
        Configuration for a ``pbt.exploit.BaseExploit`` object that determines
        whether a trial should be exploited or not. If None, default configuration
        is a ``PipelineExploit`` with ``BacktrackExploit`` and ``TruncateExploit``.
    explore: dict or None, optional
        Configuration for a ``pbt.explore.BaseExplore`` object that returns new parameter
        values for exploited trials. If None, default configuration is a ``PipelineExplore``
        with ``ResampleExplore`` and ``PerturbExplore``.
    fork_timeout: int, optional
        Maximum amount of time in seconds that an attempt to mutate a trial should take,
        otherwise algorithm.suggest() will raise ``RuntimeError``.
        Default: 60

    """

    requires_type = None
    requires_dist = "linear"
    requires_shape = "flattened"

    def __init__(
        self,
        space,
        seed=None,
        population_size=50,
        generations=10,
        exploit=None,
        explore=None,
        fork_timeout=60,
    ):
        if exploit is None:
            exploit = {
                "of_type": "PipelineExploit",
                "exploit_configs": [
                    {
                        "of_type": "BacktrackExploit",
                        "min_forking_population": 5,
                        "truncation_quantile": 0.9,
                        "candidate_pool_ratio": 0.2,
                    },
                    {
                        "of_type": "TruncateExploit",
                        "min_forking_population": 5,
                        "truncation_quantile": 0.8,
                        "candidate_pool_ratio": 0.2,
                    },
                ],
            }

        if explore is None:
            explore = {
                "of_type": "PipelineExplore",
                "explore_configs": [
                    {"of_type": "ResampleExplore", "probability": 0.2},
                    {"of_type": "PerturbExplore", "factor": 1.2, "volatility": 0.0001},
                ],
            }

        # The random search must exist before anything touches ``self.space``
        # or ``self.rng``: both properties delegate to it.
        self.random_search = Random(space)
        # Trials waiting to be promoted or forked; filled by ``observe``.
        self._queue = []

        fidelity_index = self.fidelity_index
        if fidelity_index is None:
            raise RuntimeError(SPACE_ERROR)

        self.fidelity_dim = space[fidelity_index]

        self.fidelities = compute_fidelities(
            generations,
            self.fidelity_dim.low,
            self.fidelity_dim.high,
            self.fidelity_dim.base,
        )
        # Maps each fidelity level to the next one; the last level has no entry.
        self.fidelity_upgrades = {
            a: b for a, b in zip(self.fidelities, self.fidelities[1:])
        }

        logger.info("Executing PBT with fidelities: %s", self.fidelities)

        self.exploit_func = exploit_factory.create(**exploit)
        self.explore_func = explore_factory.create(**explore)

        self.lineages = Lineages()
        # NOTE(review): not read anywhere in this class; kept for compatibility.
        self._lineage_dropped_head = {}

        super(PBT, self).__init__(
            space,
            seed=seed,
            population_size=population_size,
            generations=generations,
            exploit=exploit,
            explore=explore,
            fork_timeout=fork_timeout,
        )

    @property
    def space(self):
        """Return transformed space of PBT"""
        return self.random_search.space

    @space.setter
    def space(self, space):
        """Set the space of PBT and initialize it"""
        self.random_search.space = space

    @property
    def rng(self):
        """Random Number Generator"""
        return self.random_search.rng

    def seed_rng(self, seed):
        """Seed the state of the random number generator.

        Parameters
        ----------
        seed: int
            Integer seed for the random number generator.

        """
        self.random_search.seed_rng(seed)

    @property
    def state_dict(self):
        """Return a state dict that can be used to reset the state of the algorithm."""
        state_dict = super(PBT, self).state_dict
        state_dict["random_search"] = self.random_search.state_dict
        # Deep copies so that later mutations of the algorithm do not leak
        # into the returned snapshot.
        state_dict["lineages"] = copy.deepcopy(self.lineages)
        state_dict["queue"] = copy.deepcopy(self._queue)
        return state_dict

    def set_state(self, state_dict):
        """Reset the state of the algorithm based on the given state_dict

        The lineages and the queue are deep-copied on the way in, so that
        ``state_dict`` is left untouched by later calls to ``suggest`` or
        ``observe`` and can be restored again.

        Parameters
        ----------
        state_dict: dict
            State previously obtained from the ``state_dict`` property.

        """
        super(PBT, self).set_state(state_dict)
        self.random_search.set_state(state_dict["random_search"])
        # Copy both in a single call so that any object shared between the
        # lineages and the queue stays shared in the restored state.
        self.lineages, self._queue = copy.deepcopy(
            (state_dict["lineages"], state_dict["queue"])
        )

    @property
    def _num_root(self):
        """Number of trials with lowest fidelity level that are not broken."""
        return sum(
            int(lineage.root.item.status != "broken") for lineage in self.lineages
        )

    @property
    def is_done(self):
        """Is done if ``population_size`` trials at highest fidelity level are completed."""
        n_completed = 0
        final_depth = self._get_depth_of(self.fidelity_dim.high)
        for trial in self.lineages.get_trials_at_depth(final_depth):
            n_completed += int(trial.status == "completed")

        return n_completed >= self.population_size

    def register(self, trial):
        """Save the trial as one suggested or observed by the algorithm

        The trial is additionally saved in the lineages object of PBT.

        Parameters
        ----------
        trial: ``orion.core.worker.trial.Trial``
            Trial from a `orion.algo.space.Space`.

        """
        super(PBT, self).register(trial)
        self.lineages.register(trial)

    def suggest(self, num):
        """Suggest a ``num`` ber of new sets of parameters.

        PBT will try to sample up to ``population_size`` trials at lowest fidelity level.
        If more trials are required, it will try to promote or fork trials based on the queue
        of available trials observed.

        Parameters
        ----------
        num: int
            Number of points to suggest. The algorithm may return less than the number of points
            requested.

        Returns
        -------
        list of trials
            A list of trials representing values suggested by the algorithm.

        """

        # Sample points until num is met, or population_size
        num_random_samples = min(max(self.population_size - self._num_root, 0), num)
        logger.debug(
            "PBT has %s pending or completed trials at root, %s broken trials.",
            self._num_root,
            len(self.lineages) - self._num_root,
        )
        logger.debug("Sampling %s new trials", num_random_samples)
        trials = self._sample(num_random_samples)
        logger.debug("Sampled %s new trials", len(trials))
        logger.debug(
            "After sampling, PBT has %s pending or completed trials at root, %s broken trials.",
            self._num_root,
            len(self.lineages) - self._num_root,
        )

        # Then try branching based on observed_queue until num is met or queue is exhausted.
        num_fork_samples = max(num - len(trials), 0)
        logger.debug(
            "Attempting Forking %s trials, with %s trials queued available for forking",
            num_fork_samples,
            len(self._queue),
        )
        forked_trials = self._fork_lineages(num_fork_samples)
        logger.debug("Forked %s new trials", len(forked_trials))
        logger.debug(
            "After forking, PBT has %s pending or completed trials at root, %s broken trials.",
            self._num_root,
            len(self.lineages) - self._num_root,
        )

        trials += forked_trials

        return trials

    def _sample(self, num):
        """Sample trials based on random search"""
        sampled_trials = self.random_search.suggest(num)

        trials = []
        for trial in sampled_trials:
            # Every freshly sampled trial starts at the lowest fidelity level.
            branched_trial = trial.branch(
                params={self.fidelity_dim.name: self.fidelity_dim.low}
            )
            self.register(branched_trial)
            trials.append(branched_trial)

        return trials

    def _get_depth_of(self, fidelity):
        """Get the depth of a fidelity in the lineages"""
        return self.fidelities.index(fidelity)

    def _fork_lineages(self, num):
        """Try to promote or fork up to ``num`` trials from the queue."""

        branched_trials = []
        skipped_trials = []

        while len(branched_trials) < num and self._queue:
            trial = self._queue.pop(0)

            trial_to_branch, new_trial = self._generate_offspring(trial)

            if trial_to_branch is None:
                logger.debug("Skipping trial %s", trial)
                skipped_trials.append(trial)
                continue

            self.lineages.fork(trial_to_branch, new_trial)

            # The queued trial was dropped in favor of another one: record the
            # jump from the dropped trial to the new trial.
            if trial is not trial_to_branch:
                logger.debug("Dropped trial %s in favor of %s", trial, trial_to_branch)
                self.lineages.set_jump(trial, new_trial)

            logger.debug("Forking trial %s to %s", trial_to_branch, new_trial)

            branched_trials.append(new_trial)
            self.register(new_trial)

        # Skipped trials go back to the front of the queue for a later attempt.
        self._queue = skipped_trials + self._queue

        return branched_trials

    def _generate_offspring(self, trial):
        """Try to promote or fork a given trial.

        Returns
        -------
        tuple
            ``(trial_to_branch, new_trial)``, or ``(None, None)`` when the
            exploit function returns ``None`` for this trial.

        Raises
        ------
        RuntimeError
            If ``trial`` was never suggested, or if no trial that was not already
            suggested could be generated within ``fork_timeout`` seconds.

        """

        new_trial = trial

        if not self.has_suggested(new_trial):
            raise RuntimeError(
                "Trying to fork a trial that was not registered yet. This should never happen"
            )

        attempts = 0
        start = time.perf_counter()
        # Retry until the candidate is one that was not suggested before, or
        # until the time budget is exhausted.
        while (
            self.has_suggested(new_trial)
            and time.perf_counter() - start <= self.fork_timeout
        ):
            trial_to_explore = self.exploit_func(
                self.rng,
                trial,
                self.lineages,
            )

            if trial_to_explore is None:
                return None, None
            elif trial_to_explore is trial:
                new_params = {}
                trial_to_branch = trial
                logger.debug("Promoting trial %s, parameters stay the same.", trial)
            else:
                new_params = flatten(
                    self.explore_func(self.rng, self.space, trial_to_explore.params)
                )
                trial_to_branch = trial_to_explore
                logger.debug(
                    "Forking trial %s with new parameters %s",
                    trial_to_branch,
                    new_params,
                )

            # Set next level of fidelity
            new_params[self.fidelity_index] = self.fidelity_upgrades[
                trial_to_branch.params[self.fidelity_index]
            ]

            new_trial = trial_to_branch.branch(params=new_params)
            new_trial = self.space.transform(self.space.reverse(new_trial))

            logger.debug("Attempt %s - Creating new trial %s", attempts, new_trial)

            attempts += 1

        if (
            self.has_suggested(new_trial)
            and time.perf_counter() - start > self.fork_timeout
        ):
            raise RuntimeError(
                f"Could not generate unique new parameters for trial {trial.id} in "
                f"less than {self.fork_timeout} seconds. Attempted {attempts} times."
            )

        return trial_to_branch, new_trial

    def _triage(self, trials):
        """Triage observed trials and return those that may be queued."""

        trials_to_verify = []

        # First try to resume from trials if necessary, then only push to queue leafs
        for trial in trials:
            if not self.has_suggested(trial):
                logger.debug("Ignoring unknown trial %s", trial)
                continue

            # Only trials seen as completed or broken for the first time are
            # candidates for the queue.
            if not self.has_observed(trial) and trial.status in ["completed", "broken"]:
                logger.debug("Will verify trial %s for queue", trial)
                trials_to_verify.append(trial)

            self.register(trial)

        return trials_to_verify

    def _queue_trials_for_promotions(self, trials):
        """Queue trials if they are completed or ancestor trials if they are broken."""
        for trial in trials:
            if trial.status == "broken":
                # Branch again from trial that lead to this broken one.
                lineage_to_retry = self.lineages.get_lineage(trial).get_true_ancestor()
                if lineage_to_retry:
                    logger.debug(
                        "Trial %s is broken, queuing ancestor %s to re-attempt forking.",
                        trial,
                        lineage_to_retry.item,
                    )
                    self._queue.append(lineage_to_retry.item)
                else:
                    logger.debug(
                        (
                            "Trial %s from initial generation is broken, "
                            "new trials can be sampled at next suggest() call."
                        ),
                        trial,
                    )

            elif trial.status == "completed":
                logger.debug(
                    "Trial %s is completed, queuing it to attempt forking.", trial
                )
                self._queue.append(trial)

    def observe(self, trials):
        """Observe the trials and queue those available for promotion or forking.

        Parameters
        ----------
        trials: list of ``orion.core.worker.trial.Trial``
            Trials from a `orion.algo.space.Space`.

        """
        trials_to_verify = self._triage(trials)
        self._queue_trials_for_promotions(trials_to_verify)
def __init__(
    self,
    space,
    seed=None,
    population_size=50,
    generations=10,
    exploit=None,
    explore=None,
    fork_timeout=60,
):
    """Set up the random search, fidelity levels and exploit/explore strategies.

    Parameters
    ----------
    space:
        Search space; it must yield a fidelity index, otherwise
        ``RuntimeError(SPACE_ERROR)`` is raised.
    seed:
        Forwarded to the parent constructor. Default: ``None``
    population_size: int, optional
        Forwarded to the parent constructor. Default: 50
    generations: int, optional
        Number of fidelity levels to compute. Default: 10
    exploit: dict or None, optional
        Keyword arguments for ``exploit_factory.create``. If None, a
        ``PipelineExploit`` of ``BacktrackExploit`` then ``TruncateExploit`` is used.
    explore: dict or None, optional
        Keyword arguments for ``explore_factory.create``. If None, a
        ``PipelineExplore`` of ``ResampleExplore`` then ``PerturbExplore`` is used.
    fork_timeout: int, optional
        Forwarded to the parent constructor. Default: 60

    """
    if exploit is None:
        backtrack_config = {
            "of_type": "BacktrackExploit",
            "min_forking_population": 5,
            "truncation_quantile": 0.9,
            "candidate_pool_ratio": 0.2,
        }
        truncate_config = {
            "of_type": "TruncateExploit",
            "min_forking_population": 5,
            "truncation_quantile": 0.8,
            "candidate_pool_ratio": 0.2,
        }
        exploit = {
            "of_type": "PipelineExploit",
            "exploit_configs": [backtrack_config, truncate_config],
        }

    if explore is None:
        resample_config = {"of_type": "ResampleExplore", "probability": 0.2}
        perturb_config = {
            "of_type": "PerturbExplore",
            "factor": 1.2,
            "volatility": 0.0001,
        }
        explore = {
            "of_type": "PipelineExplore",
            "explore_configs": [resample_config, perturb_config],
        }

    # Created first: the fidelity index lookup below happens before the parent
    # constructor has run.
    self.random_search = Random(space)
    self._queue = []

    fidelity_index = self.fidelity_index
    if fidelity_index is None:
        raise RuntimeError(SPACE_ERROR)

    dim = space[fidelity_index]
    self.fidelity_dim = dim
    self.fidelities = compute_fidelities(generations, dim.low, dim.high, dim.base)
    # Pair each fidelity level with its successor; the last level has no entry.
    self.fidelity_upgrades = dict(zip(self.fidelities, self.fidelities[1:]))

    logger.info("Executing PBT with fidelities: %s", self.fidelities)

    self.exploit_func = exploit_factory.create(**exploit)
    self.explore_func = explore_factory.create(**explore)

    self.lineages = Lineages()
    self._lineage_dropped_head = {}

    parent_kwargs = {
        "seed": seed,
        "population_size": population_size,
        "generations": generations,
        "exploit": exploit,
        "explore": explore,
        "fork_timeout": fork_timeout,
    }
    super(PBT, self).__init__(space, **parent_kwargs)
def __init__(
    self,
    space: Space,
    seed: int | Sequence[int] | None = None,
    population_size: int = 50,
    generations: int = 10,
    exploit: dict | None = None,
    explore: dict | None = None,
    fork_timeout: int = 60,
):
    """Set up the random search, fidelity levels and exploit/explore strategies.

    Parameters
    ----------
    space:
        Search space; it must yield a fidelity index, otherwise
        ``RuntimeError(SPACE_ERROR)`` is raised.
    seed:
        Stored on the instance; when not None, also used to seed the RNG.
    population_size:
        Stored on the instance. Default: 50
    generations:
        Number of fidelity levels to compute. Default: 10
    exploit:
        Keyword arguments for ``exploit_factory.create``. If None, a
        ``PipelineExploit`` of ``BacktrackExploit`` then ``TruncateExploit`` is used.
    explore:
        Keyword arguments for ``explore_factory.create``. If None, a
        ``PipelineExplore`` of ``ResampleExplore`` then ``PerturbExplore`` is used.
    fork_timeout:
        Stored on the instance. Default: 60

    """
    if exploit is None:
        backtrack_config = {
            "of_type": "BacktrackExploit",
            "min_forking_population": 5,
            "truncation_quantile": 0.9,
            "candidate_pool_ratio": 0.2,
        }
        truncate_config = {
            "of_type": "TruncateExploit",
            "min_forking_population": 5,
            "truncation_quantile": 0.8,
            "candidate_pool_ratio": 0.2,
        }
        exploit = {
            "of_type": "PipelineExploit",
            "exploit_configs": [backtrack_config, truncate_config],
        }

    if explore is None:
        resample_config = {"of_type": "ResampleExplore", "probability": 0.2}
        perturb_config = {
            "of_type": "PerturbExplore",
            "factor": 1.2,
            "volatility": 0.0001,
        }
        explore = {
            "of_type": "PipelineExplore",
            "explore_configs": [resample_config, perturb_config],
        }

    # Created first: the fidelity index lookup below happens before the parent
    # constructor has run.
    self.random_search = Random(space)
    self._queue: list[Trial] = []

    fidelity_index = self.fidelity_index
    if fidelity_index is None:
        raise RuntimeError(SPACE_ERROR)

    # Peel off transformation wrappers until the original dimension is reached.
    dim = space[fidelity_index]
    while isinstance(dim, TransformedDimension):
        dim = dim.original_dimension
    assert isinstance(dim, Fidelity)

    self.fidelity_dim = dim
    self.fidelities = compute_fidelities(generations, dim.low, dim.high, dim.base)
    # Pair each fidelity level with its successor; the last level has no entry.
    self.fidelity_upgrades = {
        current: following
        for current, following in zip(self.fidelities, self.fidelities[1:])
    }

    logger.info("Executing PBT with fidelities: %s", self.fidelities)

    self.exploit_func = exploit_factory.create(**exploit)
    self.explore_func = explore_factory.create(**explore)

    self.lineages = Lineages()

    super().__init__(space)

    # Configuration is stored on the instance after the parent constructor ran.
    self.seed = seed
    self.population_size = population_size
    self.generations = generations
    self.exploit = exploit
    self.explore = explore
    self.fork_timeout = fork_timeout

    if seed is not None:
        self.seed_rng(seed=seed)