def test_duplicate_only_once(storage, monkeypatch):
    """Test that trials may not be duplicated twice"""
    with disable_duplication(monkeypatch):
        build_evc_tree(list(range(5)))

    for exp in ["root", "parent", "experiment", "child", "grand-child"]:
        assert len(get_experiment(name=exp).fetch_trials(with_evc_tree=False)) == len(
            Trial.allowed_stati
        )

    experiment = build_experiment(name="experiment")
    experiment._experiment.duplicate_pending_trials()

    for exp in ["root", "parent", "child", "grand-child"]:
        assert len(get_experiment(name=exp).fetch_trials(with_evc_tree=False)) == len(
            Trial.allowed_stati
        )

    assert (
        len(experiment.fetch_trials(with_evc_tree=False))
        == len(Trial.allowed_stati) + N_PENDING * 4
    )

    experiment._experiment.duplicate_pending_trials()

    for exp in ["root", "parent", "child", "grand-child"]:
        assert len(get_experiment(name=exp).fetch_trials(with_evc_tree=False)) == len(
            Trial.allowed_stati
        )

    assert (
        len(experiment.fetch_trials(with_evc_tree=False))
        == len(Trial.allowed_stati) + N_PENDING * 4
    )
def compute_stats(
    monitoring_method="ptera",
    executor="joblib",
    max_trials=(498, 500),
    sleep_time=30,
):
    experiment = build_experiment(
        f"test-io-{executor}-{monitoring_method}",
        space=dict(x="uniform(0, 1, precision=100)"),
        max_trials=max_trials[1],
    )
    with experiment.tmp_executor(executor, n_workers=1):
        experiment.workon(
            foo,
            max_trials=max_trials[1],
            max_trials_per_worker=max_trials[0],
            sleep_time=0.0001,
        )

        with monitoring_methods[monitoring_method]() as data:
            experiment.workon(
                foo,
                max_trials=max_trials[1],
                max_trials_per_worker=max_trials[1] - max_trials[0],
                sleep_time=sleep_time,
            )

    return data
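# Hedged sketch: `foo` and `monitoring_methods` are assumed by `compute_stats` above but
# are not shown in this excerpt. A minimal stand-in objective for such an I/O benchmark
# could simply sleep to simulate work and return a dummy objective in Oríon's result
# format; the real helper may differ.
import time


def foo(x, sleep_time=0.0001):
    """Toy objective: sleep for `sleep_time` seconds and report ``x`` as the objective."""
    time.sleep(sleep_time)
    return [{"name": "objective", "type": "objective", "value": x}]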
def build_root_experiment(space=None, trials=None):
    """Build a root experiment and generate trials."""
    if space is None:
        space = {"x": "uniform(0, 100)", "y": "uniform(0, 100)", "z": "uniform(0, 100)"}
    if trials is None:
        trials = [{"x": i, "y": i * 2, "z": i**2} for i in range(4)]

    root = build_experiment(name="root", max_trials=len(trials), space=space)
    generate_trials(root, trials)
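# Hedged sketch: `generate_trials` is called by the helpers above but not shown in this
# excerpt. One plausible implementation inserts fixed parameter sets with
# `ExperimentClient.insert` and falls back to `suggest`/`release` when no parameters are
# given; the real test helper may also assign specific statuses to the trials.
def generate_trials(experiment, trials):
    """Register the given parameter dictionaries as trials of `experiment`."""
    for params in trials:
        if params is None:
            # No fixed parameters: let the algorithm pick a point, then free it.
            trial = experiment.suggest()
            experiment.release(trial)
        else:
            experiment.insert(params)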
def test_fix_lost_trials_in_evc(storage, monkeypatch):
    """Test that lost trials from parents can be fixed as well.

    `fix_lost_trials` is tested more carefully in experiment's unit-tests
    (without the EVC).
    """
    with disable_duplication(monkeypatch), mocked_datetime(monkeypatch):
        build_evc_tree(list(range(5)))

    for exp_name in ["root", "parent", "experiment", "child", "grand-child"]:
        exp = get_experiment(name=exp_name)
        assert len(exp.fetch_trials(with_evc_tree=False)) == len(Trial.allowed_stati)
        assert len(exp.fetch_trials_by_status("reserved", with_evc_tree=False)) == 1

    experiment = build_experiment(name="experiment")
    experiment._experiment.fix_lost_trials()

    for exp_name in ["root", "parent", "experiment", "child", "grand-child"]:
        exp = get_experiment(name=exp_name)
        assert len(exp.fetch_trials(with_evc_tree=False)) == len(Trial.allowed_stati)
        assert len(exp.fetch_trials_by_status("reserved", with_evc_tree=False)) == 0
def test_duplicate_race_conditions(storage, monkeypatch, caplog):
    """Test that duplication does not raise an error during race conditions."""
    with disable_duplication(monkeypatch):
        build_evc_tree(list(range(2)))

    experiment = build_experiment(name="parent")

    def register_race_condition(trial):
        raise DuplicateKeyError("Race condition!")

    monkeypatch.setattr(
        experiment._experiment._storage, "register_trial", register_race_condition
    )

    assert len(experiment.fetch_trials(with_evc_tree=False)) == len(Trial.allowed_stati)

    with caplog.at_level(logging.DEBUG):
        experiment._experiment.duplicate_pending_trials()

    assert "Race condition while trying to duplicate trial" in caplog.text
def run_hpo():

    # Specify the database where the experiments are stored. We use a local PickleDB here.
    storage = {
        "type": "legacy",
        "database": {
            "type": "pickleddb",
            "host": "./db.pkl",
        },
    }

    # Load the data for the specified experiment
    experiment = build_experiment(
        "hyperband-cifar10",
        space={
            "epochs": "fidelity(1, 120, base=4)",
            "learning_rate": "loguniform(1e-5, 0.1)",
            "momentum": "uniform(0, 0.9)",
            "weight_decay": "loguniform(1e-10, 1e-2)",
            "gamma": "loguniform(0.97, 1)",
        },
        algorithms={
            "hyperband": {
                "seed": 1,
                "repetitions": 5,
            },
        },
        storage=storage,
    )

    trials = 1
    while not experiment.is_done:
        print("trial", trials)
        trial = experiment.suggest()
        if trial is None and experiment.is_done:
            break
        valid_error_rate = main(
            **trial.params, checkpoint=f"{experiment.working_dir}/{trial.hash_params}"
        )
        experiment.observe(trial, valid_error_rate, name="valid_error_rate")
        trials += 1
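# Hedged sketch: `run_hpo` above assumes a `main` training function that accepts the
# sampled hyperparameters plus a checkpoint path and returns the validation error rate
# as a plain float (it is passed to `observe` with `name="valid_error_rate"`). A stand-in
# with the expected signature could look like this; `train_and_evaluate` is a hypothetical
# helper, the real CIFAR10 training loop is not shown here.
def main(epochs, learning_rate, momentum, weight_decay, gamma, checkpoint=None):
    """Train the model with the given hyperparameters and return the validation error rate."""
    valid_error_rate = train_and_evaluate(  # hypothetical training helper
        epochs=epochs,
        learning_rate=learning_rate,
        momentum=momentum,
        weight_decay=weight_decay,
        gamma=gamma,
        checkpoint=checkpoint,
    )
    return valid_error_rate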
def test_duplicate_closest_duplicated_pending_trials(storage, monkeypatch):
    """Test that only closest duplicated pending trials are duplicated"""
    with disable_duplication(monkeypatch):
        build_evc_tree([0, 0, 1, 2, 2])

    for exp in ["root", "parent", "experiment", "child", "grand-child"]:
        assert len(get_experiment(name=exp).fetch_trials(with_evc_tree=False)) == len(
            Trial.allowed_stati
        )

    experiment = build_experiment(name="experiment")
    experiment._experiment.duplicate_pending_trials()

    for exp in ["root", "parent", "child", "grand-child"]:
        assert len(get_experiment(name=exp).fetch_trials(with_evc_tree=False)) == len(
            Trial.allowed_stati
        )

    assert (
        len(experiment.fetch_trials(with_evc_tree=False))
        == len(Trial.allowed_stati) + N_PENDING * 2
    )
def build_child_experiment(space=None, trials=None, name="child", parent="root"):
    """Build a child experiment by branching from `parent` and generate trials."""
    if trials is None:
        trials = [None for i in range(6)]

    max_trials = get_experiment(parent).max_trials + len(trials)

    child = build_experiment(
        name=name,
        space=space,
        max_trials=max_trials,
        branching={"branch_from": parent, "enable": True},
    )
    assert child.name == name
    assert child.version == 1

    generate_trials(child, trials)
# a narrowed prior for the learning rate, we will see that it becomes an unimportant
# hyperparameter.
# See documentation on :ref:`EVC system` for more information on branching, or
# :py:func:`orion.client.build_experiment` for information on ``branching`` arguments.
# The original learning rate prior was ``loguniform(1e-5, 0.1)``. We will narrow it to
# ``loguniform(1e-3, 0.1)``.

from orion.client import build_experiment

# Branch from "hyperband-cifar10" with a narrower search space.
experiment = build_experiment(
    "narrow-hyperband-cifar10",
    branching={"branch_from": "hyperband-cifar10"},
    space={
        "epochs": "fidelity(1, 120, base=4)",
        "learning_rate": "loguniform(1e-3, 0.1)",
        "momentum": "uniform(0, 0.9)",
        "weight_decay": "loguniform(1e-10, 1e-2)",
        "gamma": "loguniform(0.97, 1)",
    },
    storage=storage,
)

experiment.plot.lpi()

#%%
# The prior of the learning rate is arguably large, spanning 2 orders of magnitude
# `(0.001, 0.1)`. Nevertheless, for this problem, most learning rates within this range
# lead to optimal results whenever the other hyperparameters are optimal. What you must remember
# is that defining too narrow a search space may lead to misleading local parameter importance.
# See :ref:`sphx_glr_auto_examples_plot_4_partial_dependencies.py` for a visualization to verify
# whether the search space you defined may be too narrow.
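#%%
# As a hedged companion to the LPI plot above, the partial dependencies visualization
# referenced in the note can be generated from the same experiment client (assuming the
# plot accessor exposes ``partial_dependencies`` the same way it exposes ``lpi``), which
# helps check whether the narrowed prior is skewing the importance results.

experiment.plot.partial_dependencies()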
def hparam_sweep(
    self,
    setting: SettingABC,
    search_space: Dict[str, Union[str, Dict]] = None,
    experiment_id: str = None,
    database_path: Union[str, Path] = None,
    max_runs: int = None,
    debug: bool = False,
) -> Tuple[Dict, float]:
    """Performs a Hyper-Parameter Optimization sweep using orion.

    Changes the values in `self.hparams` iteratively, returning the best hparams
    found so far.

    Parameters
    ----------
    setting : Setting
        Setting to run the sweep on.

    search_space : Dict[str, Union[str, Dict]], optional
        Search space of the hyper-parameter optimization algorithm. Defaults to
        `None`, in which case the result of the `get_search_space` method is used.

    experiment_id : str, optional
        Unique id to use when creating the experiment in Orion. Defaults to `None`,
        in which case a hash of the `setting`'s fields is used.

    database_path : Union[str, Path], optional
        Path to a pickle file to be used by Orion to store the hyper-parameters and
        their corresponding values. Defaults to `None`, in which case the database
        is created at path `./orion_db.pkl`.

    max_runs : int, optional
        Maximum number of runs to perform. Defaults to `None`, in which case the run
        lasts until the search space is exhausted.

    debug : bool, optional
        Whether to run Orion in debug-mode, where the database is an EphemeralDb,
        meaning it gets created for the sweep and destroyed at the end of the sweep.

    Returns
    -------
    Tuple[Dict, float]
        Best hparams found, and the corresponding performance.
    """
    try:
        from orion.client import build_experiment
        from orion.core.worker.trial import Trial
    except ImportError as e:
        raise RuntimeError(
            f"Need to install the optional dependencies for HPO, using "
            f"`pip install -e .[hpo]` (error: {e})"
        ) from e

    search_space = search_space or self.get_search_space(setting)
    logger.info("HPO Search space:\n" + json.dumps(search_space, indent="\t"))

    database_path: Path = Path(database_path or "./orion_db.pkl")
    logger.info(f"Will use database at path '{database_path}'.")

    experiment_name = self.get_experiment_name(setting, experiment_id=experiment_id)
    experiment = build_experiment(
        name=experiment_name,
        space=search_space,
        debug=debug,
        algorithms="BayesianOptimizer",
        max_trials=max_runs,
        storage={
            "type": "legacy",
            "database": {"type": "pickleddb", "host": str(database_path)},
        },
    )

    previous_trials: List[Trial] = experiment.fetch_trials_by_status("completed")

    # Orion works in a 'lower is better' fashion, so if the `objective` of the
    # Results class for the given Setting is "higher is better", we negate the
    # objectives when extracting them and again before submitting them to Orion.
    lower_is_better = setting.Results.lower_is_better
    sign = 1 if lower_is_better else -1

    if previous_trials:
        logger.info(
            f"Using existing Experiment {experiment} which has "
            f"{len(previous_trials)} existing trials."
        )
    else:
        logger.info(f"Created new experiment with name {experiment_name}")

    trials_performed = 0
    failed_trials = 0
    red = partial(colorize, color="red")
    green = partial(colorize, color="green")
    while not (experiment.is_done or failed_trials == 3):
        # Get a new suggestion of hparams to try:
        trial: Trial = experiment.suggest()

        # ---------
        # (Re)create the Model with the suggested Hparams values.
        # ---------
        new_hparams: Dict = trial.params
        # Inner function, just used to make the code below a bit simpler.
        # TODO: We should probably also change some values in the Config (e.g.
        # log_dir, checkpoint_dir, etc) between runs.
        logger.info(
            "Suggested values for this run:\n" + json.dumps(new_hparams, indent="\t")
        )
        self.adapt_to_new_hparams(new_hparams)

        # ---------
        # Evaluate the (adapted) method on the setting:
        # ---------
        try:
            result: Results = setting.apply(self)
        except Exception:
            logger.error(red("Encountered an error, this trial will be dropped:"))
            logger.error(red("-" * 60))
            with StringIO() as s:
                traceback.print_exc(file=s)
                s.seek(0)
                logger.error(red(s.read()))
            logger.error(red("-" * 60))

            failed_trials += 1
            logger.error(red(f"({failed_trials} failed trials so far). "))
            experiment.release(trial)
        else:
            # Report the results to Orion:
            orion_result = dict(
                name=result.objective_name,
                type="objective",
                value=sign * result.objective,
            )
            experiment.observe(trial, [orion_result])
            trials_performed += 1
            logger.info(
                green(
                    f"Trial #{trials_performed}: {result.objective_name} = {result.objective}"
                )
            )
            # Receive the results, maybe log to wandb, whatever you wanna do.
            self.receive_results(setting, result)

    logger.info(
        "Experiment statistics: \n"
        + "\n".join(f"\t{key}: {value}" for key, value in experiment.stats.items())
    )
    logger.info(f"Number of previous trials: {len(previous_trials)}")
    logger.info(f"Trials successfully completed by this worker: {trials_performed}")
    logger.info(f"Failed Trials attempted by this worker: {failed_trials}")

    if "best_trials_id" not in experiment.stats:
        raise RuntimeError("Can't find the best trial, experiment might be broken!")

    best_trial: Trial = experiment.get_trial(uid=experiment.stats["best_trials_id"])
    best_hparams = best_trial.params
    best_objective = best_trial.objective
    return best_hparams, best_objective
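# Hedged usage sketch: the names `MyMethod` and `my_setting` below are illustrative
# placeholders for whatever class defines `hparam_sweep` and the setting it runs on;
# they are not part of the code above.
if __name__ == "__main__":
    method = MyMethod()       # hypothetical class exposing `hparam_sweep`
    my_setting = MySetting()  # hypothetical Setting instance
    best_hparams, best_objective = method.hparam_sweep(
        my_setting,
        max_runs=50,
        database_path="./orion_db.pkl",
    )
    print(f"Best objective: {best_objective}")
    print(f"Best hyper-parameters: {best_hparams}")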
#%%
# Joblib
# ------
#
# `Joblib`_ is a lightweight library for task parallel execution in Python. It is the default
# backend used by Oríon to spawn multiple workers.
#
# We first build the experiment and limit it to 200 trials.

from orion.client import build_experiment

experiment = build_experiment(
    name="joblib_example",
    max_trials=200,
    space=space,
    storage=storage,
)

#%%
# Since joblib is the default backend, we do not need to do anything special to use it.
# We can simply call
# :meth:`ExperimentClient.workon() <orion.client.experiment.ExperimentClient.workon>`
# and specify the number of workers that we want.

experiment.workon(main, n_workers=4)

#%%
# It is as simple as this.
#
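#%%
# A hedged sketch: to try a different backend without changing the experiment
# configuration, the same experiment can temporarily swap executors with
# :meth:`ExperimentClient.tmp_executor() <orion.client.experiment.ExperimentClient.tmp_executor>`,
# assuming the corresponding backend (Dask here) is installed.

with experiment.tmp_executor("dask", n_workers=4):
    experiment.workon(main, n_workers=4)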
}

#%%
# We define the search space for the optimization. Here, the optimization algorithm may explore
# real values for ``x`` between 0 and 30 only. See documentation of :ref:`search-space` for more
# information.

space = {"x": "uniform(0, 30)"}

#%%
# We then build the experiment with the name ``random-rosenbrock``. The name is used by Oríon
# as an `id` for the experiment. Each experiment must have a unique name.

experiment = build_experiment(
    "random-rosenbrock",
    space=space,
    storage=storage,
)

#%%
# For this example we use a 1-d rosenbrock function. We must return a list of results
# to Oríon. Each result must have the format
# ``{name: <str>, type: <'objective', 'constraint' or 'gradient'>, value: <float>}``, otherwise
# a ``ValueError`` will be raised. At least one of the results must have the type ``objective``,
# the metric that is minimized by the algorithm.


def rosenbrock(x, noise=None):
    """Evaluate partial information of a quadratic."""
    y = x - 34.56789
    z = 4 * y**2 + 23.4