def test_concurent_producers(monkeypatch):
    """Test concurrent production of new trials.

    Two producers built on the same experiment call ``produce`` from two
    threads. The patched ``suggest`` sleeps, so the first call is still
    running when the second one (called with ``timeout=0``) starts; the
    second call is expected to raise ``TimeoutError``.
    """
    # Local import: the standard ``threading`` module has no ``Pool``; the
    # thread-backed pool lives in ``multiprocessing.pool``.
    from multiprocessing.pool import ThreadPool

    with create_producer() as (producer, storage):
        trials_in_db_before = len(storage._fetch_trials({}))
        new_trials_in_db_before = len(storage._fetch_trials({"status": "new"}))

        producer.experiment.algorithms.algorithm.possible_values = [(1,)]
        # Make sure it starts from index 0
        producer.experiment.algorithms.seed_rng(0)

        second_producer = Producer(producer.experiment)
        second_producer.experiment = copy.deepcopy(producer.experiment)

        sleep = 0.5

        def suggest(self, num):
            # Slow suggestion so that both producers overlap in time.
            time.sleep(sleep)
            return producer.experiment.algorithms.algorithm.possible_values[0]

        monkeypatch.setattr(
            producer.experiment.algorithms.algorithm, "suggest", suggest
        )

        # The context manager releases the worker threads even if an
        # assertion below fails.
        with ThreadPool(2) as pool:
            first_result = pool.apply_async(producer.produce)
            # ``apply_async(func, args, kwds)``: keyword arguments must go
            # through ``kwds``; a dict given positionally would be unpacked
            # as positional arguments (its keys).
            second_result = pool.apply_async(
                second_producer.produce, kwds={"timeout": 0}
            )

            assert first_result.get(sleep * 5) == 1

            # TODO: Use Oríon's custom AcquireLockTimeoutError
            with pytest.raises(TimeoutError):
                second_result.get(sleep * 5)

        # `num_new_trials` new trials were registered at database
        assert len(storage._fetch_trials({})) == trials_in_db_before + 1
        assert (
            len(storage._fetch_trials({"status": "new"}))
            == new_trials_in_db_before + 1
        )

        # NOTE(review): `random_dt` is not a parameter of this test and the
        # expected params below do not match `possible_values` set above;
        # this tail looks copied from another test -- confirm it is intended.
        new_trials = list(
            storage._fetch_trials({"status": "new", "submit_time": random_dt})
        )
        assert new_trials[0].experiment == producer.experiment.id
        assert new_trials[0].start_time is None
        assert new_trials[0].end_time is None
        assert new_trials[0].results == []
        assert new_trials[0].params == {
            "/decoding_layer": "gru",
            "/encoding_layer": "rnn",
        }
        assert new_trials[1].params == {
            "/decoding_layer": "gru",
            "/encoding_layer": "gru",
        }
def producer(hacked_exp, random_dt, exp_config):
    """Build a `Producer` on a configured experiment holding a preset point."""
    # Complete the experiment's initialisation from the stored configuration.
    hacked_exp.configure(exp_config[0][3])

    # Plant a fake point in the algorithm, after checking the space accepts it.
    planted_point = ('gru', 'rnn')
    assert planted_point in hacked_exp.space
    hacked_exp.algorithms.algorithm.value = planted_point

    return Producer(hacked_exp)
def test_algorithm_is_done(monkeypatch, producer):
    """Check that no extra samples get registered once the algorithm is done."""
    exp = producer.experiment
    exp.max_trials = 8
    exp.algorithms.algorithm.max_trials = 8
    exp.pool_size = 10
    # Rebuild the producer so it is created with the limits set above.
    producer = Producer(exp)

    def suggest_one_only(self, num=1):
        """Return only one point, whatever `num` is"""
        return [("gru", "rnn")]

    # Patch at class level: drop `is_done` and replace `suggest`.
    algo_cls = producer.experiment.algorithms.algorithm.__class__
    monkeypatch.delattr(algo_cls, "is_done")
    monkeypatch.setattr(algo_cls, "suggest", suggest_one_only)

    assert producer.experiment.pool_size == 10
    trials_in_exp_before = len(producer.experiment.fetch_trials())
    # Exactly one trial is missing to reach `max_trials`.
    assert trials_in_exp_before == producer.experiment.max_trials - 1

    producer.update()
    producer.produce()

    n_trials_after = len(producer.experiment.fetch_trials())
    assert n_trials_after == producer.experiment.max_trials
    assert producer.naive_algorithm.is_done
    assert not producer.experiment.is_done
def workon(function, space, name='loop', algorithms=None, max_trials=None):
    """Optimize a function over a given search space

    This will create a new experiment with an in-memory storage and optimize the given function
    until `max_trials` is reached or the `algorithm` is done
    (some algorithms like random search are never done).

    For information on how to fetch results, see
    :py:class:`orion.client.experiment.ExperimentClient`.

    .. note::

        Each call to this function will create a separate in-memory storage.
        The experiment is always built with ``version=1``.

    Parameters
    ----------
    function: callable
        Function to optimize. It is handed as-is to ``ExperimentClient.workon``.
    space: dict
        Optimization space of the algorithm. Should have the form `dict(name='<prior>(args)')`.
    name: str, optional
        Name of the experiment. Defaults to ``'loop'``.
    algorithms: str or dict, optional
        Algorithm used for optimization.
    max_trials: int, optional
        Maximum number of trials before the experiment is considered done.

    Returns
    -------
    ExperimentClient
        The client wrapping the experiment that was optimized.

    Raises
    ------
    `NotImplementedError`
        If the algorithm specified is not properly installed.

    """
    # Clear singletons and keep pointers to restore them.
    singletons = update_singletons()

    try:
        setup_storage(storage={'type': 'legacy', 'database': {'type': 'EphemeralDB'}})

        experiment = experiment_builder.build(
            name, version=1, space=space, algorithms=algorithms,
            strategy='NoParallelStrategy', max_trials=max_trials)

        producer = Producer(experiment)

        experiment_client = ExperimentClient(experiment, producer)
        experiment_client.workon(function, max_trials=max_trials)
    finally:
        # Restore singletons even when setup or optimization raised, so a
        # failure here does not leave the temporary in-memory storage
        # installed for the rest of the process.
        update_singletons(singletons)

    return experiment_client
def workon(experiment, worker_trials=None):
    """Run the reserve/consume loop on `experiment`, then log a summary.

    The loop runs at most `worker_trials` iterations; when that value cannot
    be turned into a finite integer (e.g. `None` or infinity) it is unbounded.
    """
    producer = Producer(experiment)
    consumer = Consumer(experiment)

    log.debug("##### Init Experiment #####")
    try:
        iterator = range(int(worker_trials))
    except (OverflowError, TypeError):
        # `worker_trials` is None or infinite: loop without an upper bound.
        iterator = itertools.count()

    for _ in iterator:
        log.debug("#### Poll for experiment termination.")
        if experiment.is_broken:
            log.info(
                "#### Experiment has reached broken trials threshold, terminating."
            )
            return

        if experiment.is_done:
            break

        log.debug("#### Try to reserve a new trial to evaluate.")
        trial = reserve_trial(experiment, producer)
        if trial is None:
            continue

        log.debug("#### Successfully reserved %s to evaluate. Consuming...", trial)
        consumer.consume(trial)

    stats = experiment.stats
    if not stats:
        log.info("No trials completed.")
        return

    best = get_storage().get_trial(uid=stats['best_trials_id'])

    # `pformat` plus a newline yields the same text `pprint` writes to a stream.
    stats_string = pprint.pformat(stats) + "\n"
    best_string = pprint.pformat(best.to_dict()['params']) + "\n"

    log.info("##### Search finished successfully #####")
    log.info("\nRESULTS\n=======\n%s\n", stats_string)
    log.info("\nBEST PARAMETERS\n===============\n%s", best_string)
def test_concurent_producers(producer, storage, random_dt):
    """Two producers sharing one experiment must register distinct trials."""
    total_before = len(storage._fetch_trials({}))
    new_before = len(storage._fetch_trials({"status": "new"}))

    # The first producer's algorithm yields a valid point on its first try.
    # The second producer suggests the same point first and therefore has to
    # suggest the next one too. Hence the first algorithm ends with
    # `_num == 1` and the second one with `_num == 2`.
    producer.algorithm.algorithm.possible_values = [("gru", "rnn"), ("gru", "gru")]
    # Make sure it starts from index 0
    producer.algorithm.seed_rng(0)

    assert producer.experiment.pool_size == 1

    second_producer = Producer(producer.experiment)
    second_producer.algorithm = copy.deepcopy(producer.algorithm)

    producer.update()
    second_producer.update()

    producer.produce()
    second_producer.produce()

    # Number of suggestions each algorithm was asked for.
    assert producer.algorithm.algorithm._num == 1  # pool size
    assert second_producer.algorithm.algorithm._num == 2  # pool size

    # One new trial per producer was registered in the storage.
    assert len(storage._fetch_trials({})) == total_before + 2
    assert len(storage._fetch_trials({"status": "new"})) == new_before + 2

    query = {"status": "new", "submit_time": random_dt}
    new_trials = list(storage._fetch_trials(query))

    first_trial = new_trials[0]
    assert first_trial.experiment == producer.experiment.id
    assert first_trial.start_time is None
    assert first_trial.end_time is None
    assert first_trial.results == []
    assert first_trial.params == {
        "/decoding_layer": "gru",
        "/encoding_layer": "rnn",
    }

    assert new_trials[1].params == {
        "/decoding_layer": "gru",
        "/encoding_layer": "gru",
    }
def create_experiment(exp_config=None, trial_config=None, stati=None):
    """Yield ``(cfg, experiment, client)`` built inside a fresh `OrionState`.

    Missing arguments fall back to the module-level `config` and
    `base_trial`, and to one trial per default status. The client is closed
    once the caller is done with the yielded objects.
    """
    exp_config = config if exp_config is None else exp_config
    trial_config = base_trial if trial_config is None else trial_config
    if stati is None:
        stati = ['new', 'interrupted', 'suspended', 'reserved', 'completed']

    state = OrionState(
        experiments=[exp_config],
        trials=generate_trials(trial_config, stati))

    with state as cfg:
        experiment = experiment_builder.build(name=exp_config['name'])
        if cfg.trials:
            # Point the experiment at the id the generated trials refer to.
            experiment._id = cfg.trials[0]['experiment']
        client = ExperimentClient(experiment, Producer(experiment))
        yield cfg, experiment, client

    client.close()
def producer(hacked_exp, random_dt, exp_config, categorical_values):
    """Return a `Producer` for a configured experiment with pool size 1."""
    # Complete the experiment's initialisation from the stored configuration.
    hacked_exp.configure(exp_config[0][3])
    hacked_exp.pool_size = 1

    # The algorithm will draw its suggestions from the categorical values.
    hacked_exp.algorithms.algorithm.possible_values = categorical_values

    hacked_exp.producer['strategy'] = DumbParallelStrategy()

    configured_producer = Producer(hacked_exp)
    return configured_producer
def create_producer():
    """Yield a ready `Producer` together with the storage backing it."""
    generated_trials = generate_trials(exp_config=base_experiment)
    state = OrionState(experiments=[base_experiment], trials=generated_trials)

    # make init done
    with state as cfg:
        experiment = cfg.get_experiment(name="default_name")

        # Candidate points are the 1-tuples (0,), (1,), ..., (10,).
        experiment.algorithms.algorithm.possible_values = [
            (value,) for value in range(11)
        ]
        experiment.algorithms.seed_rng(0)

        # Same trial budget for the experiment and its algorithm.
        experiment.max_trials = 20
        experiment.algorithms.algorithm.max_trials = 20

        producer = Producer(experiment)
        yield producer, cfg.storage()
def producer(monkeypatch, hacked_exp, random_dt, categorical_values):
    """Return a `Producer` whose class-level `backoff` no longer waits."""
    # make init done
    assert hacked_exp.pool_size == 1

    hacked_exp.algorithms.algorithm.possible_values = categorical_values
    hacked_exp.algorithms.seed_rng(0)
    hacked_exp.producer["strategy"] = DumbParallelStrategy()

    built_producer = Producer(hacked_exp)

    def backoff(self):
        """Dont wait, just update."""
        self.update()
        self.failure_count = self.failure_count + 1

    # Patched on the class, so every `Producer` in the test is affected.
    monkeypatch.setattr(Producer, "backoff", backoff)

    return built_producer
def create_experiment(exp_config=None, trial_config=None, statuses=None):
    """Yield ``(cfg, experiment, client)`` built inside a fresh `OrionState`.

    `exp_config` and `trial_config` are mandatory despite their `None`
    defaults; `statuses` defaults to one trial per standard status. The
    client is closed once the caller is done with the yielded objects.
    """
    for arg_name, arg_value in (
        ("exp_config", exp_config),
        ("trial_config", trial_config),
    ):
        if arg_value is None:
            raise ValueError(f"Parameter '{arg_name}' is missing")

    if statuses is None:
        statuses = ["new", "interrupted", "suspended", "reserved", "completed"]

    # Imported here, after argument validation, as in the original code.
    from orion.client.experiment import ExperimentClient

    state = OrionState(
        experiments=[exp_config],
        trials=generate_trials(trial_config, statuses, exp_config),
    )

    with state as cfg:
        experiment = experiment_builder.build(name=exp_config["name"])
        if cfg.trials:
            # Point the experiment at the id the generated trials refer to.
            experiment._id = cfg.trials[0]["experiment"]
        client = ExperimentClient(experiment, Producer(experiment))
        yield cfg, experiment, client

    client.close()
def workon(experiment):
    """Reserve and consume trials until `experiment` is done, then log results.

    Whenever no trial can be reserved, the producer is updated and, unless
    the experiment is done, asked to produce new trials.
    """
    producer = Producer(experiment)
    consumer = Consumer(experiment)

    log.debug("##### Init Experiment #####")
    while True:
        log.debug("#### Try to reserve a new trial to evaluate.")
        trial = experiment.reserve_trial(score_handle=producer.algorithm.score)

        if trial is not None:
            log.debug("#### Successfully reserved %s to evaluate. Consuming...", trial)
            consumer.consume(trial)
            continue

        log.debug("#### Failed to pull a new trial from database.")

        log.debug("#### Fetch most recent completed trials and update algorithm.")
        producer.update()

        log.debug("#### Poll for experiment termination.")
        if experiment.is_done:
            break

        log.debug("#### Produce new trials.")
        producer.produce()

    stats = experiment.stats
    best = Database().read('trials', {'_id': stats['best_trials_id']})[0]

    # `pformat` plus a newline yields the same text `pprint` writes to a stream.
    stats_string = pprint.pformat(stats) + "\n"
    best_string = pprint.pformat(best['params']) + "\n"

    log.info("##### Search finished successfully #####")
    log.info("\nRESULTS\n=======\n%s\n", stats_string)
    log.info("\nBEST PARAMETERS\n===============\n%s", best_string)
def test_concurent_producers(producer, database, random_dt):
    """Two producers sharing one experiment must register distinct trials."""
    total_before = database.trials.count()
    new_before = database.trials.count({'status': 'new'})

    print(producer.experiment.fetch_trials({}))

    # The first producer's algorithm yields a valid point on its first try.
    # The second producer suggests the same point first and therefore has to
    # suggest the next one too. Hence the first algorithm ends with
    # `_num == 1` and the second one with `_num == 2`.
    producer.algorithm.algorithm.possible_values = [('rnn', 'gru'), ('gru', 'gru')]
    # Make sure it starts from index 0
    producer.algorithm.seed_rng(0)

    assert producer.experiment.pool_size == 1

    second_producer = Producer(producer.experiment)
    second_producer.algorithm = copy.deepcopy(producer.algorithm)

    producer.update()
    second_producer.update()

    # Algorithm indices before and after the first producer runs.
    print(producer.algorithm.algorithm._index)
    print(second_producer.algorithm.algorithm._index)
    producer.produce()
    print(producer.algorithm.algorithm._index)
    print(second_producer.algorithm.algorithm._index)
    second_producer.produce()

    # Number of suggestions each algorithm was asked for.
    assert producer.algorithm.algorithm._num == 1  # pool size
    assert second_producer.algorithm.algorithm._num == 2  # pool size

    # One new trial per producer was registered in the database.
    assert database.trials.count() == total_before + 2
    assert database.trials.count({'status': 'new'}) == new_before + 2

    query = {'status': 'new', 'submit_time': random_dt}
    new_trials = list(database.trials.find(query))

    first_trial = new_trials[0]
    assert first_trial['experiment'] == producer.experiment.name
    assert first_trial['start_time'] is None
    assert first_trial['end_time'] is None
    assert first_trial['results'] == []
    assert first_trial['params'] == [
        {'name': '/encoding_layer', 'type': 'categorical', 'value': 'rnn'},
        {'name': '/decoding_layer', 'type': 'categorical', 'value': 'gru'},
    ]

    assert new_trials[1]['params'] == [
        {'name': '/encoding_layer', 'type': 'categorical', 'value': 'gru'},
        {'name': '/decoding_layer', 'type': 'categorical', 'value': 'gru'},
    ]
def workon(
    experiment,
    max_trials=None,
    max_broken=None,
    max_idle_time=None,
    heartbeat=None,
    user_script_config=None,
    interrupt_signal_code=None,
    ignore_code_changes=None,
):
    """Try to find solution to the search problem defined in `experiment`.

    Parameters
    ----------
    experiment:
        Experiment to work on; it is given to both `Producer` and `Consumer`.
    max_trials: int, optional
        Upper bound on the number of loop iterations of this worker. When it
        cannot be converted to a finite integer (``None`` or infinity) the
        loop is unbounded.
    max_broken: int, optional
        Number of trials this worker may fail to consume before it stops.
        ``None`` disables the worker-level threshold.
    max_idle_time: optional
        Forwarded to `Producer`.
    heartbeat, user_script_config, interrupt_signal_code, ignore_code_changes:
        Forwarded, in this order, to `Consumer`.

    """
    producer = Producer(experiment, max_idle_time)
    consumer = Consumer(
        experiment,
        heartbeat,
        user_script_config,
        interrupt_signal_code,
        ignore_code_changes,
    )

    log.debug("##### Init Experiment #####")
    try:
        iterator = range(int(max_trials))
    except (OverflowError, TypeError):
        # When max_trials is None or inf
        iterator = itertools.count()

    worker_broken_trials = 0
    for _ in iterator:
        log.debug("#### Poll for experiment termination.")
        if experiment.is_broken:
            print("#### Experiment has reached broken trials threshold, terminating.")
            break

        if experiment.is_done:
            print("##### Search finished successfully #####")
            break

        log.debug("#### Try to reserve a new trial to evaluate.")
        try:
            trial = reserve_trial(experiment, producer)
        except WaitingForTrials as ex:
            print(
                "### Experiment failed to reserve new trials: {reason}, terminating. "
                .format(reason=str(ex)))
            break

        if trial is not None:
            log.info("#### Successfully reserved %s to evaluate. Consuming...", trial)
            success = consumer.consume(trial)
            if not success:
                worker_broken_trials += 1

        # Comparing an int with None raises TypeError, so the default
        # `max_broken=None` is treated as "no worker-level threshold".
        if max_broken is not None and worker_broken_trials >= max_broken:
            print("#### Worker has reached broken trials threshold, terminating.")
            print(worker_broken_trials, max_broken)
            break

    print("\n" + format_stats(experiment))

    print("\n" + COMPLETION_MESSAGE.format(experiment=experiment))
    if not experiment.is_done:
        print(NONCOMPLETED_MESSAGE.format(experiment=experiment))
def test_concurent_producers(producer, database, random_dt):
    """Two producers sharing one experiment must register distinct trials."""
    total_before = database.trials.count()
    new_before = database.trials.count({"status": "new"})

    # The first producer's algorithm yields a valid point on its first try.
    # The second producer suggests the same point first and therefore has to
    # suggest the next one too. Hence the first algorithm ends with
    # `_num == 1` and the second one with `_num == 2`.
    producer.algorithm.algorithm.possible_values = [("gru", "rnn"), ("gru", "gru")]
    # Make sure it starts from index 0
    producer.algorithm.seed_rng(0)

    assert producer.experiment.pool_size == 1

    second_producer = Producer(producer.experiment)
    second_producer.algorithm = copy.deepcopy(producer.algorithm)

    producer.update()
    second_producer.update()

    producer.produce()
    second_producer.produce()

    # Number of suggestions each algorithm was asked for.
    assert producer.algorithm.algorithm._num == 1  # pool size
    assert second_producer.algorithm.algorithm._num == 2  # pool size

    # One new trial per producer was registered in the database.
    assert database.trials.count() == total_before + 2
    assert database.trials.count({"status": "new"}) == new_before + 2

    query = {"status": "new", "submit_time": random_dt}
    new_trials = list(database.trials.find(query))

    first_trial = new_trials[0]
    assert first_trial["experiment"] == producer.experiment.name
    assert first_trial["start_time"] is None
    assert first_trial["end_time"] is None
    assert first_trial["results"] == []
    assert first_trial["params"] == [
        {"name": "/decoding_layer", "type": "categorical", "value": "gru"},
        {"name": "/encoding_layer", "type": "categorical", "value": "rnn"},
    ]

    assert new_trials[1]["params"] == [
        {"name": "/decoding_layer", "type": "categorical", "value": "gru"},
        {"name": "/encoding_layer", "type": "categorical", "value": "gru"},
    ]
def workon(
    experiment,
    n_workers=None,
    pool_size=None,
    max_trials=None,
    max_broken=None,
    max_idle_time=None,
    reservation_timeout=None,
    heartbeat=None,
    user_script_config=None,
    interrupt_signal_code=None,
    ignore_code_changes=None,
    executor=None,
    executor_configuration=None,
    idle_timeout=None,
):
    """Run workers on `experiment` through an `ExperimentClient` and print a summary.

    `executor` and `executor_configuration` fall back to the values found in
    `orion.core.config.worker` when they are `None`. `max_idle_time` is only
    used as a fallback for `reservation_timeout`.
    """
    # NOTE: Remove in v0.3.0
    if reservation_timeout is None and max_idle_time is not None:
        reservation_timeout = max_idle_time

    producer = Producer(experiment)
    consumer = Consumer(
        experiment,
        user_script_config,
        interrupt_signal_code,
        ignore_code_changes,
    )
    client = ExperimentClient(experiment, producer, heartbeat=heartbeat)

    # Fall back to the configured executor settings.
    worker_config = orion.core.config.worker
    executor = worker_config.executor if executor is None else executor
    if executor_configuration is None:
        executor_configuration = worker_config.executor_configuration

    log.debug("Starting workers")
    with client.tmp_executor(executor, n_workers=n_workers, **executor_configuration):
        workon_options = dict(
            n_workers=n_workers,
            pool_size=pool_size,
            reservation_timeout=reservation_timeout,
            max_trials_per_worker=max_trials,
            max_broken=max_broken,
            trial_arg="trial",
            on_error=on_error,
            idle_timeout=idle_timeout,
        )
        try:
            client.workon(consumer, **workon_options)
        except BrokenExperiment as e:
            # A broken experiment is reported, not propagated.
            print(e)

    if client.is_done:
        print("Search finished successfully")

    print("\n" + format_stats(client))

    print("\n" + COMPLETION_MESSAGE.format(experiment=client))
    if not experiment.is_done:
        print(NONCOMPLETED_MESSAGE.format(experiment=client))
def create_experiment(
        name, version=None, space=None, algorithms=None,
        strategy=None, max_trials=None, storage=None, branching=None,
        max_idle_time=None, heartbeat=None, working_dir=None, debug=False):
    """Create an experiment

    There are 2 main scenarios

    1) The experiment is new

    ``name`` and ``space`` arguments are required, otherwise ``NoConfigurationError`` will be
    raised.

    All other arguments (``algorithms``, ``strategy``, ``max_trials``, ``storage``, ``branching``
    and ``working_dir``) will be replaced by system's defaults if omitted. The system's defaults can
    also be overridden in global configuration file as described for the database in
    :ref:`Database Configuration`. We do not recommend overriding the algorithm configuration using
    system's default, but overriding the storage configuration can be very convenient if the same
    storage is used for all your experiments.

    2) The experiment exists in the database.

    We can break down this scenario in two sub-scenarios for clarity.

    2.1) Only experiment name is given.

    The configuration will be fetched from database.

    2.2) Some other arguments than the name are given.

    The configuration will be fetched from database and given arguments will override them.
    ``max_trials`` may be overwritten in DB, but any other changes will lead to a branching. Instead
    of creating the experiment ``(name, version)``, it will create a new experiment
    ``(name, version+1)`` which will have the same configuration as ``(name, version)`` except for
    the differing arguments given by user. This new experiment will have access to trials of
    ``(name, version)``, adapted according to the differences between ``version`` and
    ``version+1``. A previous version can be accessed by specifying the ``version`` argument.

    Causes of experiment branching are:

    - Change of search space

        - New dimension

        - Different prior

        - Missing dimension

    - Change of algorithm

    - Change of strategy (Not implemented yet)

    - Change of code version (Only supported by commandline API for now)

    Parameters
    ----------
    name: str
        Name of the experiment
    version: int, optional
        Version of the experiment. Defaults to last existing version for a given ``name``
        or 1 for new experiment.
    space: dict, optional
        Optimization space of the algorithm. Should have the form ``dict(name='<prior>(args)')``.
    algorithms: str or dict, optional
        Algorithm used for optimization.
    strategy: str or dict, optional
        Parallel strategy to use to parallelize the algorithm.
    max_trials: int, optional
        Maximum number of trials before the experiment is considered done.
    storage: dict, optional
        Configuration of the storage backend.
    working_dir: str, optional
        Working directory created for the experiment inside which a unique folder will be created
        for each trial. Defaults to a temporary directory that is deleted at end of execution.
    max_idle_time: int, optional
        Maximum time the producer can spend trying to generate a new suggestion.
        Such timeouts are generally caused by slow database, large number of
        concurrent workers leading to many race conditions or small search spaces
        with integer/categorical dimensions that may be fully explored.
        Defaults to `orion.core.config.worker.max_idle_time`.
    heartbeat: int, optional
        Frequency (seconds) at which the heartbeat of the trial is updated.
        If the heartbeat of a `reserved` trial is larger than twice the configured
        heartbeat, Oríon will reset the status of the trial to `interrupted`.
        This allows restoring lost trials (ex: due to killed worker).
        Defaults to `orion.core.config.worker.heartbeat`.
    debug: bool, optional
        If using in debug mode, the storage config is overridden with legacy:EphemeralDB.
        Defaults to False.
    branching: dict, optional
        Arguments to control the branching.

        branch_to: str, optional
            Name of the experiment to branch to. The parent experiment will be the one specified by
            ``(name, version)``, and the child will be ``(branch_to, 1)``.
        branch_from: str, optional
            Name of the experiment to branch from.
            The parent experiment will be the one specified by
            ``(branch_from, last version)``, and the child will be ``(name, 1)``.
        manual_resolution: bool, optional
            Starts the prompt to resolve manually the conflicts. Defaults to False.
        algorithm_change: bool, optional
            Whether to automatically solve the algorithm conflict (change of algo config).
            Defaults to True.
        code_change_type: str, optional
            How to resolve code change automatically. Must be one of 'noeffect', 'unsure' or
            'break'.  Defaults to 'break'.
        cli_change_type: str, optional
            How to resolve cli change automatically. Must be one of 'noeffect', 'unsure' or 'break'.
            Defaults to 'break'.
        config_change_type: str, optional
            How to resolve config change automatically. Must be one of 'noeffect', 'unsure' or
            'break'.  Defaults to 'break'.

    Returns
    -------
    ExperimentClient
        Client built from the experiment, a ``Producer(experiment, max_idle_time)``
        and ``heartbeat``.

    Raises
    ------
    `orion.core.utils.SingletonAlreadyInstantiatedError`
        If the storage is already instantiated and given configuration is different.
        Storage is a singleton, you may only use one instance per process.
    `orion.core.utils.exceptions.NoConfigurationError`
        The experiment is not in database and no space is provided by the user.
    `orion.core.utils.exceptions.RaceCondition`
        There was a race condition during branching and new version cannot be inferred because of
        that. Single race conditions are normally handled seamlessly. If this error gets raised, it
        means that different modifications occurred during each race condition resolution. This is
        likely due to quick code change during experiment creation. Make sure your script is not
        generating files within your code repository.
    `orion.core.utils.exceptions.BranchingEvent`
        The configuration is different than the corresponding one in DB and the branching cannot be
        solved automatically. This usually happens if the version=x is specified but the experiment
        ``(name, x)`` already has a child ``(name, x+1)``. If you really need to branch from version
        ``x``, give it a new name to branch to with ``branching={'branch_to': <new_name>}``.
    `NotImplementedError`
        If the algorithm, storage or strategy specified is not properly installed.

    """
    setup_storage(storage=storage, debug=debug)

    # Both build attempts must use exactly the same arguments.
    build_kwargs = dict(
        version=version, space=space, algorithms=algorithms,
        strategy=strategy, max_trials=max_trials, branching=branching,
        working_dir=working_dir)

    try:
        experiment = experiment_builder.build(name, **build_kwargs)
    except RaceCondition:
        # Try again, but if it fails again, raise. Race conditions due to version increment should
        # only occur once in a short window of time unless code version is changing at a crazy pace.
        try:
            experiment = experiment_builder.build(name, **build_kwargs)
        except RaceCondition as e:
            raise RaceCondition(
                "There was a race condition during branching and new version cannot be infered "
                "because of that. Single race conditions are normally handled seemlessly. If this "
                "error gets raised, it means that different modifications occured during each race "
                "condition resolution. This is likely due to quick code change during experiment "
                "creation. Make sure your script is not generating files within your code "
                "repository.") from e

    producer = Producer(experiment, max_idle_time)

    return ExperimentClient(experiment, producer, heartbeat)