def add_configurations(self, experiments: Union[Experiment, List[Experiment],
                                                Dict[str, Dict]]):
    """Chains generator given experiment specifications.

    Arguments:
        experiments (Experiment | list | dict): Experiments to run.
    """
    experiment_list = convert_to_experiment_list(experiments)

    for experiment in experiment_list:
        grid_vals = count_spec_samples(experiment.spec, num_samples=1)

        lazy_eval = grid_vals > SERIALIZATION_THRESHOLD
        if lazy_eval:
            warnings.warn(
                f"The number of pre-generated samples ({grid_vals}) "
                "exceeds the serialization threshold "
                f"({int(SERIALIZATION_THRESHOLD)}). Resume ability is "
                "disabled. To fix this, reduce the number of "
                "dimensions/size of the provided grid search.")

        previous_samples = self._total_samples
        points_to_evaluate = copy.deepcopy(self._points_to_evaluate)
        self._total_samples += count_variants(experiment.spec,
                                              points_to_evaluate)
        iterator = _TrialIterator(
            uuid_prefix=self._uuid_prefix,
            num_samples=experiment.spec.get("num_samples", 1),
            unresolved_spec=experiment.spec,
            output_path=experiment.dir_name,
            points_to_evaluate=points_to_evaluate,
            lazy_eval=lazy_eval,
            start=previous_samples)
        self._iterators.append(iterator)
        self._trial_generator = itertools.chain(self._trial_generator,
                                                iterator)
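# A minimal usage sketch for the method above, assuming Ray Tune's
# BasicVariantGenerator and Experiment. The import paths vary across Ray
# versions, and `train_fn` is a placeholder, not part of the snippet.
from ray.tune import Experiment
from ray.tune.suggest.basic_variant import BasicVariantGenerator


def train_fn(config):
    ...


exp = Experiment("sketch_experiment", train_fn,
                 config={"lr": 0.01}, num_samples=2)
searcher = BasicVariantGenerator()
searcher.add_configurations(exp)  # also accepts a list or a dict of specs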
def run_experiments(experiments: Union[Experiment, Mapping,
                                       Sequence[Union[Experiment, Mapping]]],
                    scheduler: Optional[TrialScheduler] = None,
                    server_port: Optional[int] = None,
                    verbose: Union[int, Verbosity] = Verbosity.V3_TRIAL_DETAILS,
                    progress_reporter: Optional[ProgressReporter] = None,
                    resume: bool = False,
                    queue_trials: bool = False,
                    reuse_actors: bool = False,
                    trial_executor: Optional[RayTrialExecutor] = None,
                    raise_on_failed_trial: bool = True,
                    concurrent: bool = True,
                    callbacks: Optional[Sequence[Callback]] = None):
    """Runs and blocks until all trials finish.

    Examples:
        >>> experiment_spec = Experiment("experiment", my_func)
        >>> run_experiments(experiments=experiment_spec)

        >>> experiment_spec = {"experiment": {"run": my_func}}
        >>> run_experiments(experiments=experiment_spec)

    Returns:
        List of Trial objects, holding data for each executed trial.
    """
    # It is important to do this here, because it schematizes the experiments
    # and conducts the implicit registration.
    experiments = convert_to_experiment_list(experiments)

    if concurrent:
        return run(
            experiments,
            server_port=server_port,
            verbose=verbose,
            progress_reporter=progress_reporter,
            resume=resume,
            queue_trials=queue_trials,
            reuse_actors=reuse_actors,
            trial_executor=trial_executor,
            raise_on_failed_trial=raise_on_failed_trial,
            scheduler=scheduler,
            callbacks=callbacks).trials
    else:
        trials = []
        for exp in experiments:
            trials += run(
                exp,
                server_port=server_port,
                verbose=verbose,
                progress_reporter=progress_reporter,
                resume=resume,
                queue_trials=queue_trials,
                reuse_actors=reuse_actors,
                trial_executor=trial_executor,
                raise_on_failed_trial=raise_on_failed_trial,
                scheduler=scheduler,
                callbacks=callbacks).trials
        return trials
def run_experiments(experiments,
                    scheduler=None,
                    server_port=None,
                    verbose=2,
                    progress_reporter=None,
                    resume=False,
                    queue_trials=False,
                    reuse_actors=False,
                    trial_executor=None,
                    raise_on_failed_trial=True,
                    concurrent=True,
                    callbacks=None):
    """Runs and blocks until all trials finish.

    Examples:
        >>> experiment_spec = Experiment("experiment", my_func)
        >>> run_experiments(experiments=experiment_spec)

        >>> experiment_spec = {"experiment": {"run": my_func}}
        >>> run_experiments(experiments=experiment_spec)

    Returns:
        List of Trial objects, holding data for each executed trial.
    """
    # It is important to do this here, because it schematizes the experiments
    # and conducts the implicit registration.
    experiments = convert_to_experiment_list(experiments)

    if concurrent:
        return run(
            experiments,
            server_port=server_port,
            verbose=verbose,
            progress_reporter=progress_reporter,
            resume=resume,
            queue_trials=queue_trials,
            reuse_actors=reuse_actors,
            trial_executor=trial_executor,
            raise_on_failed_trial=raise_on_failed_trial,
            scheduler=scheduler,
            callbacks=callbacks).trials
    else:
        trials = []
        for exp in experiments:
            trials += run(
                exp,
                server_port=server_port,
                verbose=verbose,
                progress_reporter=progress_reporter,
                resume=resume,
                queue_trials=queue_trials,
                reuse_actors=reuse_actors,
                trial_executor=trial_executor,
                raise_on_failed_trial=raise_on_failed_trial,
                scheduler=scheduler,
                callbacks=callbacks).trials
        return trials
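# Hedged usage sketch for the two run_experiments variants above: running
# several named specs sequentially via concurrent=False. The names "exp1" /
# "exp2" and train_fn are illustrative placeholders, not from the snippets.
def train_fn(config):
    ...


specs = {
    "exp1": {"run": train_fn, "config": {"lr": 0.01}},
    "exp2": {"run": train_fn, "config": {"lr": 0.1}},
}
trials = run_experiments(specs, concurrent=False,
                         raise_on_failed_trial=False)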
def network_debug(self):
    logger = logging.getLogger("detectron2.trainer")

    # inference
    SearchTrainer.test_policies(self.cfg, self.model, None, self.k_th,
                                self.K_fold)

    # search by exploration and exploitation
    logger.info("Step2: search best policies")
    name = "search_fold%d" % (self.k_th)
    register_trainable(name, lambda augs, rpt: search_debug(augs, rpt))

    # search algorithm
    algo = HyperOptSearch(
        self.space,
        max_concurrent=4 * 20,
        metric=self.metric,
        mode=self.mode)  # top1_valid or minus_loss

    # experiments configuration
    exp_config = {
        name: {
            'run': name,
            'num_samples': 4,
            'resources_per_trial': self.resources_per_trial,
            'stop': {
                'training_iteration': self.num_policy
            },
            'config': {
                "cfg": self.cfg,
                "k_th": self.k_th,
                "K_fold": self.K_fold
            }
        }
    }

    # Bayesian optimization search
    # results = run_experiments(exp_config, search_alg=algo, scheduler=None,
    #                           verbose=0, queue_trials=True,
    #                           raise_on_failed_trial=False)
    results = run(
        convert_to_experiment_list(exp_config),
        name=name,
        search_alg=algo,
        resources_per_trial=self.resources_per_trial,
        return_trials=True,
        verbose=0,
        queue_trials=True,
        raise_on_failed_trial=False,
    )

    # sort
    results = [x for x in results if x.last_result is not None]
    results = sorted(
        results, key=lambda x: x.last_result[self.metric], reverse=True)
    return []
def testConvertExperimentList(self):
    exp1 = Experiment(**{
        "name": "foo",
        "run": "f1",
        "config": {
            "script_min_iter_time_s": 0
        }
    })
    result = convert_to_experiment_list([exp1, exp1])
    self.assertEqual(len(result), 2)
    self.assertEqual(type(result), list)
def run_experiments(experiments,
                    search_alg=None,
                    scheduler=None,
                    with_server=False,
                    server_port=TuneServer.DEFAULT_PORT,
                    verbose=2,
                    resume=False,
                    queue_trials=False,
                    reuse_actors=False,
                    trial_executor=None,
                    raise_on_failed_trial=True):
    """Runs and blocks until all trials finish.

    Examples:
        >>> experiment_spec = Experiment("experiment", my_func)
        >>> run_experiments(experiments=experiment_spec)

        >>> experiment_spec = {"experiment": {"run": my_func}}
        >>> run_experiments(experiments=experiment_spec)

        >>> run_experiments(
        >>>     experiments=experiment_spec,
        >>>     scheduler=MedianStoppingRule(...))

        >>> run_experiments(
        >>>     experiments=experiment_spec,
        >>>     search_alg=SearchAlgorithm(),
        >>>     scheduler=MedianStoppingRule(...))

    Returns:
        List of Trial objects, holding data for each executed trial.
    """
    # It is important to do this here, because it schematizes the experiments
    # and conducts the implicit registration.
    experiments = convert_to_experiment_list(experiments)

    trials = []
    for exp in experiments:
        trials += run(
            exp,
            search_alg=search_alg,
            scheduler=scheduler,
            with_server=with_server,
            server_port=server_port,
            verbose=verbose,
            resume=resume,
            queue_trials=queue_trials,
            reuse_actors=reuse_actors,
            trial_executor=trial_executor,
            raise_on_failed_trial=raise_on_failed_trial,
            return_trials=True)
    return trials
def add_configurations(self, experiments):
    """Chains generator given experiment specifications.

    Arguments:
        experiments (Experiment | list | dict): Experiments to run.
    """
    experiment_list = convert_to_experiment_list(experiments)
    for experiment in experiment_list:
        self._trial_generator = itertools.chain(
            self._trial_generator,
            self._generate_trials(experiment.spec, experiment.name))
def __init__(self, experiments=None):
    """Constructs a generator given experiment specifications.

    Arguments:
        experiments (Experiment | list | dict): Experiments to run.
    """
    experiment_list = convert_to_experiment_list(experiments)
    self._parser = make_parser()
    self._trial_generator = chain.from_iterable([
        self._generate_trials(experiment.spec, experiment.name)
        for experiment in experiment_list
    ])
    self._finished = False
def add_configurations(self, experiments):
    """Chains generator given experiment specifications.

    Multiplies the number of trials by the repeat factor.

    Arguments:
        experiments (Experiment | list | dict): Experiments to run.
    """
    experiment_list = convert_to_experiment_list(experiments)
    for experiment in experiment_list:
        self._trial_generator = itertools.chain(
            self._trial_generator,
            self._generate_trials(
                experiment.spec.get("num_samples", 1) * self._repeat,
                experiment.spec, experiment.name))
def add_configurations(
        self, experiments: Union[Experiment, List[Experiment],
                                 Dict[str, Dict]]):
    """Chains generator given experiment specifications.

    Arguments:
        experiments (Experiment | list | dict): Experiments to run.
    """
    experiment_list = convert_to_experiment_list(experiments)
    for experiment in experiment_list:
        self._trial_generator = itertools.chain(
            self._trial_generator,
            self._generate_trials(
                experiment.spec.get("num_samples", 1), experiment.spec,
                experiment.name))
def add_configurations(self, experiments):
    """Registers experiment specifications.

    Arguments:
        experiments (Experiment | list | dict): Experiments to run.
    """
    logger.debug("added configurations")
    experiment_list = convert_to_experiment_list(experiments)
    assert len(experiment_list) == 1, (
        "SearchAlgorithms can only support 1 experiment at a time.")
    self._experiment = experiment_list[0]
    experiment_spec = self._experiment.spec
    self._total_samples = experiment_spec.get("num_samples", 1)

    _warn_on_repeater(self.searcher, self._total_samples)

    if "run" not in experiment_spec:
        raise TuneError("Must specify `run` in {}".format(experiment_spec))
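# A hedged sketch of the single-experiment constraint enforced above:
# wrapping a Searcher in a SearchGenerator and registering exactly one
# experiment spec. Import paths, the HyperOptSearch arguments, and train_fn
# are assumptions for illustration, not taken from the snippet itself.
from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.suggest.search_generator import SearchGenerator


def train_fn(config):
    ...


search_gen = SearchGenerator(HyperOptSearch(metric="loss", mode="min"))
search_gen.add_configurations(
    {"only_exp": {"run": train_fn, "num_samples": 8}})
# Passing two named experiments instead would trip the assertion:
# search_gen.add_configurations({"a": spec_a, "b": spec_b})  # AssertionError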
def testConvertExperimentJSON(self):
    experiment = {
        "name": {
            "run": "f1",
            "config": {
                "script_min_iter_time_s": 0
            }
        },
        "named": {
            "run": "f1",
            "config": {
                "script_min_iter_time_s": 0
            }
        }
    }
    result = convert_to_experiment_list(experiment)
    self.assertEqual(len(result), 2)
    self.assertEqual(type(result), list)
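# Sketch of the conversion the test above exercises: a dict keyed by
# experiment name is flattened into a list of Experiment objects, one per
# key. The import path and the train_fn placeholder are assumptions; in the
# test itself, "f1" is a trainable registered elsewhere.
from ray.tune.experiment import convert_to_experiment_list


def train_fn(config):
    ...


specs = {
    "exp_a": {"run": train_fn, "config": {"script_min_iter_time_s": 0}},
    "exp_b": {"run": train_fn, "config": {"script_min_iter_time_s": 0}},
}
experiments = convert_to_experiment_list(specs)
assert isinstance(experiments, list) and len(experiments) == 2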
def add_configurations(self, experiments: Union[Experiment, List[Experiment],
                                                Dict[str, Dict]]):
    """Chains generator given experiment specifications.

    Arguments:
        experiments (Experiment | list | dict): Experiments to run.
    """
    experiment_list = convert_to_experiment_list(experiments)
    for experiment in experiment_list:
        points_to_evaluate = copy.deepcopy(self._points_to_evaluate)
        self._total_samples += count_variants(experiment.spec,
                                              points_to_evaluate)
        self._trial_generator = itertools.chain(
            self._trial_generator,
            self._generate_trials(
                experiment.spec.get("num_samples", 1), experiment.spec,
                experiment.dir_name, points_to_evaluate))
def train_SAC(env, eval_env, out_dir, seed=None, **kwargs):
    ray.init(
        local_mode=kwargs['local'],
        address=(kwargs['ray_address'] if 'ray_address' in kwargs else None),
        ignore_reinit_error=True,
        log_to_driver=False,
        webui_host="0.0.0.0",
    )

    # Get the experiments from the configuration file
    experiments = convert_to_experiment_list(kwargs)

    if len(experiments) == 0:
        raise ValueError("No experiments found")
    elif len(experiments) > 1:
        raise ValueError("Multiple experiments not yet supported")

    # Get the first experiment
    experiment = experiments[0]

    # TODO: define callbacks
    # Create the callback field if it does not exist
    if "callbacks" not in experiment.spec["config"]:
        experiment.spec["config"]["callbacks"] = {}
    callbacks = experiment.spec["config"]["callbacks"]

    checkpoint = None
    if 'checkpoint' in kwargs:
        checkpoint = kwargs['checkpoint']

    print("Running experiment:")
    pp.pprint(experiment.spec)

    trials = ray.tune.run(
        experiment,
        resume=kwargs['continue'] if 'continue' in kwargs else False,
        restore=checkpoint,
        return_trials=True,
    )

    return trials
def run_experiments(
        experiments: Union[Experiment, Mapping,
                           Sequence[Union[Experiment, Mapping]]],
        scheduler: Optional[TrialScheduler] = None,
        server_port: Optional[int] = None,
        verbose: Union[int, Verbosity] = Verbosity.V3_TRIAL_DETAILS,
        progress_reporter: Optional[ProgressReporter] = None,
        resume: bool = False,
        reuse_actors: bool = False,
        trial_executor: Optional[RayTrialExecutor] = None,
        raise_on_failed_trial: bool = True,
        concurrent: bool = True,
        # Deprecated args.
        queue_trials: Optional[bool] = None,
        callbacks: Optional[Sequence[Callback]] = None,
        _remote: Optional[bool] = None):
    """Runs and blocks until all trials finish.

    Examples:
        >>> experiment_spec = Experiment("experiment", my_func)
        >>> run_experiments(experiments=experiment_spec)

        >>> experiment_spec = {"experiment": {"run": my_func}}
        >>> run_experiments(experiments=experiment_spec)

    Returns:
        List of Trial objects, holding data for each executed trial.
    """
    # To be removed in 1.9.
    if queue_trials is not None:
        raise DeprecationWarning(
            "`queue_trials` has been deprecated and is replaced by "
            "the `TUNE_MAX_PENDING_TRIALS_PG` environment variable. "
            "Per default at least one Trial is queued at all times, "
            "so you likely don't need to change anything other than "
            "removing this argument from your call to `tune.run()`")

    if _remote is None:
        _remote = ray.util.client.ray.is_connected()

    if _remote is True and trial_executor:
        raise ValueError("cannot use custom trial executor")

    if not trial_executor or isinstance(trial_executor, RayTrialExecutor):
        _ray_auto_init()

    if _remote:
        remote_run = ray.remote(num_cpus=0)(run_experiments)

        # Make sure tune.run_experiments is run on the server node.
        remote_run = force_on_current_node(remote_run)

        return ray.get(
            remote_run.remote(
                experiments,
                scheduler,
                server_port,
                verbose,
                progress_reporter,
                resume,
                reuse_actors,
                trial_executor,
                raise_on_failed_trial,
                concurrent,
                callbacks,
                _remote=False))

    # It is important to do this here, because it schematizes the experiments
    # and conducts the implicit registration.
    experiments = convert_to_experiment_list(experiments)

    if concurrent:
        return run(
            experiments,
            server_port=server_port,
            verbose=verbose,
            progress_reporter=progress_reporter,
            resume=resume,
            reuse_actors=reuse_actors,
            trial_executor=trial_executor,
            raise_on_failed_trial=raise_on_failed_trial,
            scheduler=scheduler,
            callbacks=callbacks).trials
    else:
        trials = []
        for exp in experiments:
            trials += run(
                exp,
                server_port=server_port,
                verbose=verbose,
                progress_reporter=progress_reporter,
                resume=resume,
                reuse_actors=reuse_actors,
                trial_executor=trial_executor,
                raise_on_failed_trial=raise_on_failed_trial,
                scheduler=scheduler,
                callbacks=callbacks).trials
        return trials
def run_experiments(experiments: Union[Experiment, Mapping,
                                       Sequence[Union[Experiment, Mapping]]],
                    scheduler: Optional[TrialScheduler] = None,
                    server_port: Optional[int] = None,
                    verbose: Union[int, Verbosity] = Verbosity.V3_TRIAL_DETAILS,
                    progress_reporter: Optional[ProgressReporter] = None,
                    resume: bool = False,
                    queue_trials: bool = False,
                    reuse_actors: bool = False,
                    trial_executor: Optional[RayTrialExecutor] = None,
                    raise_on_failed_trial: bool = True,
                    concurrent: bool = True,
                    callbacks: Optional[Sequence[Callback]] = None,
                    _remote: bool = None):
    """Runs and blocks until all trials finish.

    Examples:
        >>> experiment_spec = Experiment("experiment", my_func)
        >>> run_experiments(experiments=experiment_spec)

        >>> experiment_spec = {"experiment": {"run": my_func}}
        >>> run_experiments(experiments=experiment_spec)

    Returns:
        List of Trial objects, holding data for each executed trial.
    """
    if _remote is None:
        _remote = ray.util.client.ray.is_connected()

    if _remote is True and trial_executor:
        raise ValueError("cannot use custom trial executor")

    if not trial_executor or isinstance(trial_executor, RayTrialExecutor):
        _ray_auto_init()

    if _remote:
        remote_run = ray.remote(num_cpus=0)(run_experiments)

        # Make sure tune.run_experiments is run on the server node.
        remote_run = force_on_current_node(remote_run)

        return ray.get(
            remote_run.remote(experiments, scheduler, server_port, verbose,
                              progress_reporter, resume, queue_trials,
                              reuse_actors, trial_executor,
                              raise_on_failed_trial, concurrent, callbacks,
                              _remote=False))

    # It is important to do this here, because it schematizes the experiments
    # and conducts the implicit registration.
    experiments = convert_to_experiment_list(experiments)

    if concurrent:
        return run(
            experiments,
            server_port=server_port,
            verbose=verbose,
            progress_reporter=progress_reporter,
            resume=resume,
            queue_trials=queue_trials,
            reuse_actors=reuse_actors,
            trial_executor=trial_executor,
            raise_on_failed_trial=raise_on_failed_trial,
            scheduler=scheduler,
            callbacks=callbacks).trials
    else:
        trials = []
        for exp in experiments:
            trials += run(
                exp,
                server_port=server_port,
                verbose=verbose,
                progress_reporter=progress_reporter,
                resume=resume,
                queue_trials=queue_trials,
                reuse_actors=reuse_actors,
                trial_executor=trial_executor,
                raise_on_failed_trial=raise_on_failed_trial,
                scheduler=scheduler,
                callbacks=callbacks).trials
        return trials
def testConvertExperimentIncorrect(self):
    self.assertRaises(TuneError, lambda: convert_to_experiment_list("hi"))
def testConvertExperimentNone(self):
    result = convert_to_experiment_list(None)
    self.assertEqual(len(result), 0)
    self.assertEqual(type(result), list)
def run_experiments(experiments,
                    search_alg=None,
                    scheduler=None,
                    with_server=False,
                    server_port=TuneServer.DEFAULT_PORT,
                    verbose=True,
                    resume=False,
                    queue_trials=False,
                    trial_executor=None,
                    raise_on_failed_trial=True):
    """Runs and blocks until all trials finish.

    Args:
        experiments (Experiment | list | dict): Experiments to run. Will be
            passed to `search_alg` via `add_configurations`.
        search_alg (SearchAlgorithm): Search Algorithm. Defaults to
            BasicVariantGenerator.
        scheduler (TrialScheduler): Scheduler for executing the experiment.
            Choose among FIFO (default), MedianStopping, AsyncHyperBand, and
            HyperBand.
        with_server (bool): Starts a background Tune server. Needed for
            using the Client API.
        server_port (int): Port number for launching TuneServer.
        verbose (bool): How much output should be printed for each trial.
        resume (bool|"prompt"): If checkpoint exists, the experiment will
            resume from there. If resume is "prompt", Tune will prompt if
            checkpoint detected.
        queue_trials (bool): Whether to queue trials when the cluster does
            not currently have enough resources to launch one. This should
            be set to True when running on an autoscaling cluster to enable
            automatic scale-up.
        trial_executor (TrialExecutor): Manage the execution of trials.
        raise_on_failed_trial (bool): Raise TuneError if there exists failed
            trial (of ERROR state) when the experiments complete.

    Examples:
        >>> experiment_spec = Experiment("experiment", my_func)
        >>> run_experiments(experiments=experiment_spec)

        >>> experiment_spec = {"experiment": {"run": my_func}}
        >>> run_experiments(experiments=experiment_spec)

        >>> run_experiments(
        >>>     experiments=experiment_spec,
        >>>     scheduler=MedianStoppingRule(...))

        >>> run_experiments(
        >>>     experiments=experiment_spec,
        >>>     search_alg=SearchAlgorithm(),
        >>>     scheduler=MedianStoppingRule(...))

    Returns:
        List of Trial objects, holding data for each executed trial.
    """
    # It is important to do this here, because it schematizes the experiments
    # and conducts the implicit registration.
    experiments = convert_to_experiment_list(experiments)
    checkpoint_dir = _find_checkpoint_dir(experiments)

    runner = None
    restore = False

    if os.path.exists(
            os.path.join(checkpoint_dir, TrialRunner.CKPT_FILE_NAME)):
        if resume == "prompt":
            msg = ("Found incomplete experiment at {}. "
                   "Would you like to resume it?".format(checkpoint_dir))
            restore = click.confirm(msg, default=False)
            if restore:
                logger.info("Tip: to always resume, "
                            "pass resume=True to run_experiments()")
            else:
                logger.info("Tip: to always start a new experiment, "
                            "pass resume=False to run_experiments()")
        elif resume:
            restore = True
        else:
            logger.info(
                "Tip: to resume incomplete experiments, "
                "pass resume='prompt' or resume=True to run_experiments()")
    else:
        logger.info(
            "Did not find checkpoint file in {}.".format(checkpoint_dir))

    if restore:
        runner = try_restore_runner(checkpoint_dir, search_alg, scheduler,
                                    trial_executor)
    else:
        logger.info("Starting a new experiment.")

    if not runner:
        if scheduler is None:
            scheduler = FIFOScheduler()

        if search_alg is None:
            search_alg = BasicVariantGenerator()

        search_alg.add_configurations(experiments)

        runner = TrialRunner(
            search_alg,
            scheduler=scheduler,
            metadata_checkpoint_dir=checkpoint_dir,
            launch_web_server=with_server,
            server_port=server_port,
            verbose=verbose,
            queue_trials=queue_trials,
            trial_executor=trial_executor)

    print(runner.debug_string(max_debug=99999))

    last_debug = 0
    while not runner.is_finished():
        runner.step()
        if time.time() - last_debug > DEBUG_PRINT_INTERVAL:
            print(runner.debug_string())
            last_debug = time.time()

    print(runner.debug_string(max_debug=99999))

    wait_for_log_sync()

    errored_trials = []
    for trial in runner.get_trials():
        if trial.status != Trial.TERMINATED:
            errored_trials += [trial]

    if errored_trials:
        if raise_on_failed_trial:
            raise TuneError("Trials did not complete", errored_trials)
        else:
            logger.error("Trials did not complete: %s", errored_trials)

    return runner.get_trials()
def run_experiments(experiments,
                    search_alg=None,
                    scheduler=None,
                    with_server=False,
                    server_port=TuneServer.DEFAULT_PORT,
                    verbose=2,
                    resume=False,
                    queue_trials=False,
                    trial_executor=None,
                    raise_on_failed_trial=True):
    """Runs and blocks until all trials finish.

    Args:
        experiments (Experiment | list | dict): Experiments to run. Will be
            passed to `search_alg` via `add_configurations`.
        search_alg (SearchAlgorithm): Search Algorithm. Defaults to
            BasicVariantGenerator.
        scheduler (TrialScheduler): Scheduler for executing the experiment.
            Choose among FIFO (default), MedianStopping, AsyncHyperBand, and
            HyperBand.
        with_server (bool): Starts a background Tune server. Needed for
            using the Client API.
        server_port (int): Port number for launching TuneServer.
        verbose (int): 0, 1, or 2. Verbosity mode. 0 = silent,
            1 = only status updates, 2 = status and trial results.
        resume (bool|"prompt"): If checkpoint exists, the experiment will
            resume from there. If resume is "prompt", Tune will prompt if
            checkpoint detected.
        queue_trials (bool): Whether to queue trials when the cluster does
            not currently have enough resources to launch one. This should
            be set to True when running on an autoscaling cluster to enable
            automatic scale-up.
        trial_executor (TrialExecutor): Manage the execution of trials.
        raise_on_failed_trial (bool): Raise TuneError if there exists failed
            trial (of ERROR state) when the experiments complete.

    Examples:
        >>> experiment_spec = Experiment("experiment", my_func)
        >>> run_experiments(experiments=experiment_spec)

        >>> experiment_spec = {"experiment": {"run": my_func}}
        >>> run_experiments(experiments=experiment_spec)

        >>> run_experiments(
        >>>     experiments=experiment_spec,
        >>>     scheduler=MedianStoppingRule(...))

        >>> run_experiments(
        >>>     experiments=experiment_spec,
        >>>     search_alg=SearchAlgorithm(),
        >>>     scheduler=MedianStoppingRule(...))

    Returns:
        List of Trial objects, holding data for each executed trial.
    """
    # It is important to do this here, because it schematizes the experiments
    # and conducts the implicit registration.
    experiments = convert_to_experiment_list(experiments)
    checkpoint_dir = _find_checkpoint_dir(experiments)

    runner = None
    restore = False

    if TrialRunner.checkpoint_exists(checkpoint_dir):
        if resume == "prompt":
            msg = ("Found incomplete experiment at {}. "
                   "Would you like to resume it?".format(checkpoint_dir))
            restore = click.confirm(msg, default=False)
            if restore:
                logger.info("Tip: to always resume, "
                            "pass resume=True to run_experiments()")
            else:
                logger.info("Tip: to always start a new experiment, "
                            "pass resume=False to run_experiments()")
        elif resume:
            restore = True
        else:
            logger.info(
                "Tip: to resume incomplete experiments, "
                "pass resume='prompt' or resume=True to run_experiments()")
    else:
        logger.info(
            "Did not find checkpoint file in {}.".format(checkpoint_dir))

    if restore:
        runner = try_restore_runner(checkpoint_dir, search_alg, scheduler,
                                    trial_executor)
    else:
        logger.info("Starting a new experiment.")

    if not runner:
        if scheduler is None:
            scheduler = FIFOScheduler()

        if search_alg is None:
            search_alg = BasicVariantGenerator()

        search_alg.add_configurations(experiments)

        runner = TrialRunner(
            search_alg,
            scheduler=scheduler,
            metadata_checkpoint_dir=checkpoint_dir,
            launch_web_server=with_server,
            server_port=server_port,
            verbose=bool(verbose > 1),
            queue_trials=queue_trials,
            trial_executor=trial_executor)

    if verbose:
        print(runner.debug_string(max_debug=99999))

    last_debug = 0
    while not runner.is_finished():
        runner.step()
        if time.time() - last_debug > DEBUG_PRINT_INTERVAL:
            if verbose:
                print(runner.debug_string())
            last_debug = time.time()

    if verbose:
        print(runner.debug_string(max_debug=99999))

    wait_for_log_sync()

    errored_trials = []
    for trial in runner.get_trials():
        if trial.status != Trial.TERMINATED:
            errored_trials += [trial]

    if errored_trials:
        if raise_on_failed_trial:
            raise TuneError("Trials did not complete", errored_trials)
        else:
            logger.error("Trials did not complete: %s", errored_trials)

    return runner.get_trials()
def add_configurations(self, experiments):
    self.experiment_list = convert_to_experiment_list(experiments)
def search(self):
    logger = logging.getLogger("detectron2.trainer")
    logger.info("Step2: search best policies")
    name = "search_fold%d" % (self.k_th)

    # register function
    register_trainable(
        name,
        lambda augs, rpt: search_func(self.model, self.K_fold, augs, rpt))

    # search algorithm
    algo = HyperOptSearch(
        self.space,
        max_concurrent=4 * 20,
        metric=self.metric,
        mode=self.mode)  # max top1_valid or min minus_loss

    # configuration
    exp_config = {
        name: {
            'run': name,
            'num_samples': 40 if self.smoke_test else self.num_search,
            "resources_per_trial": self.resources_per_trial,
            'stop': {
                'training_iteration': self.num_policy
            },
            'config': {
                "cfg": self.cfg,
                "k_th": self.k_th,
                "K_fold": self.K_fold,
                "num_policy": self.num_policy,
                "num_op": self.num_op,
                "ops_list": self.ops_list
            }
        }
    }

    # Bayesian optimization search
    # results = run_experiments(exp_config, search_alg=algo, scheduler=None,
    #                           verbose=0, queue_trials=True,
    #                           raise_on_failed_trial=False)
    results = run(
        convert_to_experiment_list(exp_config),
        name=name,
        search_alg=algo,
        resources_per_trial=self.resources_per_trial,
        return_trials=True,
        verbose=0,
        queue_trials=True,
        raise_on_failed_trial=False,
    )

    # sort
    results = [x for x in results if x.last_result is not None]
    results = sorted(
        results, key=lambda x: x.last_result[self.metric], reverse=True)

    # get top N policies
    final_policy_set = []
    for result in results[:self.num_final_policies]:
        # for result in results[:self.num_final_policies * 5 // self.K_fold]:
        # transform result to policies
        final_policy = policy_decoder(result.config, self.num_policy,
                                      self.num_op, self.ops_list)
        logger.info('k_th:%d | loss=%.12f top1_valid=%.4f %s' %
                    (self.k_th, result.last_result['minus_loss'],
                     result.last_result['top1_valid'], final_policy))

        final_policy = self._remove_deplicates(final_policy)
        final_policy_set.extend(final_policy)

    return final_policy_set