def run_master(args):
    """Run the BOHB master: name server, one local worker and the optimizer.

    Starts a HpBandSter name server, launches a single background worker
    (``AggregateWorker`` when ``args.optimize_generalist`` is truthy,
    ``SingleWorker`` otherwise), runs ``BOHB`` for ``args.n_iterations``
    iterations and then shuts down the optimizer, its workers and the
    name server.

    Args:
        args: Namespace-like object. Attributes read here: ``run_id``,
            ``nic_name``, ``bohb_root_path``, ``optimize_generalist``,
            ``n_repeat``, ``time_budget``, ``time_budget_approx``,
            ``performance_matrix`` (generalist mode only), ``dataset``
            (single-dataset mode only), ``previous_run_dir``,
            ``logger_level``, ``n_repeat_lower_budget``,
            ``n_repeat_upper_budget`` and ``n_iterations``.

    Returns:
        None.
    """
    NS = hpns.NameServer(run_id=args.run_id,
                         nic_name=args.nic_name,
                         working_directory=args.bohb_root_path)
    ns_host, ns_port = NS.start()

    # Start a background worker for the master node
    if args.optimize_generalist:
        w = AggregateWorker(run_id=args.run_id,
                            host=ns_host,
                            nameserver=ns_host,
                            nameserver_port=ns_port,
                            working_directory=args.bohb_root_path,
                            n_repeat=args.n_repeat,
                            has_repeats_as_budget=args.n_repeat is None,
                            time_budget=args.time_budget,
                            time_budget_approx=args.time_budget_approx,
                            performance_matrix=args.performance_matrix)
    else:
        w = SingleWorker(run_id=args.run_id,
                         host=ns_host,
                         nameserver=ns_host,
                         nameserver_port=ns_port,
                         working_directory=args.bohb_root_path,
                         n_repeat=args.n_repeat,
                         dataset=args.dataset,
                         time_budget=args.time_budget,
                         time_budget_approx=args.time_budget_approx)
    w.run(background=True)

    # Create an optimizer
    result_logger = hpres.json_result_logger(directory=args.bohb_root_path,
                                             overwrite=False)

    # Optionally warm-start from the logged results of an earlier run.
    # (The fallback branch used to assign a misspelled name, which left
    # `previous_result` undefined and crashed the BOHB call below.)
    if args.previous_run_dir is not None:
        previous_result = hpres.logged_results_to_HBS_result(
            args.previous_run_dir)
    else:
        previous_result = None

    logger = logging.getLogger(__file__)
    logging_level = getattr(logging, args.logger_level)
    logger.setLevel(logging_level)

    optimizer = BOHB(configspace=get_configspace(),
                     run_id=args.run_id,
                     host=ns_host,
                     nameserver=ns_host,
                     nameserver_port=ns_port,
                     min_budget=args.n_repeat_lower_budget,
                     max_budget=args.n_repeat_upper_budget,
                     result_logger=result_logger,
                     logger=logger,
                     previous_result=previous_result)

    # The Result object is not needed here; results are persisted by
    # `result_logger`.
    optimizer.run(n_iterations=args.n_iterations)

    # Shutdown
    optimizer.shutdown(shutdown_workers=True)
    NS.shutdown()
def runBOHB(cfg):
    """Run a single-process BOHB optimization described by ``cfg``.

    A name server is started on 127.0.0.1, one ``BOHBWorker`` runs in the
    background and ``BOHB`` is executed for ``cfg["bohb_iterations"]``
    iterations. Results are logged live as JSON into
    ``cfg["bohb_log_dir"]`` (existing logs are overwritten).

    Args:
        cfg: Mapping. Keys read here: ``"model"``, ``"bohb_log_dir"``,
            ``"bohb_min_budget"``, ``"bohb_max_budget"``, ``"bohb_eta"``
            and ``"bohb_iterations"``. It is also handed to the worker.

    Returns:
        The object returned by ``BOHB.run``.
    """
    local_host = "127.0.0.1"
    bohb_run_id = "0"

    # assign random port in the 30000-40000 range to avoid using a blocked
    # port because of a previous improper bohb shutdown
    ns_port = int(30000 + random.random() * 10000)

    name_server = hpns.NameServer(run_id=bohb_run_id, host=local_host, port=ns_port)
    name_server.start()

    worker = BOHBWorker(cfg=cfg,
                        nameserver=local_host,
                        run_id=bohb_run_id,
                        nameserver_port=ns_port)
    worker.run(background=True)

    json_logger = hpres.json_result_logger(directory=cfg["bohb_log_dir"],
                                           overwrite=True)

    optimizer = BOHB(
        configspace=get_configspace(cfg['model']),
        run_id=bohb_run_id,
        min_budget=cfg["bohb_min_budget"],
        max_budget=cfg["bohb_max_budget"],
        eta=cfg["bohb_eta"],
        nameserver=local_host,
        nameserver_port=ns_port,
        result_logger=json_logger,
    )

    result = optimizer.run(n_iterations=cfg["bohb_iterations"])

    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()

    return result
def optimize_hyperparameters(model_class, parameters, train_and_validate_fn, num_iterations,
                             min_budget=0.01, working_dir="./bohby_workspace/"):
    """Optimize hyperparameters with BOHB and return the incumbent.

    Args:
        model_class: Forwarded to ``WrapWorker``.
        parameters: Input for ``generate_configspace``.
        train_and_validate_fn: Forwarded to ``WrapWorker``.
        num_iterations: Number of BOHB iterations to run.
        min_budget: Minimum budget; the maximum budget is fixed to 1.
        working_dir: Directory for name-server files and the live JSON
            result log. Created if missing; existing logs are overwritten.

    Returns:
        Tuple ``(val_loss, incumbent_config)``: the loss of the last listed
        run of the incumbent configuration and that configuration.
    """
    # Make sure the working directory exists
    os.makedirs(working_dir, exist_ok=True)

    # Generate a configspace from the given parameters
    search_space = generate_configspace(parameters)

    # Start a local nameserver for communication
    name_server = hpns.NameServer(run_id=_runid, nic_name="lo",
                                  working_directory=working_dir)
    ns_host, ns_port = name_server.start()

    # Define the worker
    wrap_worker = WrapWorker(model_class,
                             train_and_validate_fn,
                             working_directory=working_dir,
                             nameserver=ns_host,
                             nameserver_port=ns_port,
                             run_id=_runid)
    wrap_worker.run(background=True)

    # Enable live logging so a run can be canceled at any time and we can
    # still recover the results
    live_logger = json_result_logger(directory=working_dir, overwrite=True)

    # Optimization
    optimizer = BOHB(configspace=search_space,
                     working_directory=working_dir,
                     run_id=_runid,
                     eta=2,
                     min_budget=min_budget,
                     max_budget=1,
                     host=ns_host,
                     nameserver=ns_host,
                     nameserver_port=ns_port,
                     ping_interval=3600,
                     result_logger=live_logger)
    res = optimizer.run(n_iterations=num_iterations)

    # Clean up
    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()

    # Best found config, read back from the logs written to `working_dir`
    logged = hpres.logged_results_to_HB_result(working_dir)
    best_id = logged.get_incumbent_id()
    incumbent_config = logged.get_id2config_mapping()[best_id]['config']
    val_loss = logged.get_runs_by_id(best_id)[-1].loss

    return val_loss, incumbent_config
def run_bohb(runtime, b, cs):
    """Run BOHB over configuration space ``cs`` with a single local worker.

    Budgets range from 4 to 108 and 300 iterations are executed. The result
    of the run is not returned.

    Args:
        runtime: Not used by this function.
        b: Forwarded to ``MyWorker`` as ``b``.
        cs: Configuration space handed to ``BOHB``.
    """
    hb_run_id = '0'
    budget_low, budget_high = 4, 108
    n_workers = 1
    n_iterations = 300

    name_server = hpns.NameServer(run_id=hb_run_id, host='localhost', port=0)
    ns_host, ns_port = name_server.start()

    worker_pool = []
    for worker_id in range(n_workers):
        worker = MyWorker(b=b,
                          run_id=hb_run_id,
                          id=worker_id,
                          nameserver=ns_host,
                          nameserver_port=ns_port)
        worker.run(background=True)
        worker_pool.append(worker)

    optimizer = BOHB(configspace=cs,
                     run_id=hb_run_id,
                     min_budget=budget_low,
                     max_budget=budget_high,
                     nameserver=ns_host,
                     nameserver_port=ns_port,
                     ping_interval=10,
                     min_bandwidth=0.3)

    results = optimizer.run(n_iterations, min_n_workers=n_workers)

    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()
def optimize(cfg):
    """Run BOHB with a local ``SAC_Worker`` and store/log the outcome.

    The result object is pickled to
    ``optimization_results/<cfg.bohb.run_id>.pkl`` and a short summary is
    written to the module logger.

    Args:
        cfg: Configuration object. Attributes read here: ``cfg.worker`` and
            ``cfg.bohb.{run_id, nameserver, min_budget, max_budget,
            n_iterations}``.

    Returns:
        None.
    """
    logger = logging.getLogger(__name__)

    NS = hpns.NameServer(run_id=cfg.bohb.run_id, host=cfg.bohb.nameserver, port=None)
    NS.start()

    w = SAC_Worker(cfg.worker,
                   nameserver=cfg.bohb.nameserver,
                   run_id=cfg.bohb.run_id)
    w.run(background=True)

    bohb = BOHB(
        configspace=w.get_configspace(),
        run_id=cfg.bohb.run_id,
        nameserver=cfg.bohb.nameserver,
        min_budget=cfg.bohb.min_budget,
        max_budget=cfg.bohb.max_budget,
    )

    res = bohb.run(n_iterations=cfg.bohb.n_iterations)
    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()

    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()

    # Store optimization results. `exist_ok=True` avoids the race between
    # checking for the directory and creating it.
    os.makedirs("optimization_results/", exist_ok=True)
    with open(os.path.join("optimization_results/", "%s.pkl" % cfg.bohb.run_id), 'wb') as fh:
        pickle.dump(res, fh)

    all_runs = res.get_all_runs()
    logger.info('Best found configuration: %s', id2config[incumbent]['config'])
    logger.info('A total of %i unique configurations where sampled.', len(id2config))
    logger.info('A total of %i runs where executed.', len(all_runs))
    logger.info('Total budget corresponds to %.1f full function evaluations.',
                sum(r.budget for r in all_runs) / cfg.bohb.max_budget)
def run_bohb(exp_name, log_dir='EXP', iterations=20):
    """Run BOHB with a local ``TorchWorker`` and persist the results.

    Everything is written below the run directory
    ``bohb-<log_dir>-<exp_name>``: the live JSON result log, the pickled
    result object (``result.pkl``) and a textual dump of all runs
    (``summary.txt``).

    Args:
        exp_name: Experiment name; also used as the HpBandSter ``run_id``.
        log_dir: Middle component of the run directory name.
        iterations: Number of BOHB iterations.

    Returns:
        None. The best run id and its configuration are printed.
    """
    run_dir = 'bohb-{}-{}'.format(log_dir, exp_name)
    if not os.path.exists(run_dir):
        utils.create_exp_dir(run_dir, scripts_to_save=glob.glob('*.py'))

    result_logger = hpres.json_result_logger(directory=run_dir, overwrite=True)

    # Start a nameserver
    NS = hpns.NameServer(run_id=exp_name, host='127.0.0.1', port=0)
    ns_host, ns_port = NS.start()

    # Start a local worker
    worker = TorchWorker(run_id=exp_name,
                         host='127.0.0.1',
                         nameserver=ns_host,
                         nameserver_port=ns_port,
                         timeout=120,
                         run_dir=run_dir)
    worker.run(background=True)

    # Initialise optimiser
    bohb = BOHB(configspace=worker.get_configspace(),
                run_id=exp_name,
                host='127.0.0.1',
                nameserver=ns_host,
                nameserver_port=ns_port,
                result_logger=result_logger,
                min_budget=2,
                max_budget=5,
                )
    print('Worker running')
    res = bohb.run(n_iterations=iterations)

    # Store the results
    with open(os.path.join(run_dir, 'result.pkl'), 'wb') as result_file:
        pickle.dump(res, result_file)

    # Shutdown
    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()

    # get all runs
    all_runs = res.get_all_runs()

    # get id to configuration mapping as dictionary
    id2conf = res.get_id2config_mapping()

    # get best/incumbent run
    best_run = res.get_incumbent_id()
    best_config = id2conf[best_run]['config']

    print(f"Best run id:{best_run}, \n Config:{best_config}")

    # Store all run info; the context manager closes the file even if the
    # write fails.
    with open(os.path.join(run_dir, 'summary.txt'), 'w') as summary_file:
        summary_file.write(f"{all_runs}")
def test_local_nameserver_2(self):
    """A NameServer built on the host of a local nameserver reports 127.0.0.1."""
    local_host, _local_port = utils.start_local_nameserver(host=None, nic_name='lo')
    self.assertEqual(local_host, '127.0.0.1')

    name_server = hpn.NameServer('0', host=local_host)
    _started_host, _started_port = name_server.start()
    self.assertEqual(name_server.host, '127.0.0.1')

    name_server.shutdown()
def main(xargs, api):
    """Run BOHB on a NATS-Bench search space and report the best architecture.

    Args:
        xargs: Run options. Attributes read here: ``rand_seed``,
            ``search_space``, ``dataset``, ``num_samples``,
            ``random_fraction``, ``bandwidth_factor``, ``min_bandwidth``
            and ``n_iters``. It is also passed to ``prepare_logger``.
        api: Benchmark API object; used via ``reset_time``,
            ``query_index_by_arch`` and ``query_info_str_by_arch`` and
            forwarded to the worker.

    Returns:
        Tuple ``(log_dir, current_best_index, total_times)`` where
        ``current_best_index[i]`` is the API index of the best architecture
        among the first ``i + 1`` trajectory entries of the worker.
    """
    torch.set_num_threads(4)
    prepare_seed(xargs.rand_seed)
    # Use the function argument rather than a module-level `args` global.
    logger = prepare_logger(xargs)

    logger.log('{:} use api : {:}'.format(time_string(), api))
    api.reset_time()
    search_space = get_search_spaces(xargs.search_space, 'nats-bench')
    if xargs.search_space == 'tss':
        cs = get_topology_config_space(search_space)
        config2structure = config2topology_func()
    else:
        cs = get_size_config_space(search_space)
        config2structure = config2size_func(search_space)

    hb_run_id = '0'

    NS = hpns.NameServer(run_id=hb_run_id, host='localhost', port=0)
    ns_host, ns_port = NS.start()
    num_workers = 1

    workers = []
    for i in range(num_workers):
        w = MyWorker(nameserver=ns_host,
                     nameserver_port=ns_port,
                     convert_func=config2structure,
                     dataset=xargs.dataset,
                     api=api,
                     run_id=hb_run_id,
                     id=i)
        w.run(background=True)
        workers.append(w)

    bohb = BOHB(configspace=cs,
                run_id=hb_run_id,
                eta=3,
                min_budget=1,
                max_budget=12,
                nameserver=ns_host,
                nameserver_port=ns_port,
                num_samples=xargs.num_samples,
                random_fraction=xargs.random_fraction,
                bandwidth_factor=xargs.bandwidth_factor,
                ping_interval=10,
                min_bandwidth=xargs.min_bandwidth)

    results = bohb.run(xargs.n_iters, min_n_workers=num_workers)

    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()

    # Track the best-so-far entry in a single pass. The strict `>` keeps the
    # earliest entry on ties, which is what `max(prefix, key=...)` does too.
    trajectory = workers[0].trajectory
    current_best_index = []
    best_entry = None
    for entry in trajectory:
        if best_entry is None or entry[0] > best_entry[0]:
            best_entry = entry
        current_best_index.append(api.query_index_by_arch(best_entry[1]))

    best_arch = max(trajectory, key=lambda x: x[0])[1]
    logger.log('Best found configuration: {:} within {:.3f} s'.format(
        best_arch, workers[0].total_times[-1]))
    info = api.query_info_str_by_arch(
        best_arch, '200' if xargs.search_space == 'tss' else '90')
    logger.log('{:}'.format(info))
    logger.log('-'*100)
    logger.close()

    return logger.log_dir, current_best_index, workers[0].total_times
def start_hpbandster_process(method, configspace, loss, total_budget, max_budget_per_config, eta=3):
    """
    Starts a server and a worker object needed for the HpBandSter optimization process

    :param method: (str) 'BOHB' selects BOHB; any other value selects HyperBand.
        Also used as the HpBandSter run id.
    :param configspace: Hyper-parameter search space
    :param loss: Loss function to minimize (forwarded to the worker)
    :param total_budget: Total budget (in number of epochs) allowed for optimization
    :param max_budget_per_config: Maximal number of epochs allowed for one config
    :param eta: split size between every steps of successful halving; used both
        for the optimizer and for the iteration count computed below
    :return: Tuple (NameServer, max_iter, optimizer). ``max_iter`` is the result
        of a true division and is therefore not rounded to an integer.
    """
    # Start a nameserver:
    NS = hpns.NameServer(run_id=method)
    ns_host, ns_port = NS.start()

    # Start local worker
    w = MyWorker(run_id=method,
                 nameserver=ns_host,
                 nameserver_port=ns_port,
                 timeout=120,
                 loss_function=loss)
    w.run(background=True)

    # Both optimizers take the same arguments. `eta` is passed explicitly so
    # that the optimizer halves with the same factor that `max_iter` assumes.
    optimizer_class = BOHB if method == 'BOHB' else HyperBand
    optimizer = optimizer_class(
        configspace=configspace,
        run_id=method,
        nameserver=ns_host,
        nameserver_port=ns_port,
        eta=eta,
        min_budget=1,
        max_budget=max_budget_per_config,
    )

    # We compute the maximal number of iteration to be exact with the original paper
    # (We divide the total budget by the fixed budget per successful halving iteration : (Smax+1)*bmax)
    max_iter = total_budget / (
        int(-1 * (log(1 / max_budget_per_config)) / log(eta) + 1) * max_budget_per_config)

    return NS, max_iter, optimizer
def start(self) -> None:
    """
    Start the Optimizer controller function loop()
    If the calling process is stopped, the controller will stop as well.

    .. important::
        This function returns only after optimization is completed or :meth:`stop` was called.

    """
    local_host = '127.0.0.1'

    # Step 1: Start a NameServer
    fake_run_id = f'OptimizerBOHB_{time()}'
    # default port is 9090, we must have one, this is how BOHB workers communicate (even locally)
    self._namespace = hpns.NameServer(
        run_id=fake_run_id, host=local_host, port=self._nameserver_port)
    self._namespace.start()

    # we have to scale the budget to the iterations per job, otherwise numbers might be too high
    budget_iteration_scale = self._max_iteration_per_job

    # Step 2: Start the workers
    workers = []
    for worker_index in range(self._num_concurrent_workers):
        bandster_worker = _TrainsBandsterWorker(
            optimizer=self,
            sleep_interval=int(self.pool_period_minutes * 60),
            budget_iteration_scale=budget_iteration_scale,
            base_task_id=self._base_task_id,
            objective=self._objective_metric,
            queue_name=self._execution_queue,
            nameserver=local_host,
            nameserver_port=self._nameserver_port,
            run_id=fake_run_id,
            id=worker_index,
        )
        bandster_worker.run(background=True)
        workers.append(bandster_worker)

    # Step 3: Run an optimizer
    self._bohb = BOHB(
        configspace=self._convert_hyper_parameters_to_cs(),
        run_id=fake_run_id,
        # num_samples=self.total_max_jobs, # will be set by self._bohb_kwargs
        min_budget=float(self._min_iteration_per_job) / float(self._max_iteration_per_job),
        **self._bohb_kwargs
    )

    # scale the budget according to the successive halving iterations
    for budget_part in (self.budget.jobs, self.budget.iterations):
        if budget_part.limit:
            budget_part.limit *= len(self._bohb.budgets)

    # start optimization
    self._res = self._bohb.run(
        n_iterations=self.total_max_jobs,
        min_n_workers=self._num_concurrent_workers,
    )

    # Step 4: if we get here, Shutdown
    self.stop()
def run_bohb_parallel(id, run_id, bohb_workers):
    """Run one process of a parallel BOHB job, as worker or as master.

    A process whose ``int(id)`` is greater than 0 waits ten seconds, loads
    the name-server credentials from the working directory, runs a blocking
    worker and then terminates the process via ``exit(0)``. Any other
    process acts as master: it starts the name server, a background worker
    and the optimizer.

    Args:
        id: Process index (anything ``int()`` accepts).
        run_id: HpBandSter run id; also selects the working directory.
        bohb_workers: Number of workers the master waits for (passed
            through ``int()``).

    Returns:
        The object returned by the optimizer's ``run`` (master only).
    """
    # get bohb params
    params = get_bohb_parameters()

    # get suitable interface (eth0 or lo)
    interface = get_bohb_interface()

    # get BOHB log directory
    working_dir = get_working_dir(run_id)

    # every process has to lookup the hostname
    host = hpns.nic_name_to_host(interface)

    os.makedirs(working_dir, exist_ok=True)

    if int(id) > 0:
        print('START NEW WORKER')
        # NOTE(review): presumably gives the master time to write the
        # credentials loaded below — confirm.
        time.sleep(10)
        remote_worker = BohbWorker(host=host, run_id=run_id, working_dir=working_dir)
        remote_worker.load_nameserver_credentials(working_directory=working_dir)
        remote_worker.run(background=False)
        exit(0)

    print('START NEW MASTER')
    name_server = hpns.NameServer(run_id=run_id,
                                  host=host,
                                  port=0,
                                  working_directory=working_dir)
    ns_host, ns_port = name_server.start()

    local_worker = BohbWorker(host=host,
                              nameserver=ns_host,
                              nameserver_port=ns_port,
                              run_id=run_id,
                              working_dir=working_dir)
    local_worker.run(background=True)

    json_logger = hpres.json_result_logger(directory=working_dir, overwrite=True)

    optimizer = BohbWrapper(configspace=get_configspace(),
                            run_id=run_id,
                            eta=params['eta'],
                            host=host,
                            nameserver=ns_host,
                            nameserver_port=ns_port,
                            min_budget=params['min_budget'],
                            max_budget=params['max_budget'],
                            result_logger=json_logger)

    result = optimizer.run(n_iterations=params['iterations'],
                           min_n_workers=int(bohb_workers))

    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()

    return result
def robust_start(id_str, p=None):
    """Start a name server on 127.0.0.1, retrying until one comes up.

    Args:
        id_str: HpBandSter run id for the name server.
        p: Port hint passed through ``get_port``.

    Returns:
        Tuple ``(name_server, port)`` for the name server that started.
    """
    port = get_port(p)
    try:
        name_server = hpns.NameServer(run_id=id_str, host='127.0.0.1', port=port)
        name_server.start()
        logging.info(f"Starting nameserver on port {port}")
        return name_server, port
    except Exception as err:
        logging.info(f"Exception CAUGHT: {err}")
        logging.info("Re-attempting to start nameserver")
        # NOTE(review): the retry is unbounded and hands the same port back
        # to get_port — confirm get_port picks a different one.
        return robust_start(id_str, port)
def main(benchmark_name, dataset_name, dimensions, method_name, num_runs,
         run_start, num_iterations, eta, min_budget, max_budget,
         input_dir, output_dir):
    """Run repeated RandomSearch experiments on a benchmark and save CSV logs.

    Output goes to ``<output_dir>/<name>/<method_name>/`` where ``name``
    comes from ``make_name``. The optimizer options are written to
    ``options.yaml`` and each run ``run_id`` in
    ``range(run_start, num_runs)`` is written to ``<run_id:03d>.csv``.

    Returns:
        Always ``0``.
    """
    benchmark = make_benchmark(benchmark_name,
                               dimensions=dimensions,
                               dataset_name=dataset_name,
                               input_dir=input_dir)
    name = make_name(benchmark_name,
                     dimensions=dimensions,
                     dataset_name=dataset_name)

    output_path = Path(output_dir) / name / method_name
    output_path.mkdir(parents=True, exist_ok=True)

    options = {"eta": eta, "min_budget": min_budget, "max_budget": max_budget}
    with (output_path / "options.yaml").open('w') as options_file:
        yaml.dump(options, options_file)

    n_workers = 1
    for run_id in range(run_start, num_runs):

        name_server = hpns.NameServer(run_id=run_id, host='localhost', port=0)
        ns_host, ns_port = name_server.start()

        worker_pool = []
        for worker_id in range(n_workers):
            worker = BenchmarkWorker(benchmark=benchmark,
                                     nameserver=ns_host,
                                     nameserver_port=ns_port,
                                     run_id=run_id,
                                     id=worker_id)
            worker.run(background=True)
            worker_pool.append(worker)

        searcher = RandomSearch(configspace=benchmark.get_config_space(),
                                run_id=run_id,
                                nameserver=ns_host,
                                nameserver_port=ns_port,
                                ping_interval=10,
                                **options)

        results = searcher.run(num_iterations, min_n_workers=n_workers)

        searcher.shutdown(shutdown_workers=True)
        name_server.shutdown()

        frame = HpBandSterLogs(results).to_frame()
        frame.to_csv(output_path / f"{run_id:03d}.csv")

    return 0
def test_Timeout(self):
    """Workers with a timeout stop on their own after a HyperBand run."""
    host = hpn.nic_name_to_host('lo')

    with tempfile.TemporaryDirectory() as working_directory:

        # start up nameserver
        ns = hpn.NameServer(self.run_id, working_directory=working_directory, host=host)
        ns_host, ns_port = ns.start()

        # create workers and connect them to the nameserver
        workers = []

        def alive_flags():
            # Liveness of every worker thread, in creation order.
            return [worker.thread.is_alive() for worker in workers]

        for worker_id in range(3):
            worker = TestWorker(run_id=self.run_id,
                                sleep_duration=2,
                                timeout=1,
                                host=host,
                                id=worker_id)
            worker.load_nameserver_credentials(working_directory)
            worker.run(background=True)
            workers.append(worker)

        # at this point all workers must still be alive
        self.assertTrue(all(alive_flags()))

        opt = HyperBand(run_id=self.run_id,
                        configspace=self.configspace,
                        nameserver=ns_host,
                        nameserver_port=ns_port,
                        min_budget=1,
                        max_budget=3,
                        eta=3,
                        ping_interval=1)
        opt.run(1, min_n_workers=3)

        # only one worker should be alive when the run is done
        self.assertEqual(1, sum(alive_flags()))

        opt.shutdown()
        time.sleep(2)

        # at this point all workers should have finished
        self.assertFalse(any(alive_flags()))

        # shutdown the nameserver before the temporary directory is gone
        ns.shutdown()
def get_parameters(train_data, kFold, iterations, save=False, filepath = './result/loss_time_bohb.csv'):
    """Tune parameters with BOHB and return the incumbent and its loss.

    Optimizer settings are read from the process command line via
    ``argparse``; ``iterations`` only provides the default for
    ``--n_iterations``.

    Args:
        train_data: Forwarded to ``worker``.
        kFold: Forwarded to ``worker``.
        iterations: Default number of optimizer iterations.
        save: When true, the finish timestamps and losses of all runs are
            handed to ``save_to_csv.save``.
        filepath: Target path given to ``save_to_csv.save``.

    Returns:
        Tuple ``(parameter, min_error)``: the incumbent configuration and
        the loss of the first listed run of the incumbent.
    """
    cli = argparse.ArgumentParser(description='Example 1 - sequential and local execution.')
    cli.add_argument('--min_budget', type=float,
                     help='Minimum budget used during the optimization.', default=1)
    cli.add_argument('--max_budget', type=float,
                     help='Maximum budget used during the optimization.', default=1)
    cli.add_argument('--n_iterations', type=int,
                     help='Number of iterations performed by the optimizer',
                     default=iterations)  # max value = 4
    cli.add_argument('--shared_directory', type=str,
                     help='A directory that is accessible for all processes, e.g. a NFS share.',
                     default='./result')
    args = cli.parse_args()

    bohb_run_id = 'BOHB'
    local_host = '127.0.0.1'

    json_logger = hpres.json_result_logger(directory=args.shared_directory, overwrite=True)

    name_server = hpns.NameServer(run_id=bohb_run_id, host=local_host, port=None)
    name_server.start()

    bohb_worker = worker(train_data, kFold, nameserver=local_host, run_id=bohb_run_id)
    bohb_worker.run(background=True)

    optimizer = BOHB(configspace=bohb_worker.get_configspace(),
                     run_id=bohb_run_id,
                     nameserver=local_host,
                     min_budget=args.min_budget,
                     max_budget=args.max_budget,
                     result_logger=json_logger)
    res = optimizer.run(n_iterations=args.n_iterations)

    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()

    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()
    incumbent_runs = res.get_runs_by_id(incumbent)

    parameter = id2config[incumbent]['config']
    min_error = incumbent_runs[0]['loss']

    if save:
        every_run = res.get_all_runs()
        finish_times = [run['time_stamps']['finished'] for run in every_run]
        losses = [run['loss'] for run in every_run]
        save_to_csv.save(filepath, finish_times, losses)

    return parameter, min_error
def __init__(
        self,
        config_space: ConfigSpace,
        model: AbstractModel,
        x,
        y=None,
        max_iterations: int = 50,
        min_budget: Optional[int] = None,
        max_budget: Optional[int] = None,
        eta: int = 2,
        validation_split: float = 0.1,
        validation_split_shuffle: bool = True,
        run_id: str = None,
        nameserver_ip: str = '127.0.0.1',
        nameserver_port: Optional[int] = None,
        worker_kwargs: Optional[Dict] = None,
        optimizer_kwargs: Optional[Dict] = None,
):
    """Set up the name server, a background worker and the optimizer.

    The first eleven arguments are forwarded unchanged to the parent
    initializer.

    Args:
        nameserver_ip: Host the name server binds to and the worker
            connects to.
        nameserver_port: Port for the name server and the worker.
        worker_kwargs: Extra keyword arguments for ``ModelWorker``; they
            override the defaults assembled here.
        optimizer_kwargs: Stored on the instance as ``optimizer_kwargs``.
    """
    super().__init__(config_space, model, x, y, max_iterations, min_budget,
                     max_budget, eta, validation_split,
                     validation_split_shuffle, run_id)

    self.worker_kwargs = {} if worker_kwargs is None else worker_kwargs
    self.optimizer_kwargs = {} if optimizer_kwargs is None else optimizer_kwargs

    self.nameserver_ip = nameserver_ip
    self.nameserver_port = nameserver_port

    self.nameserver = hpns.NameServer(run_id=self.run_id,
                                      host=self.nameserver_ip,
                                      port=self.nameserver_port)
    self.nameserver.start()

    default_worker_args = {
        'model': self.model,
        'evaluation_result_aggregator': self.evaluation_result_aggregator,
        'x_train': self.x_train,
        'y_train': self.y_train,
        'x_valid': self.x_valid,
        'y_valid': self.y_valid,
        'run_id': self.run_id,
        'nameserver': self.nameserver_ip,
        'nameserver_port': self.nameserver_port,
    }
    # Caller-supplied worker options take precedence over the defaults.
    self.worker = ModelWorker(**{**default_worker_args, **self.worker_kwargs})
    self.worker.run(background=True)

    self.optimizer = self.create_optimizer()
def generate_bohb_data():
    """Run a short BOHB optimization to produce logged example results.

    Two iterations are run with a single local worker; results are logged
    as JSON to ``test/general_example/results/bohb_full_configspace``
    (existing logs are overwritten). Warnings raised during the run are
    suppressed.
    """
    import warnings

    import hpbandster.core.nameserver as hpns
    import hpbandster.core.result as hpres
    from hpbandster.optimizers import BOHB as BOHB

    bohb_run_id = '0'  # Every run has to have a unique (at runtime) id.

    name_server = hpns.NameServer(run_id=bohb_run_id, host='localhost', port=0)
    ns_host, ns_port = name_server.start()

    from neural_opt import MyWorker, get_configspace

    worker = MyWorker(
        nameserver=ns_host,
        nameserver_port=ns_port,
        run_id=bohb_run_id,  # same as nameserver's
    )
    worker.run(background=True)

    # Log the optimization results for later analysis
    json_logger = hpres.json_result_logger(
        directory='test/general_example/results/bohb_full_configspace',
        overwrite=True)

    optimizer = BOHB(
        configspace=get_configspace(),
        run_id=bohb_run_id,  # same as nameserver's
        eta=2,
        min_budget=5,
        max_budget=100,  # Hyperband parameters
        nameserver=ns_host,
        nameserver_port=ns_port,
        result_logger=json_logger,
    )

    # Then start the optimizer. The n_iterations parameter specifies
    # the number of iterations to be performed in this run
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        res = optimizer.run(n_iterations=2)

    # After the run is finished, the services started above need to be shutdown.
    # This ensures that the worker, the nameserver and the master all properly exit
    # and no (daemon) threads keep running afterwards.
    # In particular we shutdown the optimizer (which shuts down all workers) and the nameserver.
    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()
def run_bohb_serial(run_id, experiment_wrapper):
    """Run BOHB in a single process for the given experiment.

    Args:
        run_id: HpBandSter run id; also selects the working directory.
        experiment_wrapper: Supplies the BOHB parameters
            (``get_bohb_parameters``) and the configuration space
            (``get_configspace``); it is also handed to the worker.

    Returns:
        The object returned by the optimizer's ``run``.
    """
    local_host = "127.0.0.1"

    # get bohb parameters
    params = experiment_wrapper.get_bohb_parameters()

    # get BOHB log directory
    working_dir = get_working_dir(run_id)

    # assign random port in the 30000-40000 range to avoid using a blocked
    # port because of a previous improper bohb shutdown
    ns_port = int(30000 + random.random() * 10000)

    name_server = hpns.NameServer(run_id=run_id, host=local_host, port=ns_port)
    name_server.start()

    worker = BohbWorker(nameserver=local_host,
                        id=0,
                        run_id=run_id,
                        nameserver_port=ns_port,
                        working_dir=working_dir,
                        experiment_wrapper=experiment_wrapper)
    worker.run(background=True)

    json_logger = hpres.json_result_logger(directory=working_dir, overwrite=True)

    optimizer = BohbWrapper(
        configspace=experiment_wrapper.get_configspace(),
        run_id=run_id,
        eta=params['eta'],
        min_budget=params['min_budget'],
        max_budget=params['max_budget'],
        random_fraction=params['random_fraction'],
        nameserver=local_host,
        nameserver_port=ns_port,
        result_logger=json_logger)

    result = optimizer.run(n_iterations=params['iterations'])

    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()

    return result
def model_select_bow_vae(args):
    """Run model selection, store the outcome and retrain the best config.

    A name server is started on 127.0.0.1 and ``select_model`` performs the
    search. The result object is pickled to ``model_selection_results.pkl``
    and the incumbent configuration (with ``training_epochs`` set to
    ``args.budget``) is written as JSON to ``best.model.config``, both in
    the worker's log directory. Finally the worker retrains the best
    configuration.

    Args:
        args: Namespace-like object. Attributes read here: ``budget``,
            ``config_space``, ``iterations``, ``seed`` and
            ``num_final_evals``. It is also passed to ``get_worker``.

    Returns:
        None.
    """
    dd = datetime.datetime.now()
    id_str = dd.strftime("%Y-%m-%d_%H-%M-%S")
    ns_port = get_port()
    worker, log_dir = get_worker(args, args.budget, id_str, ns_port)
    worker.search_mode = True

    result_logger = hpres.json_result_logger(directory=log_dir, overwrite=True)
    logging.info("Starting nameserver on port {}".format(ns_port))
    NS = hpns.NameServer(run_id=id_str, host='127.0.0.1', port=ns_port)
    NS.start()

    res = select_model(worker, args.config_space, args.iterations,
                       result_logger, id_str, ns_port)

    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()

    # The message needs a placeholder for its argument; without one the
    # logging module reports a formatting error instead of the message.
    logging.info('Best found configuration: %s', id2config[incumbent]['config'])
    # NOTE(review): 32 is presumably the maximum budget — confirm.
    logging.info(
        'Total budget corresponds to %.1f full function evaluations.' %
        (sum([r.budget for r in res.get_all_runs()]) / 32))

    inc_runs = res.get_runs_by_id(incumbent)
    inc_run = inc_runs[-1]
    inc_loss = inc_run.loss
    inc_config = id2config[incumbent]['config']
    logging.info("Best configuration loss = {}".format(inc_loss))
    logging.info("Best configuration {}".format(inc_config))

    with open(os.path.join(log_dir, 'model_selection_results.pkl'), 'wb') as fh:
        pickle.dump(res, fh)
    with open(os.path.join(log_dir, 'best.model.config'), 'w') as fp:
        inc_config['training_epochs'] = args.budget
        specs = json.dumps(inc_config)
        fp.write(specs)

    worker.retrain_best_config(inc_config, inc_run.budget, args.seed,
                               args.num_final_evals)
    dd_finish = datetime.datetime.now()
    logging.info("Model selection run FINISHED. Time: {}".format(dd_finish - dd))
    NS.shutdown()
def runBohbSerial(run_id):
    """Run BOHB in a single process for the run identified by ``run_id``.

    Args:
        run_id: HpBandSter run id. It selects the working directory and,
            when it contains ``'NLP'``, switches the worker and the
            configuration space to the NLP variant.

    Returns:
        The object returned by the optimizer's ``run``.
    """
    local_host = "127.0.0.1"

    # get BOHB log directory
    working_dir = get_working_dir(run_id)

    # assign random port in the 30000-40000 range to avoid using a blocked
    # port because of a previous improper bohb shutdown
    ns_port = int(30000 + random.random() * 10000)

    # select whether to process NLP or speech datasets
    use_nlp = 'NLP' in run_id

    name_server = hpns.NameServer(run_id=run_id, host=local_host, port=ns_port)
    name_server.start()

    worker = BOHBWorker(nameserver=local_host,
                        run_id=run_id,
                        nameserver_port=ns_port,
                        working_dir=working_dir,
                        use_nlp=use_nlp)
    worker.run(background=True)

    json_logger = hpres.json_result_logger(directory=working_dir, overwrite=True)

    # NOTE(review): max_budget is set to BOHB_MIN_BUDGET, so the minimum and
    # maximum budgets are identical — confirm this is intended.
    optimizer = BohbWrapper(configspace=get_configspace(use_nlp),
                            run_id=run_id,
                            eta=BOHB_ETA,
                            min_budget=BOHB_MIN_BUDGET,
                            max_budget=BOHB_MIN_BUDGET,
                            nameserver=local_host,
                            nameserver_port=ns_port,
                            result_logger=json_logger)

    result = optimizer.run(n_iterations=BOHB_ITERATIONS)

    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()

    return result
def test_optimizers(self):
    """Each optimizer completes one iteration against a single test worker."""
    for optimizer_cls in (BOHB, H2BO, RandomSearch):
        host = hpn.nic_name_to_host('lo')

        with tempfile.TemporaryDirectory() as working_directory:
            # start up nameserver
            name_server = hpn.NameServer(self.run_id,
                                         working_directory=working_directory,
                                         host=host)
            ns_host, ns_port = name_server.start()

            # create workers and connect them to the nameserver
            worker = TestWorker(run_id=self.run_id,
                                sleep_duration=2,
                                timeout=1,
                                host=host,
                                id=1)
            worker.load_nameserver_credentials(working_directory)
            worker.run(background=True)

            opt = optimizer_cls(run_id=self.run_id,
                                configspace=self.configspace,
                                nameserver=ns_host,
                                nameserver_port=ns_port,
                                min_budget=1,
                                max_budget=3,
                                eta=3,
                                ping_interval=1)
            opt.run(1, min_n_workers=1)

            opt.shutdown()
            time.sleep(2)

            # shutdown the nameserver before the temporary directory is gone
            name_server.shutdown()
def test_Timeout(self):
    """A worker with ``timeout=1`` has stopped two seconds after a computation."""

    class NullCallback(object):
        # Accepts and discards any reported result.
        def register_result(self, *args, **kwargs):
            pass

    host = hpn.nic_name_to_host('lo')

    worker = TestWorker(run_id=self.run_id, sleep_duration=0, timeout=1, host=host)

    callback = NullCallback()

    with tempfile.TemporaryDirectory() as working_directory:
        # start up nameserver
        name_server = hpn.NameServer(self.run_id,
                                     working_directory=working_directory,
                                     host=host)
        ns_host, ns_port = name_server.start()

        # connect worker to it
        worker.load_nameserver_credentials(working_directory)
        worker.run(background=True)

        # start a computation with a dummy callback and dummy id
        worker.start_computation(callback, '0')

        # at this point the worker must still be alive
        self.assertTrue(worker.thread.is_alive())

        # as the timeout is only 1, after 2 seconds, the worker thread should be dead
        time.sleep(2)
        self.assertFalse(worker.thread.is_alive())

        # shutdown the nameserver before the temporary directory is gone
        name_server.shutdown()
return cs parser = argparse.ArgumentParser(description='Example 1 - sequential and local execution.') parser.add_argument('--budget', type=float, help='Maximum budget used during the optimization, i.e the number of epochs.', default=6) parser.add_argument('--n_iterations', type=int, help='Number of iterations performed by the optimizer', default=50) args = parser.parse_args() # Step 1: Start a nameserver # Every run needs a nameserver. It could be a 'static' server with a # permanent address, but here it will be started for the local machine with the default port. # The nameserver manages the concurrent running workers across all possible threads or clusternodes. # Note the run_id argument. This uniquely identifies a run of any HpBandSter optimizer. NS = hpns.NameServer(run_id='example1', host='127.0.0.1', port=None) NS.start() # Step 2: Start a worker # Now we can instantiate a worker, providing the mandatory information # Besides the sleep_interval, we need to define the nameserver information and # the same run_id as above. After that, we can start the worker in the background, # where it will wait for incoming configurations to evaluate. w = MyWorker(nameserver='127.0.0.1', run_id='example1') w.run(background=True) # Step 3: Run an optimizer # Now we can create an optimizer object and start the run. # Here, we run RandomSearch, but that is not essential. # The run method will return the `Result` that contains all runs performed.
def main(xargs, nas_bench):
    """Run a BOHB architecture search and look the winner up in ``nas_bench``.

    Args:
        xargs: Parsed options. The attributes read here are ``workers``,
            ``rand_seed``, ``dataset``, ``data_path``, ``arch_nas_dataset``,
            ``search_space_name``, ``max_nodes``, ``time_budget``,
            ``num_samples``, ``random_fraction``, ``bandwidth_factor``,
            ``min_bandwidth`` and ``n_iters``.
        nas_bench: Benchmark object passed to every worker. This function
            calls its ``query_by_arch`` and ``query_index_by_arch`` methods.

    Returns:
        tuple: ``(logger.log_dir, nas_bench.query_index_by_arch(best_arch),
        elapsed_seconds)`` where ``elapsed_seconds`` is the wall-clock time
        between starting the optimizer and shutting the nameserver down.

    Raises:
        AssertionError: If CUDA is unavailable or ``xargs.arch_nas_dataset``
            is not an existing file.
    """
    assert torch.cuda.is_available(), "CUDA is not available."
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.set_num_threads(xargs.workers)
    prepare_seed(xargs.rand_seed)
    # Use this function's own argument instead of a module-level ``args``.
    logger = prepare_logger(xargs)

    if xargs.dataset == "cifar10":
        dataname = "cifar10-valid"
    else:
        dataname = xargs.dataset

    # Both branches below load the same algorithm config file.
    config_path = "configs/nas-benchmark/algos/R-EA.config"

    if xargs.data_path is not None:
        train_data, valid_data, xshape, class_num = get_datasets(
            xargs.dataset, xargs.data_path, -1)
        split_Fpath = "configs/nas-benchmark/cifar-split.txt"
        cifar_split = load_config(split_Fpath, None, None)
        train_split, valid_split = cifar_split.train, cifar_split.valid
        logger.log("Load split file from {:}".format(split_Fpath))
        config = load_config(config_path, {
            "class_num": class_num,
            "xshape": xshape
        }, logger)
        # To split data: the validation set is a copy of the training data
        # that uses the validation transform.
        train_data_v2 = deepcopy(train_data)
        train_data_v2.transform = valid_data.transform
        valid_data = train_data_v2
        # NOTE(review): search_data is not used afterwards in this function;
        # the construction is kept in case it validates its inputs.
        search_data = SearchDataset(xargs.dataset, train_data, train_split,
                                    valid_split)
        # data loader
        train_loader = torch.utils.data.DataLoader(
            train_data,
            batch_size=config.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
            num_workers=xargs.workers,
            pin_memory=True,
        )
        valid_loader = torch.utils.data.DataLoader(
            valid_data,
            batch_size=config.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
            num_workers=xargs.workers,
            pin_memory=True,
        )
        logger.log(
            "||||||| {:10s} ||||||| Train-Loader-Num={:}, Valid-Loader-Num={:}, batch size={:}"
            .format(xargs.dataset, len(train_loader), len(valid_loader),
                    config.batch_size))
        logger.log("||||||| {:10s} ||||||| Config={:}".format(
            xargs.dataset, config))
        extra_info = {
            "config": config,
            "train_loader": train_loader,
            "valid_loader": valid_loader,
        }
    else:
        config = load_config(config_path, None, logger)
        logger.log("||||||| {:10s} ||||||| Config={:}".format(
            xargs.dataset, config))
        extra_info = {
            "config": config,
            "train_loader": None,
            "valid_loader": None
        }

    # nas dataset load
    assert xargs.arch_nas_dataset is not None and os.path.isfile(
        xargs.arch_nas_dataset)
    search_space = get_search_spaces("cell", xargs.search_space_name)
    cs = get_configuration_space(xargs.max_nodes, search_space)

    config2structure = config2structure_func(xargs.max_nodes)
    hb_run_id = "0"

    NS = hpns.NameServer(run_id=hb_run_id, host="localhost", port=0)
    ns_host, ns_port = NS.start()
    num_workers = 1

    # The benchmark API object arrives through the ``nas_bench`` parameter.
    # Nameserver and optimizer are shut down even if the search raises.
    try:
        workers = []
        for i in range(num_workers):
            w = MyWorker(
                nameserver=ns_host,
                nameserver_port=ns_port,
                convert_func=config2structure,
                dataname=dataname,
                nas_bench=nas_bench,
                time_budget=xargs.time_budget,
                run_id=hb_run_id,
                id=i,
            )
            w.run(background=True)
            workers.append(w)

        start_time = time.time()
        bohb = BOHB(
            configspace=cs,
            run_id=hb_run_id,
            eta=3,
            min_budget=12,
            max_budget=200,
            nameserver=ns_host,
            nameserver_port=ns_port,
            num_samples=xargs.num_samples,
            random_fraction=xargs.random_fraction,
            bandwidth_factor=xargs.bandwidth_factor,
            ping_interval=10,
            min_bandwidth=xargs.min_bandwidth,
        )
        try:
            results = bohb.run(xargs.n_iters, min_n_workers=num_workers)
        finally:
            bohb.shutdown(shutdown_workers=True)
    finally:
        NS.shutdown()

    real_cost_time = time.time() - start_time

    id2config = results.get_id2config_mapping()
    incumbent = results.get_incumbent_id()
    logger.log("Best found configuration: {:} within {:.3f} s".format(
        id2config[incumbent]["config"], real_cost_time))
    best_arch = config2structure(id2config[incumbent]["config"])

    info = nas_bench.query_by_arch(best_arch, "200")
    if info is None:
        logger.log("Did not find this architecture : {:}.".format(best_arch))
    else:
        logger.log("{:}".format(info))
    logger.log("-" * 100)

    logger.log("workers : {:.1f}s with {:} archs".format(
        workers[0].time_budget, len(workers[0].seen_archs)))
    logger.close()
    return logger.log_dir, nas_bench.query_index_by_arch(
        best_arch), real_cost_time
# Search space: one uniform float hyperparameter 'x' bounded by 0 and 1.
config_space = CS.ConfigurationSpace()
config_space.add_hyperparameter(
    CS.UniformFloatHyperparameter('x', lower=0, upper=1)
)

# Every run has to have an id that is unique at runtime: when multiple
# instances run at the same time, they have to use different ids.
run_id = '0'


# Step 1:
# Every run needs a nameserver. It could be a 'static' server with a
# permanent address, but here it is started for the local machine with a
# random port.
# The nameserver manages the concurrently running workers across all
# possible threads or cluster nodes.
NS = hpns.NameServer(
    run_id=run_id,
    host='localhost',
    port=0,
)
ns_host, ns_port = NS.start()

# Step 2:
# The worker implements the connection to the model to be evaluated.
# Its 'compute' method will be called repeatedly by the BOHB optimizer with
# the sampled configurations and returns, for example, the computed loss.
# Further usages of the worker will be covered in a later example.
worker = MyWorker(
    nameserver=ns_host,
    nameserver_port=ns_port,
    run_id=run_id,
)
worker.run(background=True)

# We will start the first run with a smaller budget, which we define here.
# In the second run, we'll use three times as much.
min_budget = 9
# Every process has to look up the hostname.
host = hpns.nic_name_to_host(args.nic_name)

# NOTE(review): `host_name` is not assigned anywhere in this fragment;
# confirm it is defined earlier in the file.
prefix = "syn1" if host_name == "synaptomes1" else "test_run"
outputName = f"{prefix}_openml_d_{str(args.openml_dataid)}"


# Step 1: Start a nameserver (see example_1)
# The nameserver is started with the host name from above and a random open
# port (selected by setting the port to 0).
NS = hpns.NameServer(
    run_id=args.run_id,
    host=host,
    port=0,
    working_directory=args.shared_directory,
)
ns_host, ns_port = NS.start()

# Step 2: workers
# Most optimizers are so computationally inexpensive that we can afford to
# run a worker in parallel to them. Note that this one has to run in the
# background so that it does not block!
w = MyWorker(
    sleep_interval=0.5,
    run_id=args.run_id,
    host=host,
    nameserver=ns_host,
    nameserver_port=ns_port,
    n_jobs=args.n_jobs,
    dataID=args.openml_dataid,
)
w.run(background=True)

#if True:
#    time.sleep(5)  # short artificial delay to make sure the nameserver is already running
#    w = MyWorker(sleep_interval = 0.5,run_id=args.run_id, host=host, dataID = args.openml_dataid)
def run_experiment(out_path, on_travis):
    """Optimise the benchmark with BOHB, pickle the result and log the incumbent.

    Args:
        out_path: Directory used for the JSON result log, as the nameserver's
            working directory and for ``results.pkl``. It is created
            (including missing parents) if it does not exist.
        on_travis: When truthy, the default settings are updated with
            ``get_travis_settings('bohb')`` and the final test evaluation of
            the incumbent is skipped.

    Returns:
        None. The result is written to ``<out_path>/results.pkl``; the
        incumbent is logged and, unless ``on_travis``, its test result is
        printed.
    """
    settings = {
        'min_budget': 1,
        'max_budget': 9,  # number of repetitions; this is the fidelity for this bench
        'num_iterations': 10,  # Set this to a low number for demonstration
        'eta': 3,
        'output_dir': Path(out_path)
    }
    if on_travis:
        settings.update(get_travis_settings('bohb'))

    b = Benchmark(rng=1)
    # NOTE(review): the return value of this call is discarded and the space
    # is fetched again below without a seed; confirm which call is intended.
    b.get_configuration_space(seed=1)

    output_dir = settings['output_dir']
    # parents=True so that a nested, not yet existing output path also works.
    output_dir.mkdir(parents=True, exist_ok=True)

    cs = b.get_configuration_space()
    seed = get_rng(rng=0)
    run_id = 'BOHB_on_cartpole'

    result_logger = hpres.json_result_logger(directory=str(output_dir),
                                             overwrite=True)

    ns = hpns.NameServer(run_id=run_id,
                         host='localhost',
                         working_directory=str(output_dir))
    ns_host, ns_port = ns.start()

    # Release the nameserver (and the optimizer with its workers) even when
    # the optimisation raises.
    try:
        worker = CustomWorker(seed=seed,
                              nameserver=ns_host,
                              nameserver_port=ns_port,
                              run_id=run_id,
                              max_budget=settings['max_budget'])
        worker.run(background=True)

        master = BOHB(configspace=cs,
                      run_id=run_id,
                      host=ns_host,
                      nameserver=ns_host,
                      nameserver_port=ns_port,
                      eta=settings['eta'],
                      min_budget=settings['min_budget'],
                      max_budget=settings['max_budget'],
                      result_logger=result_logger)
        try:
            result = master.run(n_iterations=settings['num_iterations'])
        finally:
            master.shutdown(shutdown_workers=True)
    finally:
        ns.shutdown()

    with open(output_dir / 'results.pkl', 'wb') as f:
        pickle.dump(result, f)

    id2config = result.get_id2config_mapping()
    incumbent = result.get_incumbent_id()
    # The last recorded run of the incumbent supplies the reported loss.
    inc_value = result.get_runs_by_id(incumbent)[-1]['loss']
    inc_cfg = id2config[incumbent]['config']

    logger.info(f'Inc Config:\n{inc_cfg}\n'
                f'with Performance: {inc_value:.2f}')

    if not on_travis:
        benchmark = Benchmark(container_source='library://phmueller/automl')
        incumbent_result = benchmark.objective_function_test(
            configuration=inc_cfg,
            fidelity={"budget": settings['max_budget']})
        print(incumbent_result)
def run_optimization(args):
    """Run a BOHB search, announce the outcome via Slacker and save the results.

    Args:
        args: Options object. The attributes read here are ``output_dir``,
            ``data_path``, ``min_budget``, ``max_budget`` and ``iterations``.

    Returns:
        None. The best configuration is written to ``best_config.txt`` and
        the pickled result to ``results.pkl`` inside a timestamped
        sub-directory of ``args.output_dir``.
    """
    print("Starting name server.")
    # One timestamp feeds both the run id and the output directory name.
    started_at = datetime.datetime.now()
    date_time = started_at.strftime('%Y-%m-%d-%H_%M_%S')

    # First start nameserver
    NS = hpns.NameServer(run_id=date_time, host='127.0.0.1', port=None)
    NS.start()

    bohb = None
    # Everything after the nameserver start is guarded so that the optimizer,
    # its workers and the nameserver are released on any failure.
    try:
        print("Preparing result logger and loading previous run, if it exists.")

        # Also start result logger
        output_dir = os.path.join(args.output_dir,
                                  started_at.strftime('%Y_%m_%d--%H_%M_%S'))
        result_logger_path = os.path.join(output_dir, 'results_log.json')
        best_result_path = os.path.join(output_dir, 'best_config.txt')
        print("Result logger will be written to %s" % result_logger_path)

        # NOTE(review): output_dir is freshly timestamped, so this path
        # presumably never exists yet; also confirm that
        # logged_results_to_HBS_result accepts a file path here.
        if os.path.exists(result_logger_path):
            previous_run = hpres.logged_results_to_HBS_result(result_logger_path)
        else:
            previous_run = None

        result_logger = hpres.json_result_logger(directory=output_dir,
                                                 overwrite=True)

        print("Starting search worker.\n")

        # Then start worker
        w = SearchWorker(args.data_path,
                         iaa,
                         os.path.join(output_dir, "logging"),
                         nameserver='127.0.0.1',
                         run_id=date_time)
        w.run(background=True)

        print("Initializing optimizer.")
        # Run the optimizer
        bohb = BOHB(configspace=w.get_configspace(),
                    run_id=date_time,
                    nameserver='127.0.0.1',
                    result_logger=result_logger,
                    min_budget=args.min_budget,
                    max_budget=args.max_budget,
                    previous_result=previous_run)

        print("Initialization complete. Starting optimization run.")
        res = bohb.run(n_iterations=args.iterations)

        print("Optimization complete.")
        output_fp = os.path.join(output_dir, 'results.pkl')

        id2config = res.get_id2config_mapping()
        incumbent = res.get_incumbent_id()
        best_config = id2config[incumbent]['config']

        print("Results will be saved at:\n{}".format(output_fp))
        print("Best found configuration: ", best_config)

        Slacker.send_message(
            "AutoML Optimization finished with minimum "
            "budget {}, maximum budget {}, and {} "
            "iterations.\n"
            "Output file has been written in {} \n".format(
                args.min_budget, args.max_budget, args.iterations, output_dir),
            "AutoML Optimization Finished!")
        sleep(2)
        Slacker.send_code("Best found configuration:",
                          "{}".format(best_config))

        with open(best_result_path, mode='w') as file:
            lines = [
                "Best results are as follows:\n",
                "{}".format(best_config)
            ]
            file.writelines(lines)

        with open(output_fp, mode='wb') as file:
            pickle.dump(res, file)
    finally:
        # Shutdown after completion (or failure); the nameserver is stopped
        # even if shutting the optimizer down raises.
        try:
            if bohb is not None:
                bohb.shutdown(shutdown_workers=True)
        finally:
            NS.shutdown()
def run_experiment(space, num_iterations, nic_name, run_id, work_dir, worker,
                   min_budget, max_budget, eta, dest_dir, store_all_runs=False):
    r"""
    Runs the optimization algorithm, and sets up a nameserver

    The optimizer is created through the module-level name ``opt`` and the
    output file names use the module-level name ``method``; both must be
    defined before this function is called.

    Parameters:
    --------
    space: hpbandster ConfigSpace object
        The search space to sample from for each hyperparameter during
        hyperparameter optimization.
    num_iterations: int
        number of iterations to run optimization algorithm
    nic_name: string
        name of the network interface used for communication.
        Note: default is only for local execution on *nix!
    run_id: string
        A unique identifier of that Hyperband run. Use, for example, the
        cluster's JobID when running multiple concurrent runs to separate
        them
    work_dir: string
        The top level working directory accessible to all compute
        nodes(shared filesystem). Created if missing.
    worker: modified hpbandster Worker object
        Implements a user specified compute function, see MyWorker class.
        It is connected via ``load_nameserver_credentials(work_dir)`` and
        started in the background.
    min_budget: float
        The smallest budget to consider. Needs to be positive!
    max_budget: float
        the largest budget to consider. Needs to be larger than min_budget!
        The budgets will be geometrically distributed $\sim \eta^k$ for
        $k\in [0, 1, ... , num_subsets - 1]$.
    eta: float
        In each iteration, a complete run of sequential halving is executed.
        In it, after evaluating each configuration on the same subset size,
        only a fraction of 1/eta of them 'advances' to the next round.
        Must be greater or equal to 2.
    dest_dir: string
        the destination directory. Created if missing.
    store_all_runs: bool
        When true, a second pickle named ``{method}_full_run_{run_id}.pkl``
        is written next to ``{method}_run_{run_id}.pkl``.

    Returns:
    --------
    The object returned by the optimizer's ``run(n_iterations=...)`` call.
    """
    # make sure the working and dest directory exist
    os.makedirs(work_dir, exist_ok=True)
    os.makedirs(dest_dir, exist_ok=True)

    # setup a nameserver. Every run needs a nameserver. Here it will be
    # started for the local machine with a random port
    NS = hpns.NameServer(run_id=run_id,
                         host='localhost',
                         port=0,
                         working_directory=work_dir,
                         nic_name=nic_name)
    ns_host, ns_port = NS.start()

    # Stop the optimizer (with its workers) and the nameserver even when the
    # run raises.
    try:
        # start worker in the background
        worker.load_nameserver_credentials(work_dir)
        worker.run(background=True)

        print("host: {}".format(ns_host))
        print("port: {}".format(ns_port))

        # Named `optimizer` so the local does not shadow an optimizer class
        # called BOHB.
        optimizer = opt(configspace=space,
                        run_id=run_id,
                        eta=eta,
                        min_budget=min_budget,
                        max_budget=max_budget,
                        nameserver=ns_host,
                        working_directory=dest_dir,
                        host=ns_host,
                        nameserver_port=ns_port,
                        ping_interval=3600,
                        result_logger=None)
        try:
            result = optimizer.run(n_iterations=num_iterations)
        finally:
            # shutdown the worker and the dispatcher
            optimizer.shutdown(shutdown_workers=True)
    finally:
        NS.shutdown()

    with open(os.path.join(dest_dir, '{}_run_{}.pkl'.format(method, run_id)),
              'wb') as fh:
        pickle.dump(extract_results_to_pickle(result), fh)

    if store_all_runs:
        # NOTE(review): this writes the same extract_results_to_pickle(result)
        # payload as the file above; confirm whether the complete `result`
        # was meant to be stored here.
        with open(
                os.path.join(dest_dir,
                             '{}_full_run_{}.pkl'.format(method, run_id)),
                'wb') as fh:
            pickle.dump(extract_results_to_pickle(result), fh)

    # in case one wants to inspect the complete run
    return result
# Fragment: the first tokens close a definition that starts outside this view
# (it ends in `return cs`). The rest sets up a run: a JSON result logger
# writing to `shared_dir` with overwrite=False, a NameServer on 127.0.0.1 that
# uses `shared_dir` as working directory, and a KerasWorker (timeout=120)
# started in the background.
# NOTE(review): in this flattened line `shared_dir = args.shared_dir` follows
# a `#`; confirm whether it is a live assignment or commented out, because
# `shared_dir` is used by the calls after it.
dilation_kernel_length, compression_filter_size, learning_rate ]) return cs # shared_dir = args.shared_dir # results logger result_logger = hpres.json_result_logger(directory=shared_dir, overwrite=False) # initialize name server run_id = args.run_id NS = hpns.NameServer( run_id=run_id, host='127.0.0.1', working_directory=shared_dir, ) ns_host, ns_port = NS.start() # define worker worker = KerasWorker(dataset=args.dataset, use_bg=False, sequence_length=1000, host='127.0.0.1', run_id=run_id, nameserver=ns_host, nameserver_port=ns_port, timeout=120) worker.run(background=True) # define hp optimizer