def run_worker(self, pipeline_config, run_id, task_id, ns_credentials_dir, network_interface_name,
               X_train, Y_train, X_valid, Y_valid, dataset_info, shutdownables):
    if not task_id == -1:
        time.sleep(5)
        while not os.path.isdir(ns_credentials_dir):
            time.sleep(5)
    host = nic_name_to_host(network_interface_name)

    worker = ModuleWorker(pipeline=self.sub_pipeline, pipeline_config=pipeline_config,
                          X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid,
                          dataset_info=dataset_info,
                          budget_type=self.budget_types[pipeline_config['budget_type']],
                          max_budget=pipeline_config["max_budget"],
                          host=host, run_id=run_id, id=task_id,
                          shutdownables=shutdownables, use_pynisher=pipeline_config["use_pynisher"])

    worker.load_nameserver_credentials(ns_credentials_dir)

    # run in background if not on cluster
    worker.run(background=(task_id <= 1))
def main(args):
    args.run_id = args.job_id or args.experiment_name
    args.host = hpns.nic_name_to_host(args.nic_name)
    args.bohb_root_path = str(Path("experiments", args.experiment_group, args.experiment_name))
    args.dataset = args.experiment_name

    # Handle case of budget dictating n_repeat vs. n_repeat directly
    if args.n_repeat_lower_budget is not None and args.n_repeat_upper_budget is not None:
        args.n_repeat = None
    else:
        args.n_repeat_lower_budget = 1
        args.n_repeat_upper_budget = 1

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    tf.set_random_seed(args.seed)

    if args.worker:
        run_worker(args)
    else:
        run_master(args)
def run_worker(self, pipeline_config, constant_hyperparameter, run_id, task_id, ns_credentials_dir,
               network_interface_name, X_train, Y_train, X_valid, Y_valid):
    if not task_id == -1:
        time.sleep(5)
        while not os.path.isdir(ns_credentials_dir):
            time.sleep(5)
    host = nic_name_to_host(network_interface_name)

    worker = ModuleWorkerNoTimeLimit(pipeline=self.sub_pipeline, pipeline_config=pipeline_config,
                                     constant_hyperparameter=constant_hyperparameter,
                                     X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid,
                                     budget_type=pipeline_config['budget_type'],
                                     max_budget=pipeline_config["max_budget"],
                                     host=host, run_id=run_id, id=task_id,
                                     working_directory=pipeline_config["result_logger_dir"],
                                     permutations=self.permutations)

    worker.load_nameserver_credentials(ns_credentials_dir)

    # run in background if not on cluster
    worker.run(background=(task_id <= 1))
def run_worker(self, pipeline_config, run_id, task_id, ns_credentials_dir, network_interface_name,
               X_train, Y_train, X_valid, Y_valid, dataset_info, shutdownables):
    """Run the AutoNetWorker.

    Arguments:
        pipeline_config {dict} -- The configuration of the pipeline
        run_id {str} -- An id for the run
        task_id {int} -- An id for the worker
        ns_credentials_dir {str} -- path to nameserver credentials
        network_interface_name {str} -- the name of the network interface
        X_train {array} -- The data
        Y_train {array} -- The data
        X_valid {array} -- The data
        Y_valid {array} -- The data
        dataset_info {DatasetInfo} -- Object describing the dataset
        shutdownables {list} -- A list of objects that need to shutdown when the optimization is finished
    """
    if not task_id == -1:
        time.sleep(5)
        while not os.path.isdir(ns_credentials_dir):
            time.sleep(5)
    host = nic_name_to_host(network_interface_name)

    worker = AutoNetWorker(pipeline=self.sub_pipeline, pipeline_config=pipeline_config,
                           X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid,
                           dataset_info=dataset_info,
                           budget_type=self.budget_types[pipeline_config['budget_type']],
                           max_budget=pipeline_config["max_budget"],
                           host=host, run_id=run_id, id=task_id,
                           shutdownables=shutdownables, use_pynisher=pipeline_config["use_pynisher"])

    worker.load_nameserver_credentials(ns_credentials_dir)

    # run in background if not on cluster
    worker.run(background=(task_id <= 1))
def run_bohb_parallel(id, run_id, bohb_workers):
    # get bohb params
    bohb_params = get_bohb_parameters()

    # get suitable interface (eth0 or lo)
    bohb_interface = get_bohb_interface()

    # get BOHB log directory
    working_dir = get_working_dir(run_id)

    # every process has to lookup the hostname
    host = hpns.nic_name_to_host(bohb_interface)

    os.makedirs(working_dir, exist_ok=True)

    if int(id) > 0:
        print('START NEW WORKER')
        time.sleep(10)
        w = BohbWorker(host=host, run_id=run_id, working_dir=working_dir)
        w.load_nameserver_credentials(working_directory=working_dir)
        w.run(background=False)
        exit(0)

    print('START NEW MASTER')
    ns = hpns.NameServer(run_id=run_id, host=host, port=0, working_directory=working_dir)
    ns_host, ns_port = ns.start()

    w = BohbWorker(host=host, nameserver=ns_host, nameserver_port=ns_port, run_id=run_id, working_dir=working_dir)
    w.run(background=True)

    result_logger = hpres.json_result_logger(directory=working_dir, overwrite=True)

    bohb = BohbWrapper(configspace=get_configspace(), run_id=run_id, eta=bohb_params['eta'],
                       host=host, nameserver=ns_host, nameserver_port=ns_port,
                       min_budget=bohb_params['min_budget'], max_budget=bohb_params['max_budget'],
                       result_logger=result_logger)

    res = bohb.run(n_iterations=bohb_params['iterations'], min_n_workers=int(bohb_workers))

    bohb.shutdown(shutdown_workers=True)
    ns.shutdown()

    return res
def test_Timeout(self):
    host = hpn.nic_name_to_host('lo')

    with tempfile.TemporaryDirectory() as working_directory:
        # start up nameserver
        ns = hpn.NameServer(self.run_id, working_directory=working_directory, host=host)
        ns_host, ns_port = ns.start()

        # create workers and connect them to the nameserver
        workers = []
        for i in range(3):
            w = TestWorker(run_id=self.run_id, sleep_duration=2, timeout=1, host=host, id=i)
            w.load_nameserver_credentials(working_directory)
            w.run(background=True)
            workers.append(w)

        # at this point all workers must still be alive
        alive = [w.thread.is_alive() for w in workers]
        self.assertTrue(all(alive))

        opt = HyperBand(run_id=self.run_id, configspace=self.configspace,
                        nameserver=ns_host, nameserver_port=ns_port,
                        min_budget=1, max_budget=3, eta=3, ping_interval=1)
        opt.run(1, min_n_workers=3)

        # only one worker should be alive when the run is done
        alive = [w.thread.is_alive() for w in workers]
        self.assertEqual(1, sum(alive))

        opt.shutdown()
        time.sleep(2)

        # at this point all workers should have finished
        alive = [w.thread.is_alive() for w in workers]
        self.assertFalse(any(alive))

        # shutdown the nameserver before the temporary directory is gone
        ns.shutdown()
def fit(self, pipeline_config, result_loggers, shutdownables, refit=False):
    if refit or pipeline_config["ensemble_size"] == 0:
        return dict()
    es_credentials_file = os.path.join(pipeline_config["working_dir"], "es_credentials_%s.json" % pipeline_config["run_id"])

    # start server
    if pipeline_config["task_id"] != 1 or pipeline_config["run_worker_on_master_node"]:
        host = nic_name_to_host(OptimizationAlgorithm.get_nic_name(pipeline_config))
        host, port, process = start_server(host)
        pipeline_config["ensemble_server_credentials"] = (host, port)
        shutdownables = shutdownables + [process]

    result_loggers = [ensemble_logger(directory=pipeline_config["result_logger_dir"], overwrite=True)] + result_loggers
    return {"result_loggers": result_loggers, "shutdownables": shutdownables}
def test_optimizers(self):
    optimizers = [BOHB, H2BO, RandomSearch]

    for optimizer in optimizers:
        host = hpn.nic_name_to_host('lo')

        with tempfile.TemporaryDirectory() as working_directory:
            # start up nameserver
            ns = hpn.NameServer(self.run_id, working_directory=working_directory, host=host)
            ns_host, ns_port = ns.start()

            # create workers and connect them to the nameserver
            w = TestWorker(run_id=self.run_id, sleep_duration=2, timeout=1, host=host, id=1)
            w.load_nameserver_credentials(working_directory)
            w.run(background=True)

            opt = optimizer(run_id=self.run_id, configspace=self.configspace,
                            nameserver=ns_host, nameserver_port=ns_port,
                            min_budget=1, max_budget=3, eta=3, ping_interval=1)
            opt.run(1, min_n_workers=1)

            opt.shutdown()
            time.sleep(2)

            # shutdown the nameserver before the temporary directory is gone
            ns.shutdown()
def test_Timeout(self):
    class dummy_callback(object):
        def register_result(self, *args, **kwargs):
            pass

    host = hpn.nic_name_to_host('lo')
    w = TestWorker(run_id=self.run_id, sleep_duration=0, timeout=1, host=host)
    dc = dummy_callback()

    with tempfile.TemporaryDirectory() as working_directory:
        # start up nameserver
        ns = hpn.NameServer(self.run_id, working_directory=working_directory, host=host)
        ns_host, ns_port = ns.start()

        # connect worker to it
        w.load_nameserver_credentials(working_directory)
        w.run(background=True)

        # start a computation with a dummy callback and dummy id
        w.start_computation(dc, '0')

        # at this point the worker must still be alive
        self.assertTrue(w.thread.is_alive())

        # as the timeout is only 1, after 2 seconds the worker thread should be dead
        time.sleep(2)
        self.assertFalse(w.thread.is_alive())

        # shutdown the nameserver before the temporary directory is gone
        ns.shutdown()
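The tests above rely on a TestWorker that merely sleeps instead of training anything; that class is not shown here. As a rough sketch only, a comparable worker built on hpbandster's Worker base class might look like the following (the class name SleepWorker, the sleep_duration argument, and the dummy loss are assumptions, not the actual test fixture):

import time
from hpbandster.core.worker import Worker

class SleepWorker(Worker):
    # Toy worker: sleeps for a fixed duration, then reports a dummy loss.
    # Remaining keyword arguments (run_id, host, id, timeout, ...) go to the base class.
    def __init__(self, sleep_duration=1, **kwargs):
        super().__init__(**kwargs)
        self.sleep_duration = sleep_duration

    def compute(self, config, budget, **kwargs):
        time.sleep(self.sleep_duration)
        return {'loss': 0.0, 'info': {'budget': budget}}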
def runBohbParallel(id, run_id):
    # get suitable interface (eth0 or lo)
    bohb_interface = get_bohb_interface()

    # get BOHB log directory
    working_dir = get_working_dir(run_id)

    # select whether to process NLP or speech datasets
    use_nlp = 'NLP' in run_id

    # every process has to lookup the hostname
    host = hpns.nic_name_to_host(bohb_interface)

    os.makedirs(working_dir, exist_ok=True)

    if int(id) > 0:
        print('START NEW WORKER')
        time.sleep(10)
        w = BOHBWorker(host=host, run_id=run_id, working_dir=working_dir, use_nlp=use_nlp)
        w.load_nameserver_credentials(working_directory=working_dir)
        w.run(background=False)
        exit(0)

    print('START NEW MASTER')
    ns = hpns.NameServer(run_id=run_id, host=host, port=0, working_directory=working_dir)
    ns_host, ns_port = ns.start()

    w = BOHBWorker(host=host, nameserver=ns_host, nameserver_port=ns_port, run_id=run_id,
                   working_dir=working_dir, use_nlp=use_nlp)
    w.run(background=True)

    result_logger = hpres.json_result_logger(directory=working_dir, overwrite=True)

    bohb = BohbWrapper(configspace=get_configspace(use_nlp), run_id=run_id, eta=BOHB_ETA,
                       host=host, nameserver=ns_host, nameserver_port=ns_port,
                       min_budget=BOHB_MIN_BUDGET, max_budget=BOHB_MAX_BUDGET, result_logger=result_logger)

    res = bohb.run(n_iterations=BOHB_ITERATIONS, min_n_workers=BOHB_WORKERS)
    # res = bohb.run(n_iterations=BOHB_ITERATIONS)

    bohb.shutdown(shutdown_workers=True)
    ns.shutdown()

    return res
def dqn_bohb_wrapper(**params):
    # Setup directories where live data is logged
    logdir = params["logdir"]
    dqn_output_dir = os.path.join(logdir, 'dqn_output')
    # if not os.path.isdir(dqn_output_dir):
    #     os.makedirs(dqn_output_dir)
    params["logdir"] = dqn_output_dir

    bohb_output_dir = os.path.join(logdir, 'bohb_output')
    # if not os.path.isdir(bohb_output_dir):
    #     os.makedirs(bohb_output_dir)

    logging.basicConfig(level=logging.INFO)  # logging.DEBUG for debug output
    logger = logging.getLogger()
    logger.propagate = False  # no duplicate logging outputs
    fh = logging.FileHandler(os.path.join(logdir, 'bohb.log'))
    fh.setLevel(logging.INFO)
    fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s:%(name)s: %(message)s'))
    logger.addHandler(fh)

    # Build configuration space and define all hyperparameters
    cs = ConfigurationSpace()
    epsilon = UniformFloatHyperparameter("epsilon", 0.2, 0.9, default_value=0.6)  # initial epsilon
    epsilon_decay = UniformFloatHyperparameter("epsilon_decay", 0.2, 1, default_value=0.995)  # decay rate
    lr = UniformFloatHyperparameter("lr", 0.0005, 0.01, default_value=0.005)
    units_shared_layer1 = UniformIntegerHyperparameter("units_layer1", 8, 100, default_value=24)
    units_shared_layer2 = UniformIntegerHyperparameter("units_layer2", 8, 100, default_value=24)
    units_policy_layer = UniformIntegerHyperparameter("units_layer3", 8, 100, default_value=24)
    activ_fcn = CategoricalHyperparameter("activ_fcn", ['relu6', 'elu', 'mixed'], default_value='relu6')
    gamma = UniformFloatHyperparameter("gamma", 0.6, 0.90, default_value=0.80)
    tau = UniformFloatHyperparameter("tau", 0.5, 1., default_value=0.7)
    # update_interval = UniformIntegerHyperparameter("update_interval", 1, 300, default_value=50)

    if params["architecture"] == 'lstm' or (params["architecture"] == 'gru'):
        trace_length = UniformIntegerHyperparameter("trace_length", 1, 20, default_value=8)
        # buffer_condition = LessThanCondition(child=trace_length, parent=params["buffer_size"])
        # params["batch_size"] = 5
        cs.add_hyperparameters([units_shared_layer1, units_shared_layer2, units_policy_layer,
                                epsilon, epsilon_decay, activ_fcn, lr, gamma, tau, trace_length])
    else:
        params.pop("batch_size")
        batch_size = UniformIntegerHyperparameter("batch_size", 1, 100, default_value=30)
        # buffer_condition = LessThanCondition(child=batch_size, parent=params["buffer_size"], value=33)
        # InCondition(child=batch_size, value=33)
        cs.add_hyperparameters([units_shared_layer1, units_shared_layer2, units_policy_layer,
                                epsilon, epsilon_decay, activ_fcn, lr, gamma, tau, batch_size])

    logger.info('##############################################')
    logger.info('Run Optimization')
    logger.info('##############################################')

    if params["array_id"] == 1:
        # Setup directories where live data is logged
        if not os.path.isdir(dqn_output_dir):
            os.makedirs(dqn_output_dir)
        if not os.path.isdir(bohb_output_dir):
            os.makedirs(bohb_output_dir)

        # start nameserver
        NS = hpns.NameServer(run_id=params["instance_id"], nic_name=params["nic_name"], working_directory=bohb_output_dir)
        ns_host, ns_port = NS.start()  # stores information for workers to find in working directory

        # BOHB is usually so cheap that we can afford to run a worker on the master node, too.
        worker = DQNWorker(nameserver=ns_host, nameserver_port=ns_port, run_id=params["instance_id"], **params)
        worker.run(background=True)

        # Create scenario object
        logger.info('##############################################')
        logger.info('Setup BOHB instance')
        logger.info('##############################################')
        logger.info('Output_dir: %s' % bohb_output_dir)

        HB = BOHB(configspace=cs, run_id=params["instance_id"], eta=3,
                  min_budget=params["min_resource"], max_budget=params["max_resource"],
                  host=ns_host, nameserver=ns_host, nameserver_port=ns_port, ping_interval=3600)

        # BOHB can wait until a minimum number of workers is online before starting
        res = HB.run(n_iterations=4, min_n_workers=4)

        # pickle result here for later analysis
        with open(os.path.join(bohb_output_dir, 'results.pkl'), 'wb') as f:
            pickle.dump(res, f)

        id2config = res.get_id2config_mapping()
        print('A total of %i unique configurations were sampled.' % len(id2config.keys()))
        print('A total of %i runs were executed.' % len(res.get_all_runs()))

        # incumbent_trajectory = res.get_incumbent_trajectory()
        # import matplotlib.pyplot as plt
        # plt.plot(incumbent_trajectory['times_finished'], incumbent_trajectory['losses'])
        # plt.xlabel('wall clock time [s]')
        # plt.ylabel('incumbent loss')
        # plt.show()

        # shutdown all workers
        HB.shutdown(shutdown_workers=True)

        # shutdown nameserver
        NS.shutdown()
    else:
        host = hpns.nic_name_to_host(params["nic_name"])

        # workers only instantiate the DQNWorker, find the nameserver and start serving
        w = DQNWorker(run_id=params["instance_id"], host=host, **params)
        w.load_nameserver_credentials(bohb_output_dir)

        # run worker in the foreground
        w.run(background=False)
def run_opt(args):
    # import PyTorchWorker as worker

    # Every process has to lookup the hostname
    host = hpns.nic_name_to_host(args.nic_name)

    result_logger = hpres.json_result_logger(directory=args.shared_directory, overwrite=True)

    # Start a nameserver:
    NS = hpns.NameServer(run_id=args.run_id, host=host, port=0, working_directory=args.shared_directory)
    ns_host, ns_port = NS.start()

    # Start local worker
    w = worker(run_id=args.run_id, host=host, nameserver=ns_host, nameserver_port=ns_port, timeout=120)
    w.run(background=True)

    if args.method == "BOHB":
        print("[RUNNER] method: BOHB")
        opt = BOHB(configspace=worker.get_configspace(), run_id=args.run_id, host=host,
                   nameserver=ns_host, nameserver_port=ns_port, result_logger=result_logger,
                   min_budget=args.min_budget, max_budget=args.max_budget)
    elif args.method == "random":
        print("[RUNNER] method: random")
        opt = RandomSearch(configspace=worker.get_configspace(), run_id=args.run_id, host=host,
                           nameserver=ns_host, nameserver_port=ns_port, result_logger=result_logger,
                           min_budget=args.min_budget, max_budget=args.max_budget)
    elif args.method == "BO":
        print("[RUNNER] method: BO")
        opt = BO_Search(configspace=worker.get_configspace(), run_id=args.run_id, host=host,
                        nameserver=ns_host, nameserver_port=ns_port, result_logger=result_logger,
                        min_budget=args.min_budget, max_budget=args.max_budget)
    elif args.method == "HB":
        opt = HyperBand(configspace=worker.get_configspace(), run_id=args.run_id, host=host,
                        nameserver=ns_host, nameserver_port=ns_port, result_logger=result_logger,
                        min_budget=args.min_budget, max_budget=args.max_budget)

    res = opt.run(n_iterations=args.n_iterations)

    # store results
    with open(os.path.join(args.shared_directory, 'results.pkl'), 'wb') as fh:
        pickle.dump(res, fh)

    # shutdown
    opt.shutdown(shutdown_workers=True)
    NS.shutdown()
# RUN BOHB
import os

working_dir = "./results/BOHB/"
result_file = os.path.join(working_dir, 'bohb_result.pkl')
nic_name = 'lo0'
port = 0
run_id = 'bohb_run_1'
n_bohb_iterations = 10
min_budget = 500
max_budget = 3500

try:
    ##### Start a nameserver #####
    # get host
    try:
        host = hpns.nic_name_to_host(nic_name)
    except ValueError as e:
        host = "localhost"
        print(e)
        print("ValueError getting host from nic_name {}, setting to localhost.".format(nic_name))

    ns = hpns.NameServer(run_id=run_id, host=host, port=port, working_directory=working_dir)
    ns_host, ns_port = ns.start()
    print(ns_host)
    print()
    print(ns_port)
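The try/except pattern above (falling back to localhost when the interface name does not exist on the machine, e.g. 'lo0' on macOS vs. 'lo' on Linux) is worth factoring out. A small helper along these lines captures it; the function name resolve_host is ours, not part of hpbandster:

import hpbandster.core.nameserver as hpns

def resolve_host(nic_name, default="localhost"):
    # Resolve a network interface name to a hostname/IP, falling back to a
    # default when the interface does not exist on this machine.
    try:
        return hpns.nic_name_to_host(nic_name)
    except ValueError as e:
        print("Could not resolve nic_name {} ({}); falling back to {}.".format(nic_name, e, default))
        return default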
def main():
    parser = argparse.ArgumentParser(parents=[get_train_parser()], description='Parallel execution of hyper-tuning')
    parser.add_argument('--run-id', required=True, help='Name of the run')
    parser.add_argument('--min-budget', type=float, help='Minimum budget used during the optimization', default=1)
    parser.add_argument('--max-budget', type=float, help='Maximum budget used during the optimization', default=64)
    parser.add_argument('--n-iterations', type=int, help='Number of iterations performed by the optimizer', default=3)
    parser.add_argument('--n-workers', type=int, help='Number of workers to run in parallel', default=3)
    parser.add_argument('--eta', type=int, help='Parameter of the hyper-tuning algorithm', default=4)
    parser.add_argument('--worker', help='Flag to turn this into a worker process', action='store_true')
    parser.add_argument('--hostname', default=None, help='IP of name server.')
    parser.add_argument('--shared-directory', type=str,
                        help='A directory that is accessible for all processes, e.g. a NFS share',
                        default='output/hypertune')
    args = parser.parse_args()
    print(args)

    MyWorker = WORKERS[args.model_type]

    if not args.hostname and socket.gethostname().lower().startswith('lenovo'):
        # If we are on the cluster, set the IP
        args.hostname = hpns.nic_name_to_host('eno1')
    elif not args.hostname:
        args.hostname = '127.0.0.1'

    logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"), format='%(asctime)s %(message)s', datefmt='%I:%M:%S')

    args.callbacks = ['learning-rate-scheduler', 'early-stopping']

    if args.worker:
        # Start a worker in listening mode (waiting for jobs from master)
        w = MyWorker(args, run_id=args.run_id, host=args.hostname)
        w.load_nameserver_credentials(working_directory=args.shared_directory)
        w.run(background=False)
        exit(0)

    result_logger = hpres.json_result_logger(directory=args.shared_directory, overwrite=True)

    # Start a name server
    name_server = hpns.NameServer(run_id=args.run_id, host=args.hostname, port=0,
                                  working_directory=args.shared_directory)
    ns_host, ns_port = name_server.start()

    # Run an optimizer
    bohb = HyperBand(configspace=MyWorker.get_configspace(),  # model can be an arg here?
                     run_id=args.run_id, result_logger=result_logger, eta=args.eta,
                     host=args.hostname, nameserver=ns_host, nameserver_port=ns_port,
                     min_budget=args.min_budget, max_budget=args.max_budget)

    res = bohb.run(n_iterations=args.n_iterations, min_n_workers=args.n_workers)

    # After the optimizer run, we must shutdown the master and the nameserver.
    bohb.shutdown(shutdown_workers=True)
    name_server.shutdown()

    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()
    inc_runs = res.get_runs_by_id(incumbent)
    inc_run = inc_runs[-1]
    all_runs = res.get_all_runs()

    print("Best loss {:6.2f}".format(inc_run.loss))
    print('A total of %i unique configurations were sampled.' % len(id2config.keys()))
    print('A total of %i runs were executed.' % len(all_runs))
    print('Total budget corresponds to %.1f full function evaluations.' % (sum([r.budget for r in all_runs]) / args.max_budget))
    print('The run took %.1f seconds to complete.' % (all_runs[-1].time_stamps['finished'] - all_runs[0].time_stamps['started']))
def get_parameters(selected_x, selected_y, kFold, num_threads):
    parser = argparse.ArgumentParser(description='Example 1 - sequential and local execution.')
    parser.add_argument('--min_budget', type=float, help='Minimum budget used during the optimization.', default=1)
    parser.add_argument('--max_budget', type=float, help='Maximum budget used during the optimization.', default=1)
    parser.add_argument('--n_iterations', type=int, help='Number of iterations performed by the optimizer', default=100)
    # parser.add_argument('--worker', help='Flag to turn this into a worker process', action='store_true')
    parser.add_argument('--shared_directory', type=str,
                        help='A directory that is accessible for all processes, e.g. a NFS share.', default='./result')
    parser.add_argument('--nic_name', type=str, default='lo')
    args = parser.parse_args()

    # if args.worker:
    #     import time
    #     time.sleep(5)  # short artificial delay to make sure the nameserver is already running
    #     w = worker(data)
    #     w.load_nameserver_credentials(working_directory=args.shared_directory)
    #     w.run(background=False)
    #     exit(0)

    host = hpns.nic_name_to_host(args.nic_name)
    result_logger = hpres.json_result_logger(directory=args.shared_directory, overwrite=False)

    # Step 1: Start a nameserver
    # Every run needs a nameserver. It could be a 'static' server with a permanent address,
    # but here it will be started for the local machine with the default port.
    # The nameserver manages the concurrently running workers across all possible threads or cluster nodes.
    # Note the run_id argument. This uniquely identifies a run of any HpBandSter optimizer.
    NS = hpns.NameServer(run_id='example1', host=host, port=0)
    ns_host, ns_port = NS.start()

    # Step 2: Start a worker
    # Now we can instantiate a worker, providing the mandatory information.
    # Besides the sleep_interval, we need to define the nameserver information and
    # the same run_id as above. After that, we can start the worker in the background,
    # where it will wait for incoming configurations to evaluate.
    w = worker(selected_x, selected_y, kFold, num_threads, host=host, run_id='example1',
               nameserver=ns_host, nameserver_port=ns_port)
    w.run(background=True)

    # Step 3: Run an optimizer
    # Now we can create an optimizer object and start the run.
    # Here, we run BOHB, but that is not essential.
    # The run method will return the `Result` that contains all runs performed.
    bohb = BOHB(configspace=w.get_configspace(), run_id='example1', host=host,
                nameserver=ns_host, nameserver_port=ns_port, result_logger=result_logger,
                min_budget=args.min_budget, max_budget=args.max_budget)
    res = bohb.run(n_iterations=args.n_iterations)

    # Step 4: Shutdown
    # After the optimizer run, we must shutdown the master and the nameserver.
    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()

    # Step 5: Analysis
    # Each optimizer returns a hpbandster.core.result.Result object.
    # It holds information about the optimization run like the incumbent (=best) configuration.
    # For further details about the Result object, see its documentation.
    # Here we simply print out the best config and some statistics about the performed runs.
    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()
    info = res.get_runs_by_id(incumbent)
    parameter = id2config[incumbent]['config']
    min_error = info[0]['loss']
    # booster = info[0]['info']

    return parameter, min_error  # , booster
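Several of the snippets in this collection call a worker's get_configspace() without showing it. As a rough illustration only (not taken from any of the projects above), such a search space could be defined with the ConfigSpace package along these lines; the hyperparameter names and ranges are placeholders:

import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH

def get_configspace():
    # Illustrative search space; names and ranges are placeholders, not the
    # ones used by the workers in these examples.
    cs = CS.ConfigurationSpace()
    cs.add_hyperparameters([
        CSH.UniformFloatHyperparameter('lr', lower=1e-4, upper=1e-1, log=True),
        CSH.UniformIntegerHyperparameter('num_units', lower=8, upper=128),
        CSH.CategoricalHyperparameter('activation', ['relu', 'tanh']),
    ])
    return cs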
def run_BOHB(working_dir, result_file, n_bohb_iter=12, min_budget=1, max_budget=9,
             genotype="genotypes.KMNIST", warmstart=False, dataset='KMNIST'):
    nic_name = 'lo'
    port = 0
    run_id = 'bohb_run_1'

    previous_run = None
    if (warmstart):
        previous_run = hpres.logged_results_to_HBS_result(working_dir)

    try:
        # Start a nameserver
        host = hpns.nic_name_to_host(nic_name)
        ns = hpns.NameServer(run_id=run_id, host=host, port=port, working_directory=working_dir)
        ns_host, ns_port = ns.start()

        # Start local worker
        worker = PyTorchWorker(dataset=dataset, run_id=run_id, host=host,
                               nameserver=ns_host, nameserver_port=ns_port, timeout=300)
        worker.genotype = genotype
        worker.run(background=True)

        bohb = None

        # Run an optimizer
        if (warmstart):
            bohb = BOHB(configspace=worker.get_configspace(), run_id=run_id, host=host,
                        nameserver=ns_host, nameserver_port=ns_port,
                        min_budget=min_budget, max_budget=max_budget, previous_result=previous_run)
        else:
            result_logger = hpres.json_result_logger(directory=working_dir, overwrite=True)
            bohb = BOHB(configspace=worker.get_configspace(), run_id=run_id, host=host,
                        nameserver=ns_host, nameserver_port=ns_port,
                        min_budget=min_budget, max_budget=max_budget, result_logger=result_logger)

        result = bohb.run(n_iterations=n_bohb_iter)

        logging.info("Write result to file {}".format(result_file))
        with open(result_file, 'wb') as f:
            pickle.dump(result, f)
    finally:
        bohb.shutdown(shutdown_workers=True)
        ns.shutdown()
def a2c_bohb_wrapper(**params):
    # Setup directories where live data is logged
    logdir = params["logdir"]
    a2c_output_dir = os.path.join(logdir, 'a2c_output')
    params["logdir"] = a2c_output_dir
    bohb_output_dir = os.path.join(logdir, 'bohb_output')

    logging.basicConfig(level=logging.INFO)  # logging.DEBUG for debug output
    logger = logging.getLogger()
    logger.propagate = False  # no duplicate logging outputs
    fh = logging.FileHandler(os.path.join(logdir, 'bohb.log'))
    fh.setLevel(logging.INFO)
    fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s:%(name)s: %(message)s'))
    logger.addHandler(fh)

    # Build configuration space and define all hyperparameters
    cs = ConfigurationSpace()
    lr = UniformFloatHyperparameter("lr", 1e-4, 1e-2, default_value=1e-3)
    units_shared_layer1 = UniformIntegerHyperparameter("units_shared_layer1", 8, 100, default_value=24)
    units_shared_layer2 = UniformIntegerHyperparameter("units_shared_layer2", 8, 100, default_value=24)
    units_policy_layer = UniformIntegerHyperparameter("units_policy_layer", 8, 100, default_value=24)
    vf_coeff = UniformFloatHyperparameter("vf_coeff", 1e-2, 0.5, default_value=0.1)
    ent_coeff = UniformFloatHyperparameter("ent_coeff", 5e-6, 1e-4, default_value=1e-5)
    gamma = UniformFloatHyperparameter("gamma", 0.6, 1., default_value=0.90)
    activ_fcn = CategoricalHyperparameter("activ_fcn", ['relu6', 'elu', 'mixed'], default_value='relu6')
    cs.add_hyperparameters([units_shared_layer1, units_shared_layer2, units_policy_layer,
                            vf_coeff, ent_coeff, gamma, lr, activ_fcn])  # batch_size

    logger.info('##############################################')
    logger.info('Run Optimization')
    logger.info('##############################################')

    if params["array_id"] == 1:
        # Setup directories where live data is logged
        if not os.path.isdir(a2c_output_dir):
            os.makedirs(a2c_output_dir)
        if not os.path.isdir(bohb_output_dir):
            os.makedirs(bohb_output_dir)

        # start nameserver
        NS = hpns.NameServer(run_id=params["instance_id"], nic_name=params["nic_name"], working_directory=bohb_output_dir)
        ns_host, ns_port = NS.start()  # stores information for workers to find in working directory

        # BOHB is usually so cheap that we can afford to run a worker on the master node, too.
        worker = A2CWorker(nameserver=ns_host, nameserver_port=ns_port, run_id=params["instance_id"], **params)
        worker.run(background=True)

        # Create scenario object
        logger.info('##############################################')
        logger.info('Setup BOHB instance')
        logger.info('##############################################')
        logger.info('Output_dir: %s' % bohb_output_dir)

        HB = BOHB(configspace=cs, run_id=params["instance_id"], eta=3,
                  min_budget=params["min_resource"], max_budget=params["max_resource"],
                  host=ns_host, nameserver=ns_host, nameserver_port=ns_port, ping_interval=3600)

        # BOHB can wait until a minimum number of workers is online before starting
        res = HB.run(n_iterations=4, min_n_workers=4)

        # pickle result here for later analysis
        with open(os.path.join(bohb_output_dir, 'results.pkl'), 'wb') as f:
            pickle.dump(res, f)

        id2config = res.get_id2config_mapping()
        print('A total of %i unique configurations were sampled.' % len(id2config.keys()))
        print('A total of %i runs were executed.' % len(res.get_all_runs()))

        # shutdown all workers
        HB.shutdown(shutdown_workers=True)

        # shutdown nameserver
        NS.shutdown()
    else:
        host = hpns.nic_name_to_host(params["nic_name"])

        # workers only instantiate the A2CWorker, find the nameserver and start serving
        w = A2CWorker(run_id=params["instance_id"], host=host, **params)
        w.load_nameserver_credentials(bohb_output_dir)

        # run worker in the foreground
        w.run(background=False)
def main():
    # Check quantities of train, validation and test images
    train_images = np.array(glob("data/train/*/*"))
    valid_images = np.array(glob("data/valid/*/*"))
    test_images = np.array(glob("data/test/*/*"))

    # Check relative percentages of image types
    train_images_mel = np.array(glob("data/train/melanoma/*"))
    train_images_nev = np.array(glob("data/train/nevus/*"))
    train_images_seb = np.array(glob("data/train/seborrheic_keratosis/*"))
    valid_images_mel = np.array(glob("data/valid/melanoma/*"))
    valid_images_nev = np.array(glob("data/valid/nevus/*"))
    valid_images_seb = np.array(glob("data/valid/seborrheic_keratosis/*"))
    test_images_mel = np.array(glob("data/test/melanoma/*"))
    test_images_nev = np.array(glob("data/test/nevus/*"))
    test_images_seb = np.array(glob("data/test/seborrheic_keratosis/*"))

    print("There are {} training images, {} validation images and {} test images."
          .format(len(train_images), len(valid_images), len(test_images)))
    print("For the training images, {mel:=.1f}% ({mel2}) are of melanoma, {nev:=.1f}% ({nev2}) are of nevus and {seb:=.1f}% ({seb2}) are of seborrheic keratosis."
          .format(mel=len(train_images_mel) / len(train_images) * 100, mel2=len(train_images_mel),
                  nev=len(train_images_nev) / len(train_images) * 100, nev2=len(train_images_nev),
                  seb=len(train_images_seb) / len(train_images) * 100, seb2=len(train_images_seb)))
    print("For the validation images, {mel:=.1f}% ({mel2}) are of melanoma, {nev:=.1f}% ({nev2}) are of nevus and {seb:=.1f}% ({seb2}) are of seborrheic keratosis."
          .format(mel=len(valid_images_mel) / len(valid_images) * 100, mel2=len(valid_images_mel),
                  nev=len(valid_images_nev) / len(valid_images) * 100, nev2=len(valid_images_nev),
                  seb=len(valid_images_seb) / len(valid_images) * 100, seb2=len(valid_images_seb)))
    print("For the test images, {mel:=.1f}% ({mel2}) are of melanoma, {nev:=.1f}% ({nev2}) are of nevus and {seb:=.1f}% ({seb2}) are of seborrheic keratosis."
          .format(mel=len(test_images_mel) / len(test_images) * 100, mel2=len(test_images_mel),
                  nev=len(test_images_nev) / len(test_images) * 100, nev2=len(test_images_nev),
                  seb=len(test_images_seb) / len(test_images) * 100, seb2=len(test_images_seb)))

    # Set HpBandSter logging
    logging.basicConfig(level=logging.DEBUG)

    # Define the parser. Note that the key parameters are min_budget, max_budget, shared_directory and n_iterations.
    parser = argparse.ArgumentParser(description='ISIC2017 - CNN on Derm Dataset')
    parser.add_argument('--min_budget', type=float, help='Minimum number of epochs for training.', default=1)
    parser.add_argument('--max_budget', type=float, help='Maximum number of epochs for training.', default=3)
    parser.add_argument('--n_iterations', type=int, help='Number of iterations performed by the optimizer', default=16)
    parser.add_argument('--worker', help='Flag to turn this into a worker process', action='store_true')
    parser.add_argument('--run_id', type=str,
                        help='A unique run id for this optimization run. An easy option is to use the job id of the clusters scheduler.')
    parser.add_argument('--nic_name', type=str, help='Which network interface to use for communication.', default='lo')
    parser.add_argument('--shared_directory', type=str,
                        help='A directory that is accessible for all processes, e.g. a NFS share.',
                        default='/home/ubuntu/src/derm-ai/data')
    parser.add_argument('--backend',
                        help='Toggles which worker is used. Choose between a pytorch and a keras implementation.',
                        choices=['pytorch', 'keras'], default='pytorch')
    args = parser.parse_args([])

    host = hpns.nic_name_to_host(args.nic_name)

    # This example shows how to log live results. This is most useful for really long runs,
    # where intermediate results could already be interesting. The core.result submodule
    # contains the functionality to read the two generated files (results.json and
    # configs.json) and create a Result object.
    result_logger = hpres.json_result_logger(directory=args.shared_directory, overwrite=True)

    # Start a nameserver:
    NS = hpns.NameServer(run_id=args.run_id, host=host, port=0, working_directory=args.shared_directory)
    ns_host, ns_port = NS.start()

    # Start local worker
    w = worker(run_id=args.run_id, host=host, nameserver=ns_host, nameserver_port=ns_port, timeout=120)
    w.run(background=True)

    # Run an optimizer
    bohb = BOHB(configspace=w.get_configspace(), run_id=args.run_id, host=host,
                nameserver=ns_host, nameserver_port=ns_port, result_logger=result_logger,
                min_budget=args.min_budget, max_budget=args.max_budget)
    res = bohb.run(n_iterations=args.n_iterations)

    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()
def main():
    parser = argparse.ArgumentParser(description='Tensorforce hyperparameter tuner')
    parser.add_argument('environment', help='Environment (name, configuration JSON file, or library module)')
    parser.add_argument('-l', '--level', type=str, default=None, help='Level or game id, like `CartPole-v1`, if supported')
    parser.add_argument('-m', '--max-repeats', type=int, default=1, help='Maximum number of repetitions')
    parser.add_argument('-n', '--num-iterations', type=int, default=1, help='Number of BOHB iterations')
    parser.add_argument('-d', '--directory', type=str, default='tuner', help='Output directory')
    parser.add_argument('-r', '--restore', type=str, default=None, help='Restore from given directory')
    parser.add_argument('--id', type=str, default='worker', help='Unique worker id')
    args = parser.parse_args()

    if args.level is None:
        environment = Environment.create(environment=args.environment)
    else:
        environment = Environment.create(environment=args.environment, level=args.level)

    if False:
        host = nic_name_to_host(nic_name=None)
        port = 123
    else:
        host = 'localhost'
        port = None

    server = NameServer(run_id=args.id, working_directory=args.directory, host=host, port=port)
    nameserver, nameserver_port = server.start()

    worker = TensorforceWorker(environment=environment, run_id=args.id, nameserver=nameserver,
                               nameserver_port=nameserver_port, host=host)
    # TensorforceWorker(run_id, nameserver=None, nameserver_port=None, logger=None, host=None, id=None, timeout=None)
    # logger: logging.logger instance, logger used for debugging output
    # id: anything with a __str__ method; if multiple workers are started in the same process, you MUST
    #     provide a unique id for each one of them using the `id` argument.
    # timeout: int or float, specifies the timeout a worker will wait for a new job after finishing a
    #     computation before shutting down. Towards the end of a long run with multiple workers, this
    #     helps to shut down idling workers. We recommend a timeout that is roughly half the time it
    #     would take for the second largest budget to finish. The default (None) means that the worker
    #     will wait indefinitely and never shutdown on its own.

    worker.run(background=True)

    # config = cs.sample_configuration().get_dictionary()
    # print(config)
    # res = worker.compute(config=config, budget=1, working_directory='.')
    # print(res)

    if args.restore is None:
        previous_result = None
    else:
        previous_result = logged_results_to_HBS_result(directory=args.restore)

    result_logger = json_result_logger(directory=args.directory, overwrite=True)  # ???

    optimizer = BOHB(configspace=worker.get_configspace(), min_budget=0.5, max_budget=float(args.max_repeats),
                     run_id=args.id, working_directory=args.directory,
                     nameserver=nameserver, nameserver_port=nameserver_port, host=host,
                     result_logger=result_logger, previous_result=previous_result)
    # BOHB(configspace=None, eta=3, min_budget=0.01, max_budget=1, min_points_in_model=None,
    #      top_n_percent=15, num_samples=64, random_fraction=1 / 3, bandwidth_factor=3,
    #      min_bandwidth=1e-3, **kwargs)
    # Master(run_id, config_generator, working_directory='.', ping_interval=60,
    #        nameserver='127.0.0.1', nameserver_port=None, host=None, shutdown_workers=True,
    #        job_queue_sizes=(-1, 0), dynamic_queue_size=True, logger=None, result_logger=None,
    #        previous_result=None)
    # logger: logging.logger like object, the logger to output some (more or less meaningful) information

    results = optimizer.run(n_iterations=args.num_iterations)
    # optimizer.run(n_iterations=1, min_n_workers=1, iteration_kwargs={})
    # min_n_workers: int, minimum number of workers before starting the run

    optimizer.shutdown(shutdown_workers=True)
    server.shutdown()
    environment.close()

    with open(os.path.join(args.directory, 'results.pkl'), 'wb') as filehandle:
        pickle.dump(results, filehandle)

    print('Best found configuration:', results.get_id2config_mapping()[results.get_incumbent_id()]['config'])
    print('Runs:', results.get_runs_by_id(config_id=results.get_incumbent_id()))
    print('A total of {} unique configurations were sampled.'.format(len(results.get_id2config_mapping())))
    print('A total of {} runs were executed.'.format(len(results.get_all_runs())))
    print('Total budget corresponds to {:.1f} full function evaluations.'.format(
        sum([r.budget for r in results.get_all_runs()]) / args.max_repeats))
def run_experiment(args, worker, dest_dir, smac_deterministic, store_all_runs=False):
    print("Running experiment (args: %s)" % str(args))

    # make sure the working and dest directory exist
    os.makedirs(args.working_directory, exist_ok=True)
    os.makedirs(dest_dir, exist_ok=True)

    if args.opt_method in ['randomsearch', 'bohb', 'hyperband']:
        print("Using hpbandster-optimizer (%s)" % args.opt_method)

        # Every process has to lookup the hostname
        host = hpns.nic_name_to_host(args.nic_name)
        print("Host: %s" % str(host))

        # setup a nameserver
        NS = hpns.NameServer(run_id=args.run_id, nic_name=args.nic_name, port=0, host=host,
                             working_directory=args.working_directory)
        ns_host, ns_port = NS.start()
        print("Initialized nameserver (ns_host: %s; ns_port: %s)" % (str(ns_host), str(ns_port)))

        if args.worker:
            print("This is a pure worker-thread.")
            worker = get_worker(args, host=host)
            worker.load_nameserver_credentials(working_directory=args.working_directory)
            worker.run(background=False)
            print("Exiting...")
            exit(0)

        print("This is the name-server thread; however, there will be a worker running in the background.")
        worker = get_worker(args, host=host)

        # start worker in the background
        worker.load_nameserver_credentials(working_directory=args.working_directory)
        worker.run(background=True)

        if args.exp_name == 'paramnet_surrogates':
            print("This is the paramnet_surrogates experiment, so any custom budgets will be replaced by the "
                  "dataset-specific budgets.")
            args.min_budget, args.max_budget = worker.budgets[args.dataset_paramnet_surrogates]

        print("Background-worker is running, grabbing configspace from worker and initializing result_logger "
              "(with dest_dir %s)" % dest_dir)
        configspace = worker.configspace
        result_logger = hpres.json_result_logger(directory=dest_dir, overwrite=True)

        print("Getting optimizer.")
        opt = get_optimizer(args, configspace, working_directory=args.working_directory,
                            run_id=args.run_id, min_budget=args.min_budget, max_budget=args.max_budget,
                            host=host, nameserver=ns_host, nameserver_port=ns_port,
                            ping_interval=30, result_logger=result_logger)

        print("Initialization successful, starting optimization.")

        from ConfigSpace.read_and_write import pcs_new
        with open(os.path.join(dest_dir, 'configspace.pcs'), 'w') as fh:
            fh.write(pcs_new.write(opt.config_generator.configspace))

        result = opt.run(n_iterations=args.num_iterations, min_n_workers=args.n_workers)
        print("Finished optimization")

        # shutdown the worker and the dispatcher
        opt.shutdown(shutdown_workers=True)
        NS.shutdown()

    if args.exp_name == 'paramnet_surrogates':
        # This if block is necessary to set budgets for paramnet_surrogates - for nothing else
        args_tmp = copy.deepcopy(args)
        args_tmp.opt_method = 'bohb'
        worker = get_worker(args_tmp)
        args.min_budget, args.max_budget = worker.budgets[args.dataset_paramnet_surrogates]

    # the number of iterations for the blackbox optimizers must be increased so they have comparable total budgets
    bb_iterations = int(args.num_iterations * (1 + (np.log(args.max_budget) - np.log(args.min_budget)) / np.log(args.eta)))

    # if args.opt_method == 'tpe':
    #     result = worker.run_tpe(bb_iterations)
    if args.opt_method == 'smac':
        result = worker.run_smac(bb_iterations, deterministic=smac_deterministic, working_directory=args.dest_dir)

    if result is None:
        raise ValueError("Unknown method %s!" % args.opt_method)

    return result
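The bb_iterations scaling in the snippet above can be sanity-checked with a quick calculation; the budget values below are chosen purely for illustration:

import numpy as np

# Illustrative check of the bb_iterations scaling (made-up values):
num_iterations, min_budget, max_budget, eta = 4, 1, 9, 3
bb_iterations = int(num_iterations * (1 + (np.log(max_budget) - np.log(min_budget)) / np.log(eta)))
print(bb_iterations)  # 4 * (1 + 2) = 12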
                    help='The ensemble size to consider.')
parser.add_argument('--scheme', type=str, default='nes_re', help='scheme name')
parser.add_argument('--severity_list', type=str, default='0 5', help='Severity levels to sample from during evolution')
parser.add_argument('--esa', type=str, default='beam_search', help='Ensemble selection algorithm')

args = parser.parse_args()

np.random.seed(args.seed)
torch.manual_seed(args.seed)

host = nic_name_to_host(args.nic_name)

if args.array_id == 1:
    os.makedirs(args.working_directory, exist_ok=True)
    with open(os.path.join(args.working_directory, 'settings.txt'), 'w') as f:
        f.write(str(args))

    NS = NameServer(run_id=args.run_id, host=host, working_directory=args.working_directory)
    ns_host, ns_port = NS.start()

    # Regularized Evolution is usually so cheap, that we can afford to run a
    # worker on the master node as a background process
    worker = Worker(nameserver=ns_host, nameserver_port=ns_port,
def main(args):
    extra_string = ''
    if args.dataset == 'flights':
        if args.n_split == 0:
            extra_string += '_2M'
        elif args.n_split == 1:
            extra_string += '_800k'
        else:
            raise Exception('Only valid values for flight splits are 0 (2M) or 1 (800k)')
        extra_string += '_valprop_' + str(args.valprop)
    elif args.dataset in ['boston', 'concrete', 'energy', 'power', 'wine', 'yacht', 'kin8nm', 'naval', 'protein',
                          'boston_gap', 'concrete_gap', 'energy_gap', 'power_gap', 'wine_gap', 'yacht_gap',
                          'kin8nm_gap', 'naval_gap', 'protein_gap']:
        extra_string += '_split_' + str(args.n_split)
        extra_string += '_valprop_' + str(args.valprop)

    working_dir = args.result_folder + '/' + args.dataset + extra_string + '/' + args.method + \
        ('-' + args.network if args.network != "ResNet" else '') + '/' + str(args.width) + '/' + \
        str(args.batch_size) + '/' + args.run_id
    print("WORKING DIR")
    print(working_dir)

    # Create data dir if necessary
    if not os.path.exists(args.data_folder):
        mkdir(args.data_folder)

    # Every process has to lookup the hostname
    host = hpns.nic_name_to_host(args.nic_name)

    result_logger = hpres.json_result_logger(directory=working_dir, overwrite=False)

    # Start a nameserver:
    NS = hpns.NameServer(run_id=args.run_id, host=host, port=0, working_directory=working_dir)
    ns_host, ns_port = NS.start()

    workers = []
    for i in range(args.num_workers):
        print("CREATING WORKER:", i)
        if args.dataset == 'spirals':
            worker_class = create_SpiralsWorker(args.method, args.network, args.width, args.batch_size)
            worker = worker_class(early_stop=args.early_stop, run_id=args.run_id, host=host,
                                  nameserver=ns_host, nameserver_port=ns_port, timeout=600, id=i)
        elif args.dataset == 'flights':
            worker_class = create_FlightWorker(args.method, args.network, args.width, args.batch_size)
            worker = worker_class(base_dir=args.data_folder, prop_val=args.valprop, k800=(args.n_split == 1),
                                  early_stop=args.early_stop, run_id=args.run_id, host=host,
                                  nameserver=ns_host, nameserver_port=ns_port, timeout=600, id=i)
        elif args.dataset in ['boston', 'concrete', 'energy', 'power', 'wine', 'yacht', 'kin8nm', 'naval', 'protein',
                              'boston_gap', 'concrete_gap', 'energy_gap', 'power_gap', 'wine_gap', 'yacht_gap',
                              'kin8nm_gap', 'naval_gap', 'protein_gap']:
            worker_class = create_UCIWorker(args.method, args.network, args.width, args.batch_size)
            worker = worker_class(dname=args.dataset, base_dir=args.data_folder, prop_val=args.valprop,
                                  n_split=args.n_split, early_stop=args.early_stop, run_id=args.run_id, host=host,
                                  nameserver=ns_host, nameserver_port=ns_port, timeout=600, id=i)
        else:
            raise ValueError('Dataset not implemented yet!')
        worker.run(background=True)
        workers.append(worker)

    n_iterations = args.n_iterations

    previous_run = None
    if args.previous_result_folder is not None:
        try:
            previous_run = hpres.logged_results_to_HBS_result(args.previous_result_folder)
        except Exception as e:
            print(e)

    # Run an optimizer
    bohb = BOHB(configspace=worker.get_configspace(), run_id=args.run_id, host=host,
                nameserver=ns_host, nameserver_port=ns_port, result_logger=result_logger,
                min_budget=args.min_budget, max_budget=args.max_budget, previous_result=previous_run)
    res = bohb.run(n_iterations=n_iterations, min_n_workers=args.num_workers)

    # store results
    with open(os.path.join(working_dir, 'results.pkl'), 'wb') as fh:
        pickle.dump(res, fh)

    # shutdown
    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()

    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()
    all_runs = res.get_all_runs()

    print('Best found configuration:', id2config[incumbent]['config'])
    print('A total of %i unique configurations were sampled.' % len(id2config.keys()))
    print('A total of %i runs were executed.' % len(res.get_all_runs()))
    print('Total budget corresponds to %.1f full function evaluations.' % (sum([r.budget for r in all_runs]) / args.max_budget))
    print('The run took %.1f seconds to complete.' % (all_runs[-1].time_stamps['finished'] - all_runs[0].time_stamps['started']))
def main():
    args = parse_args()

    # Set log level
    logging.basicConfig(level={
        'critical': logging.CRITICAL,
        'warning': logging.WARNING,
        'info': logging.INFO,
        'debug': logging.DEBUG
    }[args.loglevel])

    # Name for the current experiment (optimization, not single training)
    exp_name = args.exp_name or get_default_exp_name(args) + (('_' + args.exp_suffix) if args.exp_suffix else '')
    logdir = os.path.join(args.logdir, exp_name)
    shared_dir = os.path.join(logdir, 'master')
    os.makedirs(shared_dir, exist_ok=True)  # Also creates logdir if it does not exist

    host = hpns.nic_name_to_host(args.nic_name)

    # If this is meant to be a worker process, launch it
    if args.worker:
        w = AbasWorker(run_id=exp_name, source=args.source, target=args.target, net=args.net,
                       load_workers=args.load_workers, max_iter=args.max_iter, logdir=args.logdir,
                       ds_root=args.data_root, no_tqdm=args.no_tqdm, gpu=args.gpu,
                       run_n_avg=args.run_n_avg, da_method=args.da, model_criterion=args.criterion,
                       run_model_criterion=args.run_criterion or args.criterion,
                       kill_diverging=args.kill_diverging, host=host, timeout=args.timeout)
        w.load_nameserver_credentials(working_directory=shared_dir)
        w.run(background=False)
        # Nothing to do, exit
        print("Done")
        exit(0)

    # If we are here we expect to be a master
    if not args.master:
        print("Nothing to do (not a master nor a worker process)")
        exit(1)

    # Running as master!
    # Log info
    Logger(logdir=logdir, run_name='master', use_tqdm=False, use_tb=False)

    # Init the nameserver (random port)
    ns = hpns.NameServer(run_id=exp_name, host=host, port=0, working_directory=shared_dir)
    ns_host, ns_port = ns.start()
    print("Nameserver on {}:{}".format(ns_host, ns_port))

    # These hyperparameters are passed through the command line and are not optimized
    hp = {
        'base.lr': args.lr,
        'base.bs': args.bs,
        'base.wd': args.wd,
    }

    # Load previous runs
    previous_res = None
    if args.previous != '':
        if os.path.isdir(args.previous):
            previous_res = hpres.logged_results_to_HBS_result(args.previous)
        else:
            with open(args.previous, 'rb') as fp:
                previous_res = pickle.load(fp)

    # Safe file removal
    remove_file(os.path.join(shared_dir, 'config.json'))
    remove_file(os.path.join(shared_dir, 'results.json'))

    # Launch BOHB
    opt_logger = hpres.json_result_logger(directory=shared_dir, overwrite=False)
    bohb = BOHB(configspace=AbasWorker.get_configspace(hp), previous_result=previous_res, run_id=exp_name,
                min_budget=args.min_budget, max_budget=args.max_budget, eta=args.eta,
                host=host, nameserver=ns_host, nameserver_port=ns_port,
                ping_interval=15, result_logger=opt_logger)
    res = bohb.run(n_iterations=args.num_iterations, min_n_workers=args.num_workers)

    # Done
    bohb.shutdown(shutdown_workers=True)
    ns.shutdown()

    # Save results
    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()
    all_runs = res.get_all_runs()
    with open(os.path.join(logdir, 'result_{}.pkl'.format(exp_name)), 'wb') as fp:
        pickle.dump(res, fp)

    print(f"Best found configuration: {id2config[incumbent]['config']}")
    print(f"Total number of sampled unique configurations: {len(id2config.keys())}")
    print(f"Total runs {len(res.get_all_runs())}")
    print("ABAS run took {:.1f} seconds".format(all_runs[-1].time_stamps['finished'] - all_runs[0].time_stamps['started']))
def get_parameters(data, target_feature_index):
    parser = argparse.ArgumentParser(description='Example 1 - sequential and local execution.')
    parser.add_argument('--min_budget', type=float, help='Minimum budget used during the optimization.', default=9)
    parser.add_argument('--max_budget', type=float, help='Maximum budget used during the optimization.', default=243)
    parser.add_argument('--n_iterations', type=int, help='Number of iterations performed by the optimizer', default=4)
    parser.add_argument('--n_workers', type=int, help='Number of workers to run in parallel.', default=2)
    parser.add_argument('--worker', help='Flag to turn this into a worker process', action='store_true')
    parser.add_argument('--run_id', type=str,
                        help='A unique run id for this optimization run. An easy option is to use the job id of the clusters scheduler.')
    parser.add_argument('--nic_name', type=str, help='Which network interface to use for communication.', default='lo')
    parser.add_argument('--shared_directory', type=str,
                        help='A directory that is accessible for all processes, e.g. a NFS share.',
                        default='/home/lchen/parameters/result')
    args = parser.parse_args()

    host = hpns.nic_name_to_host(args.nic_name)

    if args.worker:
        time.sleep(5)  # short artificial delay to make sure the nameserver is already running
        w = worker(0.5, data, target_feature_index, run_id=args.run_id, host=host)
        w.load_nameserver_credentials(working_directory=args.shared_directory)
        w.run(background=False)
        exit(0)

    result_logger = hpres.json_result_logger(directory=args.shared_directory, overwrite=True)

    # Step 1: Start a nameserver
    # Every run needs a nameserver. It could be a 'static' server with a permanent address,
    # but here it will be started for the local machine with the default port.
    # The nameserver manages the concurrently running workers across all possible threads or cluster nodes.
    # Note the run_id argument. This uniquely identifies a run of any HpBandSter optimizer.
    NS = hpns.NameServer(run_id='test1', host=host, port=0, working_directory=args.shared_directory)
    ns_host, ns_port = NS.start()

    # Step 2: Start a worker
    # Now we can instantiate a worker, providing the mandatory information.
    # Besides the sleep_interval, we need to define the nameserver information and
    # the same run_id as above. After that, we can start the worker in the background,
    # where it will wait for incoming configurations to evaluate.
    w = worker(0.5, data, target_feature_index, run_id='test1', host=host, nameserver=ns_host, nameserver_port=ns_port)
    w.run(background=True)

    # Step 3: Run an optimizer
    # Now we can create an optimizer object and start the run.
    # Here, we run BOHB, but that is not essential.
    # The run method will return the `Result` that contains all runs performed.
    bohb = BOHB(configspace=worker.get_configspace(), run_id=args.run_id, host=host,
                nameserver=ns_host, nameserver_port=ns_port, result_logger=result_logger,
                min_budget=args.min_budget, max_budget=args.max_budget)
    print("daozhele5")
    res = bohb.run(n_iterations=args.n_iterations)
    print("daozhele6")

    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()

    # Step 5: Analysis
    # Each optimizer returns a hpbandster.core.result.Result object.
    # It holds information about the optimization run like the incumbent (=best) configuration.
    # For further details about the Result object, see its documentation.
    # Here we simply print out the best config and some statistics about the performed runs.
    id2config = res.get_id2config_mapping()
    incumbent = res.get_incumbent_id()
    info = res.get_runs_by_id(incumbent)
    parameter = id2config[incumbent]['config']
    min_error = info[0]['loss']
    feature_importance_dict = info[0]['info']

    with open(os.path.join(args.shared_directory, 'results.pkl'), 'wb') as fh:
        pickle.dump(res, fh)

    return parameter, min_error, feature_importance_dict
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--n_iterations', type=int, help='Number of iterations performed by the optimizer', default=4)
    parser.add_argument('--worker', help='Flag to turn this into a worker process', action='store_true')
    parser.add_argument('--run_id', type=str,
                        help='A unique run id for this optimization run. An easy option is to use'
                             ' the job id of the clusters scheduler.')
    parser.add_argument('--shared_directory', type=str,
                        help='A directory that is accessible for all processes, e.g. a NFS share.')
    parser.add_argument('--interface', type=str, help='Which network interface to use', default="eth1")
    args = parser.parse_args()

    try:
        os.mkdir(args.shared_directory)
    except FileExistsError:
        pass

    # Every process has to lookup the hostname
    host = hpns.nic_name_to_host(args.interface)

    if args.worker:
        time.sleep(60)  # short artificial delay to make sure the nameserver is already running
        w = MyWorker(run_id=args.run_id, host=host)
        w.load_nameserver_credentials(working_directory=args.shared_directory)
        w.run(background=False)
        exit(0)

    # Write the configspace
    cs = MyWorker.get_configspace()
    with open(os.path.join(args.shared_directory, 'configspace.json'), "w") as fh:
        fh.write(pcs_out.write(cs))

    result_logger = hpres.json_result_logger(directory=args.shared_directory, overwrite=True)

    NS = hpns.NameServer(run_id=args.run_id, host=host, port=0, working_directory=args.shared_directory)
    ns_host, ns_port = NS.start()

    w = MyWorker(run_id=args.run_id, host=host, nameserver=ns_host, nameserver_port=ns_port)
    w.run(background=True)

    # Run an optimizer
    # We now have to specify the host, and the nameserver information
    bohb = BOHB(configspace=cs, run_id=args.run_id, host=host, nameserver=ns_host, nameserver_port=ns_port,
                eta=3, result_logger=result_logger, min_budget=1, max_budget=9)
    res = bohb.run(n_iterations=args.n_iterations, min_n_workers=1)

    # In a cluster environment, you usually want to store the results for later analysis.
    # One option is to simply pickle the Result object.
    with open(os.path.join(args.shared_directory, 'results.pkl'), 'wb') as fh:
        pickle.dump(res, fh)

    # Step 4: Shutdown
    # After the optimizer run, we must shutdown the master and the nameserver.
    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()
def main():
    parser = argparse.ArgumentParser(
        description='Tensorforce hyperparameter tuner, using BOHB optimizer (Bayesian Optimization '
                    'and Hyperband)')
    # Environment arguments (from run.py)
    parser.add_argument(
        '-e', '--environment', type=str,
        help='Environment (name, configuration JSON file, or library module)')
    parser.add_argument(
        '-l', '--level', type=str, default=None,
        help='Level or game id, like `CartPole-v1`, if supported')
    parser.add_argument('-m', '--max-episode-timesteps', type=int, default=None,
                        help='Maximum number of timesteps per episode')
    parser.add_argument('--import-modules', type=str, default=None,
                        help='Import comma-separated modules required for environment')
    # Runner arguments (from run.py)
    parser.add_argument('-n', '--episodes', type=int, help='Number of episodes')
    parser.add_argument('-p', '--num-parallel', type=int, default=None,
                        help='Number of environment instances to execute in parallel')
    # Tuner arguments
    parser.add_argument(
        '-r', '--runs-per-round', type=str, default='1,2,5,10',
        help='Comma-separated number of runs per optimization round, each with a successively '
             'smaller number of candidates')
    parser.add_argument(
        '-s', '--selection-factor', type=int, default=3,
        help='Selection factor n, meaning that one out of n candidates in each round advances to '
             'the next optimization round')
    parser.add_argument(
        '-i', '--num-iterations', type=int, default=1,
        help='Number of optimization iterations, each consisting of a series of optimization '
             'rounds with an increasingly reduced candidate pool')
    parser.add_argument('-d', '--directory', type=str, default='tuner', help='Output directory')
    parser.add_argument('--restore', type=str, default=None, help='Restore from given directory')
    parser.add_argument('--id', type=str, default='worker', help='Unique worker id')
    args = parser.parse_args()

    if args.import_modules is not None:
        for module in args.import_modules.split(','):
            importlib.import_module(name=module)

    environment = dict(environment=args.environment)
    if args.level is not None:
        environment['level'] = args.level

    # Placeholder for a distributed setup; currently everything runs on localhost
    if False:
        host = nic_name_to_host(nic_name=None)
        port = 123
    else:
        host = 'localhost'
        port = None

    runs_per_round = tuple(int(x) for x in args.runs_per_round.split(','))
    print('Bayesian Optimization and Hyperband optimization')
    print(f'{args.num_iterations} iterations of {len(runs_per_round)} rounds each:')
    for n, num_runs in enumerate(runs_per_round, start=1):
        num_candidates = round(math.pow(args.selection_factor, len(runs_per_round) - n))
        print(f'round {n}: {num_candidates} candidates, each {num_runs} runs')
    print()

    server = NameServer(run_id=args.id, working_directory=args.directory, host=host, port=port)
    nameserver, nameserver_port = server.start()

    worker = TensorforceWorker(
        environment=environment, max_episode_timesteps=args.max_episode_timesteps,
        num_episodes=args.episodes, base=args.selection_factor, runs_per_round=runs_per_round,
        num_parallel=args.num_parallel, run_id=args.id, nameserver=nameserver,
        nameserver_port=nameserver_port, host=host)
    worker.run(background=True)

    if args.restore is None:
        previous_result = None
    else:
        previous_result = logged_results_to_HBS_result(directory=args.restore)

    result_logger = json_result_logger(directory=args.directory, overwrite=True)

    optimizer = BOHB(
        configspace=worker.get_configspace(), eta=args.selection_factor, min_budget=0.9,
        max_budget=math.pow(args.selection_factor, len(runs_per_round) - 1), run_id=args.id,
        working_directory=args.directory, nameserver=nameserver, nameserver_port=nameserver_port,
        host=host, result_logger=result_logger, previous_result=previous_result)
    # BOHB(configspace=None, eta=3, min_budget=0.01, max_budget=1, min_points_in_model=None,
    #      top_n_percent=15, num_samples=64, random_fraction=1 / 3, bandwidth_factor=3,
    #      min_bandwidth=1e-3, **kwargs)
    # Master(run_id, config_generator, working_directory='.', ping_interval=60,
    #        nameserver='127.0.0.1', nameserver_port=None, host=None, shutdown_workers=True,
    #        job_queue_sizes=(-1, 0), dynamic_queue_size=True, logger=None, result_logger=None,
    #        previous_result=None)
    # logger: logging.logger like object, the logger to output some (more or less meaningful)
    # information

    results = optimizer.run(n_iterations=args.num_iterations)
    # optimizer.run(n_iterations=1, min_n_workers=1, iteration_kwargs={})
    # min_n_workers: int, minimum number of workers before starting the run

    optimizer.shutdown(shutdown_workers=True)
    server.shutdown()

    with open(os.path.join(args.directory, 'results.pkl'), 'wb') as filehandle:
        pickle.dump(results, filehandle)

    print('Best found configuration: {}'.format(
        results.get_id2config_mapping()[results.get_incumbent_id()]['config']))
    print('Runs:', results.get_runs_by_id(config_id=results.get_incumbent_id()))
    print('A total of {} unique configurations were sampled.'.format(
        len(results.get_id2config_mapping())))
    print('A total of {} runs were executed.'.format(len(results.get_all_runs())))
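# The tuner above logs to results.json/configs.json via json_result_logger and also pickles the
# Result object, so a finished run can be analysed offline. A small analysis sketch, assuming
# the default output directory 'tuner' used above; nothing here is specific to Tensorforce.
import hpbandster.core.result as hpres

analysis = hpres.logged_results_to_HBS_result(directory='tuner')
id2config = analysis.get_id2config_mapping()
incumbent = analysis.get_incumbent_id()

print('Incumbent configuration:', id2config[incumbent]['config'])
print('Sampled configurations:', len(id2config))
print('Finished runs:', len(analysis.get_all_runs()))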
def __init__(self, num_workers, num_iterations, run_id, array_id, working_dir,
             nic_name, network, min_budget, max_budget, eta):

    x, y, categorical = model.get_dataset()
    feature_type = determine_feature_type(categorical)
    nr_features = x.shape[1]

    if network == 'fcresnet':
        config_space = get_fixed_conditional_fcresnet_config(nr_features, feature_type,
                                                             num_res_blocks=4, super_blocks=2,
                                                             nr_units=64)
    else:
        config_space = get_fixed_conditional_fc_config(nr_features, feature_type,
                                                       max_nr_layers=9)

    if array_id == 1:
        result_logger = hpres.json_result_logger(directory=working_dir, overwrite=True)

        # Start the nameserver; it stores the information workers need to find it
        # in the working directory.
        ns = hpns.NameServer(run_id=run_id, nic_name=nic_name, working_directory=working_dir)
        ns_host, ns_port = ns.start()

        # BOHB is usually so cheap that we can afford to run a worker on the master node, too.
        worker = Slave(nameserver=ns_host, nameserver_port=ns_port, run_id=run_id)
        worker.run(background=True)

        hb = BOHB(configspace=config_space,
                  run_id=run_id,
                  eta=eta,
                  min_budget=min_budget,
                  max_budget=max_budget,
                  host=ns_host,
                  nameserver=ns_host,
                  result_logger=result_logger,
                  nameserver_port=ns_port,
                  ping_interval=3600)

        # BOHB can wait until a minimum number of workers is online before starting
        res = hb.run(n_iterations=num_iterations, min_n_workers=num_workers)

        # Pickle the result here for later analysis
        with open(os.path.join(working_dir, 'results.pkl'), 'wb') as fh:
            pickle.dump(res, fh)

        # Shutdown all workers and the nameserver
        hb.shutdown(shutdown_workers=True)
        ns.shutdown()
    else:
        host = hpns.nic_name_to_host(nic_name)

        # Workers only instantiate the Slave, find the nameserver and start serving
        w = Slave(run_id=run_id, host=host)
        while True:
            try:
                w.load_nameserver_credentials(working_dir)
                break
            except RuntimeError:
                # Credentials not written yet; keep waiting until the nameserver
                # configuration is found.
                pass

        # Run the worker in the foreground
        w.run(background=False)
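# The constructor above decides, based on the cluster array id, whether this process acts as
# the master (array_id == 1) or as a plain worker. A hedged sketch of how such a class might be
# launched from an array job; the class name `Optimization` and all argument defaults are
# assumptions and not taken from the original code.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--num_iterations', type=int, default=10)
    parser.add_argument('--run_id', type=str, default='fcresnet')
    parser.add_argument('--array_id', type=int, required=True,
                        help='Task id assigned by the cluster scheduler (1 = master)')
    parser.add_argument('--working_dir', type=str, default='.')
    parser.add_argument('--nic_name', type=str, default='eth0')
    parser.add_argument('--network', type=str, default='fcresnet')
    parser.add_argument('--min_budget', type=int, default=9)
    parser.add_argument('--max_budget', type=int, default=243)
    parser.add_argument('--eta', type=int, default=3)
    args = parser.parse_args()

    # Constructing the object starts either the master (nameserver + BOHB) or one worker.
    Optimization(args.num_workers, args.num_iterations, args.run_id, args.array_id,
                 args.working_dir, args.nic_name, args.network, args.min_budget,
                 args.max_budget, args.eta)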
parser.add_argument('--nic_name', type=str,
                    help='Which network interface to use for communication.', default="en4")
parser.add_argument('--run_id', type=int, help='run ID.', default=1)
parser.add_argument('--shared_directory', type=str,
                    help='A directory that is accessible for all processes, e.g. an NFS share.',
                    default="output")
# parser.add_argument('--worker', help='Flag to turn this into a worker process', action='store_true')
args = parser.parse_args()

# Every process has to lookup the hostname
host = hpns.nic_name_to_host(args.nic_name)

if host_name == "synaptomes1":
    prefix = "syn1"
else:
    prefix = "test_run"
outputName = f"{prefix}_openml_d_{str(args.openml_dataid)}"

# Step 1: Start a nameserver (see example_1)
# We start the nameserver with the host name from above and a random open port
# (by setting the port to 0).
NS = hpns.NameServer(run_id=args.run_id, host=host, port=0,
                     working_directory=args.shared_directory)
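# The fragment above ends right after the nameserver is constructed. The usual continuation in
# hpbandster scripts is sketched below; the worker class `MyWorker`, the budgets and the number
# of iterations are assumptions, since they are not part of the fragment.
ns_host, ns_port = NS.start()

# Run one worker in the background on the same host as the master
w = MyWorker(run_id=args.run_id, host=host, nameserver=ns_host, nameserver_port=ns_port)
w.run(background=True)

bohb = BOHB(configspace=MyWorker.get_configspace(),
            run_id=args.run_id,
            host=host,
            nameserver=ns_host,
            nameserver_port=ns_port,
            min_budget=3,
            max_budget=27)
res = bohb.run(n_iterations=10)

bohb.shutdown(shutdown_workers=True)
NS.shutdown()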
def optimize_in_model(path, args, n_gram, tokens):
    articles = ['1', '2', '3', '5', '6', '8', '10', '11', '13', '34', 'p1']

    for article in articles:
        if 'dnn' in path:
            from model_workers import KerasWorker as worker
        elif 'random_forest' in path:
            from model_workers import RandomForestWorker as worker
        elif 'xgboost' in path:
            from model_workers import XGBoostWorker as worker
        elif 'svm' in path:
            from model_workers import SVMWorker as worker
        elif 'extratrees' in path:
            from model_workers import ExtraTreesWorker as worker

        for inter in ni.interfaces():
            try:
                # Every process has to lookup the hostname
                host = hpns.nic_name_to_host(inter)

                if args.worker:
                    import time
                    # short artificial delay to make sure the nameserver is already running
                    time.sleep(5)
                    w = worker(run_id=args.run_id, host=host, timeout=120,
                               article=article, flavor=args.flavor)
                    w.load_nameserver_credentials(working_directory=path)
                    w.run(background=False)
                    exit(0)

                # This example shows how to log live results. This is most useful
                # for really long runs, where intermediate results could already be
                # interesting. The core.result submodule contains the functionality to
                # read the two generated files (results.json and configs.json) and
                # create a Result object.
                result_logger = hpres.json_result_logger(directory=path, overwrite=True)

                # Start a nameserver:
                NS = hpns.NameServer(run_id=args.run_id, host=host, port=0,
                                     working_directory=path)
                ns_host, ns_port = NS.start()
                break
            except gaierror:
                continue

        # Start local worker
        w = worker(run_id=args.run_id, host=host, nameserver=ns_host,
                   nameserver_port=ns_port, timeout=120, article=article,
                   flavor=args.flavor, id=1)
        w.run(background=True)

        # Run an optimizer
        bohb = BOHB(
            configspace=worker.get_configspace(),
            run_id=args.run_id,
            host=host,
            nameserver=ns_host,
            nameserver_port=ns_port,
            result_logger=result_logger,
            min_budget=args.min_budget,
            max_budget=args.max_budget,
        )
        res = bohb.run(n_iterations=args.n_iterations)

        # store results
        with open(os.path.join(path, 'results.pkl'), 'wb') as fh:
            pickle.dump(res, fh)

        # shutdown
        bohb.shutdown(shutdown_workers=True)
        NS.shutdown()

        # Keep track of the best results
        store_best_result_and_config(model_path=path, article=article, n_gram=n_gram,
                                     tokens=tokens, flavor=args.flavor, preprocessed=0,
                                     pipeline=None)
        move_results_to_storage(n_gram=n_gram, tokens=tokens, article=article,
                                flavor=args.flavor, model=path.split('/')[-2])
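# optimize_in_model expects worker classes such as RandomForestWorker in model_workers.py,
# which are not shown here. A hedged sketch of how such a worker could map the BOHB budget to
# the number of trees; the iris stand-in dataset and the two hyperparameters are illustrative
# assumptions, and a real worker would load the article/flavor-specific data instead.
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
from hpbandster.core.worker import Worker
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score


class RandomForestWorker(Worker):

    def __init__(self, *args, article=None, flavor=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.article = article
        self.flavor = flavor
        # Stand-in data; a real worker would load the data for the given article and flavor
        self.X, self.y = load_iris(return_X_y=True)

    def compute(self, config, budget, **kwargs):
        # Interpret the budget as the number of trees in the forest
        clf = RandomForestClassifier(n_estimators=int(budget),
                                     max_depth=config['max_depth'],
                                     min_samples_split=config['min_samples_split'],
                                     n_jobs=-1)
        score = cross_val_score(clf, self.X, self.y, cv=3).mean()
        return {'loss': 1.0 - score, 'info': {'budget': budget}}

    @staticmethod
    def get_configspace():
        cs = CS.ConfigurationSpace()
        cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('max_depth', lower=2, upper=32))
        cs.add_hyperparameter(
            CSH.UniformIntegerHyperparameter('min_samples_split', lower=2, upper=20))
        return cs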