def test_init(self):
    """Creating, mutating, and reopening an experiment preserves its state."""
    # TODO: Somehow test in which folder the experiment is created
    # TODO: Remove the case that it is always saved automatically
    experiment = Experiment.Experiment(".", "test_exp")
    _sanity_check(experiment)
    experiment.title = "test"
    del experiment
    # Reopen from disk and verify the title survived the round-trip.
    experiment = Experiment.Experiment(".", "test_exp")
    self.assertEqual(experiment.title, "test")
    del experiment
def test_get_arg_best_NaNs(self):
    """get_arg_best must skip trials whose aggregate result is NaN.

    Every trial sums to the same total (fold 0 result is ``10 - i``,
    fold 1 result is ``i``), but trial 0 never gets a fold-1 result,
    so its aggregate is NaN and trial 1 must be returned instead.
    """
    experiment = Experiment.Experiment(".", "test_exp", folds=2)
    # Plain for-loops instead of list comprehensions: these calls are
    # executed purely for their side effects.
    for i in range(10):
        experiment.add_job({"x": i})
    for i in range(10):
        experiment.set_one_fold_running(i, 0)
    for i in range(10):
        experiment.set_one_fold_complete(i, 0, 10 - i, 1)
    for i in range(10):
        experiment.set_one_fold_running(i, 1)
    # Deliberately leave trial 0 unfinished on fold 1 -> NaN aggregate.
    for i in range(1, 10):
        experiment.set_one_fold_complete(i, 1, i, 1)
    self.assertEqual(experiment.get_arg_best(), 1)
def test_add_job(self):
    """Adding a job stores its parameters without touching the run order."""
    experiment = Experiment.Experiment(".", "test_exp", folds=5)
    self.assertEqual(0, len(experiment.trials))
    self.assertEqual(0, len(experiment.instance_order))
    job_id = experiment.add_job({"x": 1, "y": 2})
    trial = experiment.get_trial_from_id(job_id)
    # Exactly one trial exists now, but nothing has been scheduled yet.
    self.assertEqual(1, len(experiment.trials))
    self.assertEqual(0, len(experiment.instance_order))
    self.assertDictEqual(trial['params'], {"x": 1, "y": 2})
    _sanity_check(experiment)
def test_set_one_fold_crashed(self):
    """Crashing a running fold marks it broken; crashing it twice fails."""
    exp = Experiment.Experiment(".", "test_exp", folds=1)
    exp.add_job({"x": 0})
    exp.set_one_fold_running(0, 0)
    exp.set_one_fold_crashed(0, 0, 1000, 0)
    self.assertEqual(1, len(exp.trials))
    self.assertEqual(Experiment.BROKEN_STATE,
                     exp.trials[0]['instance_status'][0])
    # A fold that is no longer in the running state must not be
    # markable as crashed a second time.
    self.assertRaises(AssertionError,
                      exp.set_one_fold_crashed, 0, 0, 1000, 0)
def test_get_trial_index_nocv(self):
    """get_trial_index hands out consecutive indices without CV.

    Each new parameter configuration receives the next free trial
    index; re-submitting an already finished configuration does not
    reuse its trial but gets a fresh index (5).
    """
    # Remove leftovers of previous runs so the experiment starts empty.
    for leftover in ("test_get_trial_index.pkl",
                     "test_get_trial_index.pkl.lock"):
        try:
            os.remove(leftover)
        except OSError:
            pass
    experiment = Experiment.Experiment(".", "test_get_trial_index", folds=1)

    # Same five configurations as before ({"x": "1"} .. {"x": "5"}),
    # driven through one identical run/complete cycle each.
    params = [{"x": str(i + 1)} for i in range(5)]
    for expected_index, param in enumerate(params):
        trial_index = optimization_interceptor.get_trial_index(
            experiment, 0, param)
        self.assertEqual(trial_index, expected_index)
        experiment.set_one_fold_running(trial_index, 0)
        experiment.set_one_fold_complete(trial_index, 0, 1, 1)

    # A repeated configuration is not reused; it gets a fresh index.
    self.assertEqual(
        5, optimization_interceptor.get_trial_index(experiment, 0, params[2]))
def test_additional_data(self):
    """additional_data is stored for completed as well as crashed folds."""
    exp = Experiment.Experiment(".", "test_exp", folds=1)

    # A completed fold keeps its additional data.
    first = exp.add_job({"x": 0})
    exp.set_one_fold_running(first, 0)
    exp.set_one_fold_complete(first, 0, 0.1, 0, additional_data="A")
    self.assertEqual("A",
                     exp.get_trial_from_id(first)['additional_data'][0])

    # A crashed fold keeps its additional data, too.
    second = exp.add_job({"x": 1})
    exp.set_one_fold_running(second, 0)
    exp.set_one_fold_crashed(second, 0, 1, 0, additional_data="B")
    self.assertEqual("B",
                     exp.get_trial_from_id(second)['additional_data'][0])
def test_status_getters(self): experiment = Experiment.Experiment(".", "test_exp", folds=2) # Candidate jobs experiment.add_job({"x": "0"}) experiment.add_job({"x": "1"}) # Complete jobs experiment.add_job({"x": "2"}) experiment.set_one_fold_running(2, 0) experiment.set_one_fold_complete(2, 0, 1, 1) experiment.set_one_fold_running(2, 1) experiment.set_one_fold_complete(2, 1, 1, 1) experiment.add_job({"x": "3"}) experiment.set_one_fold_running(3, 0) experiment.set_one_fold_complete(3, 0, 1, 1) experiment.set_one_fold_running(3, 1) experiment.set_one_fold_complete(3, 1, 1, 1) # Incomplete jobs experiment.add_job({"x": "4"}) experiment.set_one_fold_running(4, 0) experiment.set_one_fold_complete(4, 0, 1, 1) experiment.add_job({"x": "5"}) experiment.set_one_fold_running(5, 0) experiment.set_one_fold_complete(5, 0, 1, 1) # Running Jobs experiment.add_job({"x": "6"}) experiment.set_one_fold_running(6, 0) experiment.add_job({"x": "7"}) experiment.set_one_fold_running(7, 0) # Broken Jobs experiment.add_job({"x": "8"}) experiment.set_one_fold_running(8, 0) experiment.set_one_fold_crashed(8, 0, 1000, 1) experiment.add_job({"x": "9"}) experiment.set_one_fold_running(9, 0) experiment.set_one_fold_crashed(9, 0, 1000, 1) experiment.set_one_fold_running(9, 1) experiment.set_one_fold_crashed(9, 1, 1000, 1) self.assertEqual(len(experiment.get_candidate_jobs()), 2) self.assertEqual(len(experiment.get_complete_jobs()), 2) self.assertEqual(len(experiment.get_incomplete_jobs()), 3) self.assertEqual(len(experiment.get_running_jobs()), 2) self.assertEqual(len(experiment.get_broken_jobs()), 1) self.assertEqual(experiment.trials[9]['result'], 1000) self.assertNotEqual(experiment.trials[8]['result'], np.NaN)
def test_add_job(self):
    """add_job allocates per-fold arrays sized to the fold count.

    With ``folds=5`` every per-instance array of the new trial must
    have shape ``(5,)`` and the parameters must be stored verbatim.
    """
    exp = Experiment.Experiment(".", "test_exp", folds=5)
    self.assertEqual(len(exp.trials), 0)
    self.assertEqual(len(exp.instance_order), 0)
    _id = exp.add_job({"x": 1, "y": 2})
    self.assertEqual(len(exp.trials), 1)
    self.assertEqual(len(exp.instance_order), 0)
    # Look the trial up once instead of once per assertion.
    trial = exp.get_trial_from_id(_id)
    # One slot per fold in every per-instance array.
    for key in ('instance_results', 'instance_durations',
                'instance_status'):
        self.assertEqual(trial[key].shape, (5, ))
    self.assertDictEqual(trial['params'], {"x": 1, "y": 2})
    _sanity_check(exp)
def test_clean_test_outputs(self):
    """clean_test_outputs wipes a trial's test data and wallclock share."""
    experiment = Experiment.Experiment(".", "test_exp", folds=2)
    for i in range(2):
        job_id = experiment.add_job({"x": i})
        for fold in (0, 1):
            experiment.set_one_fold_running(job_id, fold)
        experiment.set_one_fold_complete(job_id, 0, 1, 1, "")
        experiment.set_one_fold_complete(job_id, 1, 2, 2, "")
        experiment.set_one_test_fold_running(job_id, 0)
        experiment.set_one_test_fold_complete(job_id, 0, 1, 5, "")
    self.assertEqual(16, experiment.total_wallclock_time)

    experiment.clean_test_outputs(0)

    # Trial 0's five seconds of test time are subtracted again.
    self.assertEqual(11, experiment.total_wallclock_time)
    trial = experiment.get_trial_from_id(0)
    for scalar_key in ('test_duration', 'test_result', 'test_std'):
        self.assertFalse(np.isfinite(trial[scalar_key]))
    self.assertEqual(0, trial['test_status'])
    for array_key in ('test_instance_durations', 'test_instance_results'):
        self.assertFalse(all(np.isfinite(trial[array_key])))
    self.assertEqual(0, np.sum(trial['test_instance_status']))
def test_remove_all_but_first_runs(self):
    """remove_all_but_first_runs truncates the run history to a prefix."""
    exp = Experiment.Experiment(".", "test_exp", folds=5)
    # One finished fold per trial, plus an extra run (trial 2, fold 3).
    for trial_id in range(5):
        exp.add_job({"x": trial_id})
        exp.set_one_fold_running(trial_id, trial_id)
        exp.set_one_fold_complete(trial_id, trial_id, 1, 1)
    exp.set_one_fold_running(2, 3)
    exp.set_one_fold_complete(2, 3, 1, 1)
    self.assertEqual(5, len(exp.get_incomplete_jobs()))
    self.assertEqual(6, len(exp.instance_order))

    exp.remove_all_but_first_runs(3)

    # Only the first three runs survive; trial 2 keeps just fold 2.
    self.assertEqual(3, len(exp.get_incomplete_jobs()))
    self.assertEqual(3, len(exp.instance_order))
    expected_status = [0, 0, 3, 0, 0]
    self.assertTrue(
        (exp.get_trial_from_id(2)["instance_status"] ==
         expected_status).all())
    _sanity_check(exp)
def test_one_fold_workflow(self):
    """Walk one trial through the complete five-fold CV lifecycle.

    Runs every fold of trial 0 to completion (results 1..5), checking
    the status accessors after each step, then starts a second trial on
    a single fold and verifies run order, timing, and the aggregate
    statistics computed when all folds of a trial are finished.
    """
    experiment = Experiment.Experiment(".", "test_exp", folds=5)
    trial_index = experiment.add_job({"x": 5})

    # Fold 0: running -> complete.
    experiment.set_one_fold_running(trial_index, 0)
    self.assertEqual(len(experiment.get_broken_jobs()), 0)
    self.assertEqual(len(experiment.get_complete_jobs()), 0)
    self.assertEqual(len(experiment.get_running_jobs()), 1)
    self.assertEqual(
        experiment.get_trial_from_id(trial_index)['instance_status'][0],
        Experiment.RUNNING_STATE)
    experiment.set_one_fold_complete(trial_index, 0, 1, 1)
    # With only one of five folds done the trial is still incomplete.
    self.assertEqual(len(experiment.get_complete_jobs()), 0)
    self.assertEqual(len(experiment.get_incomplete_jobs()), 1)
    self.assertEqual(
        experiment.get_trial_from_id(trial_index)['instance_status'][0],
        Experiment.COMPLETE_STATE)

    # Folds 1-3: the trial stays incomplete until the last fold finishes.
    experiment.set_one_fold_running(trial_index, 1)
    experiment.set_one_fold_complete(trial_index, 1, 2, 1)
    self.assertEqual(len(experiment.get_incomplete_jobs()), 1)
    experiment.set_one_fold_running(trial_index, 2)
    experiment.set_one_fold_complete(trial_index, 2, 3, 1)
    self.assertEqual(len(experiment.get_incomplete_jobs()), 1)
    experiment.set_one_fold_running(trial_index, 3)
    experiment.set_one_fold_complete(trial_index, 3, 4, 1)
    self.assertEqual(len(experiment.get_incomplete_jobs()), 1)

    # Fold 4 completes the trial.
    experiment.set_one_fold_running(trial_index, 4)
    experiment.set_one_fold_complete(trial_index, 4, 5, 1)
    self.assertEqual(len(experiment.trials), 1)
    self.assertTrue(
        (experiment.get_trial_from_id(trial_index)["instance_results"] == [
            1, 2, 3, 4, 5
        ]).all())
    self.assertEqual(len(experiment.get_complete_jobs()), 1)
    self.assertEqual(
        experiment.get_trial_from_id(trial_index)['status'],
        Experiment.COMPLETE_STATE)

    # A second trial that runs only fold 3.
    trial_index1 = experiment.add_job({"x": 6})
    self.assertEqual(len(experiment.get_complete_jobs()), 1)
    self.assertEqual(len(experiment.get_candidate_jobs()), 1)
    experiment.set_one_fold_running(trial_index1, 3)
    self.assertTrue(
        (experiment.get_trial_from_id(trial_index1)['instance_status'] == [
            0, 0, 0, 2, 0
        ]).all())
    experiment.set_one_fold_complete(trial_index1, 3, 1, 1)
    self.assertTrue(
        (experiment.get_trial_from_id(trial_index1)['instance_status'] == [
            0, 0, 0, 3, 0
        ]).all())

    # Bookkeeping: run order and accumulated wallclock time.
    self.assertEqual(experiment.instance_order, [(0, 0), (0, 1), (0, 2),
                                                 (0, 3), (0, 4), (1, 3)])
    self.assertEqual(experiment.total_wallclock_time, 6)
    self.assertTrue((experiment.get_trial_from_id(trial_index)
                     ["instance_durations"] == [1, 1, 1, 1, 1]).all())
    # Check that check_cv_finished kicked in: mean and population std
    # over the results 1..5.
    self.assertEqual(
        experiment.get_trial_from_id(trial_index)["result"], 3.0)
    self.assertAlmostEqual(
        experiment.get_trial_from_id(trial_index)['std'],
        1.4142135623730951)
    # Check that check_cv_finished did not kick in: result/std of the
    # unfinished trial are NaN, and a NaN never equals itself.
    self.assertNotEqual(
        experiment.get_trial_from_id(trial_index1)["result"],
        experiment.get_trial_from_id(trial_index1)["result"])
    self.assertNotEqual(
        experiment.get_trial_from_id(trial_index1)['std'],
        experiment.get_trial_from_id(trial_index1)['std'])
    self.assertEqual(len(experiment.trials), 2)
    _sanity_check(experiment)
def load_experiment_file():
    """Open the experiment pickle for the currently configured optimizer."""
    return Experiment.Experiment(".", get_optimizer())
def main(): """Start an optimization of the HPOlib. For documentation see the comments inside this function and the general HPOlib documentation.""" args, unknown_arguments = use_arg_parser() if args.working_dir: experiment_dir = args.working_dir elif args.restore: args.restore = os.path.abspath(args.restore) + "/" experiment_dir = args.restore else: experiment_dir = os.getcwd() formatter = logging.Formatter( '[%(levelname)s] [%(asctime)s:%(name)s] %(' 'message)s', datefmt='%H:%M:%S') handler = logging.StreamHandler(sys.stdout) handler.setFormatter(formatter) hpolib_logger.addHandler(handler) hpolib_logger.setLevel(1) # First of all print the infodevel if IS_DEVELOPMENT: logger.critical(INFODEVEL) args, unknown_arguments = use_arg_parser() # Convert the path to the optimizer to be an absolute path, which is # necessary later when we change the working directory optimizer = args.optimizer print("opti:", optimizer) if not os.path.isabs(optimizer): relative_path = optimizer optimizer = os.path.abspath(optimizer) logger.info( "Converting relative optimizer path %s to absolute " "optimizer path %s.", relative_path, optimizer) os.chdir(experiment_dir) experiment_dir = os.getcwd() check_before_start.check_first(experiment_dir) # Now we can safely import non standard things import numpy as np import HPOlib.Experiment as Experiment # Wants numpy and scipy # Check how many optimizer versions are present and if all dependencies # are installed also dynamically load optimizer obj optimizer_version, opt_obj = check_before_start.check_optimizer(optimizer) logger.warning( "You called -o %s, I am using optimizer defined in " "%sDefault.cfg", optimizer, optimizer_version) optimizer = os.path.basename(optimizer_version) config = wrapping_util.get_configuration(experiment_dir, optimizer_version, unknown_arguments, opt_obj) # DO NOT LOG UNTIL HERE UNLESS SOMETHING DRAMATIC HAS HAPPENED!!! 
loglevel = config.getint("HPOLIB", "HPOlib_loglevel") hpolib_logger.setLevel(loglevel) if args.silent: hpolib_logger.setLevel(60) if args.verbose: hpolib_logger.setLevel(10) # Saving the config file is down further at the bottom, as soon as we get # hold of the new optimizer directory # wrapping_dir = os.path.dirname(os.path.realpath(__file__)) # Load optimizer try: optimizer_dir = os.path.dirname(os.path.realpath(optimizer_version)) optimizer_module = imp.load_source(optimizer_dir, optimizer_version + ".py") except (ImportError, IOError): logger.critical("Optimizer module %s not found", optimizer) import traceback logger.critical(traceback.format_exc()) sys.exit(1) # So the optimizer module can acces the seed from the config and config.set("HPOLIB", "seed", str(args.seed)) experiment_directory_prefix = config.get("HPOLIB", "experiment_directory_prefix") optimizer_call, optimizer_dir_in_experiment = \ opt_obj.main(config=config, options=args, experiment_dir=experiment_dir) # experiment_directory_prefix=experiment_directory_prefix) cmd = optimizer_call # Start the server for logging from subprocesses here, because its port must # be written to the config file. logging_host = config.get("HPOLIB", "logging_host") if logging_host: logging_receiver_thread = None default_logging_port = DEFAULT_TCP_LOGGING_PORT for logging_port in range(default_logging_port, 65535): try: logging_receiver = logging_server.LoggingReceiver( host=logging_host, port=logging_port, handler=logging_server.LogRecordStreamHandler) logging_receiver_thread = Thread( target=logging_receiver.serve_forever) logging_receiver_thread.daemon = True logger.info('%s started at %s' % (logging_receiver.__class__.__name__, logging_receiver.server_address)) logging_receiver_thread.start() break # TODO I did not find any useful documentation about which Exceptions # I should catch here... 
except Exception as e: logger.debug(e) logger.debug(e.message) if logging_receiver_thread is None: logger.critical( "Could not create the logging server. Going to shut " "down.") sys.exit(1) config.set("HPOLIB", "logging_port", str(logging_port)) with open(os.path.join(optimizer_dir_in_experiment, "config.cfg"), "w") as f: config.set("HPOLIB", "is_not_original_config_file", "True") wrapping_util.save_config_to_file(f, config, write_nones=True) # initialize/reload pickle file if args.restore: try: os.remove( os.path.join(optimizer_dir_in_experiment, optimizer + ".pkl.lock")) except OSError: pass folds = config.getint('HPOLIB', 'number_cv_folds') trials = Experiment.Experiment( expt_dir=optimizer_dir_in_experiment, expt_name=experiment_directory_prefix + optimizer, folds=folds, max_wallclock_time=config.get('HPOLIB', 'cpu_limit'), title=args.title) trials.optimizer = optimizer_version optimizer_output_file = os.path.join( optimizer_dir_in_experiment, optimizer + wrapping_util.get_time_string() + "_" + str(args.seed) + ".out") if args.restore: # noinspection PyBroadException try: restored_runs = optimizer_module.restore( config=config, optimizer_dir=optimizer_dir_in_experiment, cmd=cmd) except: logger.critical("Could not restore runs for %s", args.restore) import traceback logger.critical(traceback.format_exc()) sys.exit(1) logger.info("Restored %d runs", restored_runs) trials.remove_all_but_first_runs(restored_runs) fh = open(optimizer_output_file, "a") fh.write("#" * 80 + "\n" + "Restart! 
Restored %d runs.\n" % restored_runs) fh.close() if len(trials.endtime) < len(trials.starttime): trials.endtime.append(trials.cv_endtime[-1]) trials.starttime.append(time.time()) else: trials.starttime.append(time.time()) # noinspection PyProtectedMember trials._save_jobs() del trials sys.stdout.flush() # Run call if args.printcmd: logger.info(cmd) return 0 else: # Create a second formatter and handler to customize the optimizer # output optimization_formatter = logging.Formatter( '[%(levelname)s] [%(asctime)s:%(optimizer)s] %(message)s', datefmt='%H:%M:%S') optimization_handler = logging.StreamHandler(sys.stdout) optimization_handler.setFormatter(optimization_formatter) optimization_logger = logging.getLogger(optimizer) optimization_logger.addHandler(optimization_handler) optimizer_loglevel = config.getint("HPOLIB", "optimizer_loglevel") optimization_logger.setLevel(optimizer_loglevel) # Use a flag which is set to true as soon as all children are # supposed to be killed exit_ = wrapping_util.Exit() signal.signal(signal.SIGTERM, exit_.signal_callback) signal.signal(signal.SIGABRT, exit_.signal_callback) signal.signal(signal.SIGINT, exit_.signal_callback) signal.signal(signal.SIGHUP, exit_.signal_callback) # Change into the current experiment directory # Some optimizer might expect this dir_before_exp = os.getcwd() temporary_output_dir = config.get("HPOLIB", "temporary_output_directory") if temporary_output_dir: last_part = os.path.split(optimizer_dir_in_experiment)[1] temporary_output_dir = os.path.join(temporary_output_dir, last_part) # Replace any occurence of the path in the command cmd = cmd.replace(optimizer_dir_in_experiment, temporary_output_dir) optimizer_output_file = optimizer_output_file.replace( optimizer_dir_in_experiment, temporary_output_dir) shutil.copytree(optimizer_dir_in_experiment, temporary_output_dir) # shutil.rmtree does not work properly with NFS # https://github.com/hashdist/hashdist/issues/113 # Idea from 
https://github.com/ahmadia/hashdist/ for rmtree_iter in range(5): try: shutil.rmtree(optimizer_dir_in_experiment) break except OSError, e: time.sleep(rmtree_iter) optimizer_dir_in_experiment = temporary_output_dir # call target_function.setup() fn_setup = config.get("HPOLIB", "function_setup") if fn_setup: # if temporary_output_dir: # logger.critical("The options 'temporary_output_directory' " # "and 'function_setup' cannot be used " # "together.") # sys.exit(1) fn_setup_output = os.path.join(optimizer_dir_in_experiment, "function_setup.out") runsolver_cmd = runsolver_wrapper._make_runsolver_command( config, fn_setup_output) setup_cmd = runsolver_cmd + " " + fn_setup # runsolver_output = subprocess.STDOUT runsolver_output = open("/dev/null") runsolver_wrapper._run_command_with_shell(setup_cmd, runsolver_output) os.chdir(optimizer_dir_in_experiment) logger.info(cmd) output_file = optimizer_output_file fh = open(output_file, "a") cmd = shlex.split(cmd) print cmd # See man 7 credentials for the meaning of a process group id # This makes wrapping.py useable with SGEs default behaviour, # where qdel sends a SIGKILL to a whole process group # logger.info(os.getpid()) # os.setpgid(os.getpid(), os.getpid()) # same as os.setpgid(0, 0) # TODO: figure out why shell=True was removed in commit f47ac4bb3ffe7f70b795d50c0828ca7e109d2879 # maybe it has something todo with the previous behaviour where a # session id was set... proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) global child_process_pid child_process_pid = proc.pid process = psutil.Process(os.getpid()) logger.info( "-----------------------RUNNING----------------------------------") # http://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python # How often is the experiment pickle supposed to be opened? 
if config.get("HPOLIB", "total_time_limit"): optimizer_end_time = time.time() + config.getint( "HPOLIB", "total_time_limit") else: optimizer_end_time = sys.float_info.max sent_SIGINT = False sent_SIGINT_time = np.inf sent_SIGTERM = False sent_SIGTERM_time = np.inf sent_SIGKILL = False sent_SIGKILL_time = np.inf children_to_kill = list() def enqueue_output(out, queue): for line in iter(out.readline, b''): queue.put(line) out.close() stderr_queue = Queue() stdout_queue = Queue() stderr_thread = Thread(target=enqueue_output, args=(proc.stderr, stderr_queue)) stdout_thread = Thread(target=enqueue_output, args=(proc.stdout, stdout_queue)) stderr_thread.daemon = True stdout_thread.daemon = True stderr_thread.start() stdout_thread.start() if not (args.verbose or args.silent): logger.info('Optimizer runs with PID: %d', proc.pid) logger.info('We start in directory %s', os.getcwd()) while True: # this implements the total runtime limit if time.time() > optimizer_end_time and not sent_SIGINT: logger.info("Reached total_time_limit, going to shutdown.") exit_.true() # necessary, otherwise HPOlib-run takes 100% of one processor time.sleep(0.25) try: while True: line = stdout_queue.get_nowait() fh.write(line) fh.flush() optimization_logger.info(line.replace("\n", ""), extra={'optimizer': optimizer}) except Empty: pass try: while True: line = stderr_queue.get_nowait() fh.write(line) fh.flush() optimization_logger.error(line.replace("\n", ""), extra={'optimizer': optimizer}) except Empty: pass ret = proc.poll() if ret is not None: # This does not include wrapping.py children = process.children() if len(children) == 0: break # TODO: what happens if we have a ret but something is still # running? 
if exit_.get_exit() == True and not sent_SIGINT: logger.critical("Shutdown procedure: Sending SIGINT") wrapping_util.kill_processes(signal.SIGINT) sent_SIGINT_time = time.time() sent_SIGINT = True if exit_.get_exit() == True and not sent_SIGTERM and time.time() \ > sent_SIGINT_time + 5: logger.critical("Shutdown procedure: Sending SIGTERM") wrapping_util.kill_processes(signal.SIGTERM) sent_SIGTERM_time = time.time() sent_SIGTERM = True if exit_.get_exit() == True and not sent_SIGKILL and time.time() \ > sent_SIGTERM_time + 5: logger.critical("Shutdown procedure: Sending SIGKILL") wrapping_util.kill_processes(signal.SIGKILL) sent_SIGKILL_time = time.time() sent_SIGKILL = True logger.info( "-----------------------END--------------------------------------") ret = proc.returncode logger.info("Finished with return code: %d", ret) del proc fh.close() # Change back into to directory os.chdir(dir_before_exp) # call target_function.setup() fn_teardown = config.get("HPOLIB", "function_teardown") if fn_teardown: # if temporary_output_dir: # logger.critical("The options 'temporary_output_directory' " # "and 'function_teardown' cannot be used " # "together.") # sys.exit(1) fn_teardown_output = os.path.join(optimizer_dir_in_experiment, "function_teardown.out") runsolver_cmd = runsolver_wrapper._make_runsolver_command( config, fn_teardown_output) teardown_cmd = runsolver_cmd + " " + fn_teardown # runsolver_output = subprocess.STDOUT runsolver_output = open("/dev/null") runsolver_wrapper._run_command_with_shell(teardown_cmd, runsolver_output) if temporary_output_dir: # We cannot be sure that the directory # optimizer_dir_in_experiment in dir_before_exp got deleted # properly, therefore we append an underscore to the end of the # filename last_part = os.path.split(optimizer_dir_in_experiment)[1] new_dir = os.path.join(dir_before_exp, last_part) try: shutil.copytree(optimizer_dir_in_experiment, new_dir) except OSError as e: new_dir += "_" shutil.copytree(optimizer_dir_in_experiment, 
new_dir) # shutil.rmtree does not work properly with NFS # https://github.com/hashdist/hashdist/issues/113 # Idea from https://github.com/ahmadia/hashdist/ for rmtree_iter in range(5): try: shutil.rmtree(optimizer_dir_in_experiment) break except OSError, e: time.sleep(rmtree_iter) optimizer_dir_in_experiment = new_dir
new_dir += "_" shutil.copytree(optimizer_dir_in_experiment, new_dir) # shutil.rmtree does not work properly with NFS # https://github.com/hashdist/hashdist/issues/113 # Idea from https://github.com/ahmadia/hashdist/ for rmtree_iter in range(5): try: shutil.rmtree(optimizer_dir_in_experiment) break except OSError, e: time.sleep(rmtree_iter) optimizer_dir_in_experiment = new_dir trials = Experiment.Experiment(optimizer_dir_in_experiment, experiment_directory_prefix + optimizer) trials.endtime.append(time.time()) # noinspection PyProtectedMember trials._save_jobs() # trials.finish_experiment() total_time = 0 logger.info("Best result %f", trials.get_best()) logger.info("Durations") try: for starttime, endtime in zip(trials.starttime, trials.endtime): total_time += endtime - starttime logger.info(" Needed a total of %f seconds", total_time) logger.info(" The optimizer %s took %10.5f seconds", optimizer, float(calculate_optimizer_time(trials))) logger.info(" The overhead of HPOlib is %f seconds", calculate_wrapping_overhead(trials))
def main():
    """Start an optimization of the HPOlib. For documentation see the
    comments inside this function and the general HPOlib documentation."""
    args, unknown_arguments = use_arg_parser()

    # Convert the path to the optimizer to be an absolute path, which is
    # necessary later when we change the working directory
    optimizer = args.optimizer
    if not os.path.isabs(optimizer):
        relative_path = optimizer
        optimizer = os.path.abspath(optimizer)
        logger.info(
            "Converting relative optimizer path %s to absolute "
            "optimizer path %s.", relative_path, optimizer)

    if args.working_dir:
        os.chdir(args.working_dir)

    experiment_dir = os.getcwd()
    check_before_start.check_first(experiment_dir)

    # Now we can safely import non standard things
    import numpy as np
    import HPOlib.Experiment as Experiment  # Wants numpy and scipy

    # Check how many optimizer versions are present and if all dependencies
    # are installed
    optimizer_version = check_before_start.check_optimizer(optimizer)

    logger.warning(
        "You called -o %s, I am using optimizer defined in "
        "%sDefault.cfg", optimizer, optimizer_version)
    optimizer = os.path.basename(optimizer_version)

    config = wrapping_util.get_configuration(experiment_dir,
                                             optimizer_version,
                                             unknown_arguments)

    # Saving the config file is down further at the bottom, as soon as we get
    # hold of the new optimizer directory
    wrapping_dir = os.path.dirname(os.path.realpath(__file__))

    # Load optimizer module dynamically; abort with a traceback if it is
    # missing or broken.
    try:
        optimizer_dir = os.path.dirname(os.path.realpath(optimizer_version))
        optimizer_module = imp.load_source(optimizer_dir,
                                           optimizer_version + ".py")
    except (ImportError, IOError):
        logger.critical("Optimizer module %s not found", optimizer)
        import traceback
        logger.critical(traceback.format_exc())
        sys.exit(1)

    experiment_directory_prefix = config.get("HPOLIB",
                                             "experiment_directory_prefix")
    # The optimizer plugin builds the command line to run and the
    # directory the experiment will live in.
    optimizer_call, optimizer_dir_in_experiment = optimizer_module.main(
        config=config, options=args, experiment_dir=experiment_dir,
        experiment_directory_prefix=experiment_directory_prefix)
    cmd = optimizer_call

    config.set("HPOLIB", "seed", str(args.seed))
    with open(os.path.join(optimizer_dir_in_experiment, "config.cfg"),
              "w") as f:
        config.set("HPOLIB", "is_not_original_config_file", "True")
        wrapping_util.save_config_to_file(f, config, write_nones=True)

    # initialize/reload pickle file
    if args.restore:
        # Drop a stale lock file; absence is fine.
        try:
            os.remove(
                os.path.join(optimizer_dir_in_experiment,
                             optimizer + ".pkl.lock"))
        except OSError:
            pass
    folds = config.getint('HPOLIB', 'number_cv_folds')
    trials = Experiment.Experiment(optimizer_dir_in_experiment,
                                   experiment_directory_prefix + optimizer,
                                   folds=folds,
                                   max_wallclock_time=config.get(
                                       'HPOLIB', 'cpu_limit'),
                                   title=args.title)
    trials.optimizer = optimizer_version

    if args.restore:
        # Restore state from an earlier, interrupted run.
        #noinspection PyBroadException
        try:
            restored_runs = optimizer_module.restore(
                config=config, optimizer_dir=optimizer_dir_in_experiment,
                cmd=cmd)
        except:
            logger.critical("Could not restore runs for %s", args.restore)
            import traceback
            logger.critical(traceback.format_exc())
            sys.exit(1)

        logger.info("Restored %d runs", restored_runs)
        trials.remove_all_but_first_runs(restored_runs)
        fh = open(
            os.path.join(optimizer_dir_in_experiment, optimizer + ".out"),
            "a")
        fh.write("#" * 80 + "\n" + "Restart! Restored %d runs.\n"
                 % restored_runs)
        fh.close()

        # Close the dangling interval from the interrupted run before
        # opening a new one.
        if len(trials.endtime) < len(trials.starttime):
            trials.endtime.append(trials.cv_endtime[-1])
        trials.starttime.append(time.time())
    else:
        trials.starttime.append(time.time())
    #noinspection PyProtectedMember
    trials._save_jobs()
    del trials
    sys.stdout.flush()

    # Run call
    if args.printcmd:
        # Dry run: only show the command that would be executed.
        logger.info(cmd)
        return 0
    else:
        # call target_function.setup()
        fn_setup = config.get("HPOLIB", "function_setup")
        if fn_setup:
            try:
                logger.info(fn_setup)
                fn_setup = shlex.split(fn_setup)
                output = subprocess.check_output(
                    fn_setup, stderr=subprocess.STDOUT)  #,
                    #shell=True, executable="/bin/bash")
                logger.debug(output)
            except subprocess.CalledProcessError as e:
                logger.critical(e.output)
                sys.exit(1)
            except OSError as e:
                # NOTE(review): e.message is Python-2-only.
                logger.critical(e.message)
                logger.critical(e.filename)
                sys.exit(1)

        logger.info(cmd)
        output_file = os.path.join(optimizer_dir_in_experiment,
                                   optimizer + ".out")
        fh = open(output_file, "a")
        cmd = shlex.split(cmd)
        # NOTE(review): Python 2 print statement.
        print cmd

        # Use a flag which is set to true as soon as all children are
        # supposed to be killed
        exit_ = Exit()
        signal.signal(signal.SIGTERM, exit_.signal_callback)
        signal.signal(signal.SIGABRT, exit_.signal_callback)
        signal.signal(signal.SIGINT, exit_.signal_callback)
        signal.signal(signal.SIGHUP, exit_.signal_callback)

        # Change into the current experiment directory
        # Some optimizer might expect this
        dir_before_exp = os.getcwd()
        os.chdir(optimizer_dir_in_experiment)

        # See man 7 credentials for the meaning of a process group id
        # This makes wrapping.py useable with SGEs default behaviour,
        # where qdel sends a SIGKILL to a whole process group
        logger.info(os.getpid())
        os.setpgid(os.getpid(), os.getpid())
        # TODO: figure out why shell=True was removed in commit f47ac4bb3ffe7f70b795d50c0828ca7e109d2879
        # maybe it has something todo with the previous behaviour where a
        # session id was set...
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)

        global child_process_pid
        child_process_pid = proc.pid

        logger.info(
            "-----------------------RUNNING----------------------------------")
        # http://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python
        # How often is the experiment pickle supposed to be opened?
        if config.get("HPOLIB", "total_time_limit"):
            optimizer_end_time = time.time() + config.getint(
                "HPOLIB", "total_time_limit")
        else:
            optimizer_end_time = sys.float_info.max

        console_output_delay = config.getfloat("HPOLIB",
                                               "console_output_delay")

        printed_start_configuration = list()
        printed_end_configuration = list()
        # Escalation bookkeeping for the SIGINT -> SIGTERM -> SIGKILL
        # shutdown sequence below.
        sent_SIGINT = False
        sent_SIGINT_time = np.inf
        sent_SIGTERM = False
        sent_SIGTERM_time = np.inf
        sent_SIGKILL = False
        sent_SIGKILL_time = np.inf

        def enqueue_output(out, queue):
            # Drain a subprocess pipe into a queue so the main loop can
            # poll it without blocking.
            for line in iter(out.readline, b''):
                queue.put(line)
            out.close()

        stderr_queue = Queue()
        stdout_queue = Queue()
        stderr_thread = Thread(target=enqueue_output,
                               args=(proc.stderr, stderr_queue))
        stdout_thread = Thread(target=enqueue_output,
                               args=(proc.stdout, stdout_queue))
        stderr_thread.daemon = True
        stdout_thread.daemon = True
        stderr_thread.start()
        stdout_thread.start()
        if not (args.verbose or args.silent):
            # Periodically print the experiment pickle in a background
            # thread (Python 2 `thread` module).
            lock = thread.allocate_lock()
            thread.start_new_thread(output_experiment_pickle,
                                    (console_output_delay,
                                     printed_start_configuration,
                                     printed_end_configuration,
                                     optimizer_dir_in_experiment, optimizer,
                                     experiment_directory_prefix, lock,
                                     Experiment, np, False))
            logger.info('Optimizer runs with PID: %d', proc.pid)

        while True:
            # this implements the total runtime limit
            if time.time() > optimizer_end_time and not sent_SIGINT:
                logger.info("Reached total_time_limit, going to shutdown.")
                exit_.true()

            # necessary, otherwise HPOlib-run takes 100% of one processor
            time.sleep(0.2)

            # Forward buffered child stdout to the output file (and to
            # our stdout when verbose).
            try:
                while True:
                    line = stdout_queue.get_nowait()
                    fh.write(line)
                    # Write to stdout only if verbose is on
                    if args.verbose:
                        sys.stdout.write(line)
                        sys.stdout.flush()
            except Empty:
                pass

            try:
                while True:
                    line = stderr_queue.get_nowait()
                    fh.write(line)
                    # Write always, except silent is on
                    if not args.silent:
                        sys.stderr.write("[ERR]:" + line)
                        sys.stderr.flush()
            except Empty:
                pass

            ret = proc.poll()
            running = get_all_p_for_pgid()
            if ret is not None and len(running) == 0:
                break
            # TODO: what happens if we have a ret but something is still
            # running?

            # Escalating shutdown: SIGINT first, then SIGTERM and
            # SIGKILL after 100-second grace periods.
            if exit_.get_exit() == True and not sent_SIGINT:
                logger.info("Sending SIGINT")
                kill_children(signal.SIGINT)
                sent_SIGINT_time = time.time()
                sent_SIGINT = True
            if exit_.get_exit() == True and not sent_SIGTERM and time.time() \
                    > sent_SIGINT_time + 100:
                logger.info("Sending SIGTERM")
                kill_children(signal.SIGTERM)
                sent_SIGTERM_time = time.time()
                sent_SIGTERM = True
            if exit_.get_exit() == True and not sent_SIGKILL and time.time() \
                    > sent_SIGTERM_time + 100:
                logger.info("Sending SIGKILL")
                kill_children(signal.SIGKILL)
                sent_SIGKILL_time = time.time()
                sent_SIGKILL = True

        ret = proc.returncode
        del proc

        if not (args.verbose or args.silent):
            # Final pickle printout after the optimizer has terminated.
            output_experiment_pickle(console_output_delay,
                                     printed_start_configuration,
                                     printed_end_configuration,
                                     optimizer_dir_in_experiment, optimizer,
                                     experiment_directory_prefix, lock,
                                     Experiment, np, True)

        logger.info(
            "-----------------------END--------------------------------------")
        fh.close()

        # Change back into to directory
        os.chdir(dir_before_exp)

        # call target_function.teardown()
        fn_teardown = config.get("HPOLIB", "function_teardown")
        if fn_teardown:
            try:
                fn_teardown = shlex.split(fn_teardown)
                output = subprocess.check_output(
                    fn_teardown, stderr=subprocess.STDOUT)  #,
                    #shell=True, executable="/bin/bash")
            except subprocess.CalledProcessError as e:
                logger.critical(e.output)
                sys.exit(1)
            except OSError as e:
                logger.critical(e.message)
                logger.critical(e.filename)
                sys.exit(1)

        # Reopen the pickle to record the end time and report statistics.
        trials = Experiment.Experiment(optimizer_dir_in_experiment,
                                       experiment_directory_prefix +
                                       optimizer)
        trials.endtime.append(time.time())
        #noinspection PyProtectedMember
        trials._save_jobs()
        # trials.finish_experiment()
        total_time = 0
        logger.info("Best result")
        logger.info(trials.get_best())
        logger.info("Durations")
        try:
            for starttime, endtime in zip(trials.starttime, trials.endtime):
                total_time += endtime - starttime
            logger.info("Needed a total of %f seconds", total_time)
            logger.info("The optimizer %s took %10.5f seconds",
                        optimizer, float(calculate_optimizer_time(trials)))
            logger.info("The overhead of HPOlib is %f seconds",
                        calculate_wrapping_overhead(trials))
            logger.info("The benchmark itself took %f seconds" % \
                        trials.total_wallclock_time)
        except Exception as e:
            logger.error(HPOlib.wrapping_util.format_traceback(
                sys.exc_info()))
            logger.error(
                "Experiment itself went fine, but calculating "
                "durations of optimization failed: %s %s",
                sys.exc_info()[0], e)
        del trials
        logger.info("Finished with return code: " + str(ret))
        return ret
def test_set_one_fold_crashed(self):
    """Crashing a running fold must mark the instance as BROKEN.

    The previous version of this test only performed the setup calls and
    asserted nothing, so it could never fail; the assertions below (matching
    the fuller sibling version of this test earlier in the file) pin the
    expected state transitions.
    """
    experiment = Experiment.Experiment(".", "test_exp", folds=1)
    experiment.add_job({"x": 0})
    experiment.set_one_fold_running(0, 0)
    experiment.set_one_fold_crashed(0, 0, 1000, 0)
    # Exactly one trial exists and its single fold is now broken.
    self.assertEqual(len(experiment.trials), 1)
    self.assertEqual(experiment.trials[0]['instance_status'][0],
                     Experiment.BROKEN_STATE)
    # The fold is no longer in the running state, so crashing it a second
    # time must trip the library's state assertion.
    self.assertRaises(AssertionError, experiment.set_one_fold_crashed,
                      0, 0, 1000, 0)
def test_status_getters(self): experiment = Experiment.Experiment(".", "test_exp", folds=2) # Candidate jobs experiment.add_job({"x": "0"}) experiment.add_job({"x": "1"}) # Complete jobs experiment.add_job({"x": "2"}) experiment.set_one_fold_running(2, 0) experiment.set_one_fold_complete(2, 0, 1, 1) experiment.set_one_fold_running(2, 1) experiment.set_one_fold_complete(2, 1, 1, 1) experiment.add_job({"x": "3"}) experiment.set_one_fold_running(3, 0) experiment.set_one_fold_complete(3, 0, 1, 1) experiment.set_one_fold_running(3, 1) experiment.set_one_fold_complete(3, 1, 1, 1) # Incomplete jobs experiment.add_job({"x": "4"}) experiment.set_one_fold_running(4, 0) experiment.set_one_fold_complete(4, 0, 1, 1) experiment.add_job({"x": "5"}) experiment.set_one_fold_running(5, 0) experiment.set_one_fold_complete(5, 0, 1, 1) # Running Jobs experiment.add_job({"x": "6"}) experiment.set_one_fold_running(6, 0) experiment.add_job({"x": "7"}) experiment.set_one_fold_running(7, 0) # Broken Jobs experiment.add_job({"x": "8"}) experiment.set_one_fold_running(8, 0) experiment.set_one_fold_crashed(8, 0, 1000, 1) experiment.add_job({"x": "9"}) experiment.set_one_fold_running(9, 0) experiment.set_one_fold_crashed(9, 0, 1000, 1) experiment.set_one_fold_running(9, 1) experiment.set_one_fold_crashed(9, 1, 1000, 1) self.assertEqual(len(experiment.get_candidate_jobs()), 2) self.assertEqual(len(experiment.get_complete_jobs()), 2) self.assertEqual(len(experiment.get_incomplete_jobs()), 3) self.assertEqual(len(experiment.get_running_jobs()), 2) self.assertEqual(len(experiment.get_broken_jobs()), 1) self.assertEqual(experiment.trials[9]['result'], 1000) self.assertNotEqual(experiment.trials[8]['result'], np.NaN) # and now the same thing for getting the test results... 
# Complete jobs experiment.set_one_test_fold_running(2, 0) experiment.set_one_test_fold_complete(2, 0, 1, 1) self.assertRaises(ValueError, experiment.set_one_test_fold_running, 2, 1) self.assertRaises(ValueError, experiment.set_one_test_fold_complete, 2, 1, 1, 1) # Running Jobs experiment.set_one_test_fold_running(6, 0) experiment.set_one_test_fold_running(7, 0) # Broken Jobs experiment.set_one_test_fold_running(8, 0) experiment.set_one_test_fold_crashed(8, 0, 1000, 1) experiment.set_one_test_fold_running(9, 0) experiment.set_one_test_fold_crashed(9, 0, 1000, 1) self.assertEqual(len(experiment.get_candidate_test_jobs()), 5) self.assertEqual(len(experiment.get_complete_test_jobs()), 1) # Actually, this cannot happen right now... self.assertEqual(len(experiment.get_incomplete_test_jobs()), 0) self.assertEqual(len(experiment.get_running_test_jobs()), 2) self.assertEqual(len(experiment.get_broken_test_jobs()), 2) self.assertEqual(experiment.trials[9]['test_result'], 1000) self.assertNotEqual(experiment.trials[8]['test_result'], np.NaN)
def main():
    """Test the best algorithm of a previous HPOlib optimization run.

    Entry point of HPOlib-testbest.  Expects to be started inside (or pointed
    at, via --working-dir) a directory in which an HPOlib optimization run was
    executed.  Reads the run's config, selects which stored configurations to
    re-evaluate on the test function (--all / --best / --trajectory), runs
    them in a multiprocessing pool, and reports error/skip/tested counts.

    Returns:
        0 on success; calls exit(1) on configuration problems.
    """
    formatter = logging.Formatter('[%(levelname)s] [%(asctime)s:%(name)s] %('
                                  'message)s', datefmt='%H:%M:%S')
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    hpolib_logger.addHandler(handler)

    args, unknown_arguments = use_arg_parser()

    # Resolve the experiment directory: explicit flag wins over the CWD.
    if args.working_dir:
        experiment_dir = args.working_dir
    else:
        experiment_dir = os.getcwd()

    config = wrapping_util.get_configuration(experiment_dir, None,
                                             unknown_arguments)
    log_level = config.getint("HPOLIB", "HPOlib_loglevel")
    hpolib_logger.setLevel(log_level)

    # All subsequent relative paths (pickles, output files) are rooted here.
    os.chdir(experiment_dir)

    # TODO check if the testing directory exists
    check_before_start.check_first(experiment_dir)

    # Now we can safely import non standard things
    # (check_first verified numpy/scipy are importable).
    import numpy as np
    global np
    import HPOlib.Experiment as Experiment  # Wants numpy and scipy
    global Experiment

    # Two-step guard: the option must exist AND be truthy, otherwise this
    # directory was never used for an HPOlib run.
    if not config.has_option("HPOLIB", "is_not_original_config_file"):
        logger.error("The directory you're in seems to be no directory in "
                     "which an HPOlib run was executed: %s" % experiment_dir)
        exit(1)
    is_not_original_config_file = config.get("HPOLIB",
                                             "is_not_original_config_file")
    if not is_not_original_config_file:
        logger.error("The directory you're in seems to be no directory in "
                     "which an HPOlib run was executed: %s" % experiment_dir)
        exit(1)

    if not config.has_option("HPOLIB", "test_function"):
        logger.error("The configuration file does not define a test "
                     "function.")
        exit(1)

    experiment_directory_prefix = config.get("HPOLIB",
                                             "experiment_directory_prefix")
    optimizer = wrapping_util.get_optimizer()
    # This is a really bad hack...
    # Strip the prefix so the bare optimizer name can be recombined below.
    optimizer = optimizer.replace(experiment_directory_prefix, "")
    trials = Experiment.Experiment(expt_dir=".",
                                   expt_name=experiment_directory_prefix +
                                   optimizer)

    # TODO: do we need a setup for the testing?
    fn_setup = config.get("HPOLIB", "function_setup")
    if fn_setup:
        fn_setup_output = os.path.join(os.getcwd(), "function_setup.out")
        runsolver_cmd = runsolver_wrapper._make_runsolver_command(
            config, fn_setup_output)
        setup_cmd = runsolver_cmd + " " + fn_setup
        #runsolver_output = subprocess.STDOUT
        # NOTE(review): opened in read mode yet passed as the command's
        # output sink, and never closed (resource leak).  Presumably should
        # be open(os.devnull, "w") with a close afterwards — confirm against
        # _run_command_with_shell before changing.
        runsolver_output = open("/dev/null")
        runsolver_wrapper._run_command_with_shell(setup_cmd,
                                                  runsolver_output)

    configurations_to_test = []
    # Find the configurations to test on!
    if args.all:
        # Re-test every stored trial; optionally wipe previous test outputs.
        for idx in range(len(trials.trials)):
            configurations_to_test.append(idx)
            if args.redo_runs:
                trials.clean_test_outputs(idx)
    elif args.best:
        # Only the incumbent; incomplete trials are not eligible.
        id_ = trials.get_arg_best(consider_incomplete=False)
        configurations_to_test.append(id_)
        trials.clean_test_outputs(id_)
    elif args.trajectory:
        raise NotImplementedError("Evaluating the runs along a trajectory is "
                                  "not implemented yet!")
    else:
        raise ValueError()

    # Persist the cleaned state before forking workers, and drop our handle
    # so the pickle lock is released.
    trials._save_jobs()
    del trials

    pool = multiprocessing.Pool(processes=args.n_jobs)
    outputs = []
    for id_ in configurations_to_test:
        # NOTE(review): range(1) means only fold 0 is ever tested — looks
        # like a placeholder for iterating the real number of test folds.
        for fold in range(1):
            outputs.append(pool.apply_async(run_test,
                                            [config,
                                             experiment_directory_prefix,
                                             fold, id_, optimizer]))
    pool.close()
    pool.join()

    # Look at the return states of the run_test function
    # Convention: negative -> error, zero -> skipped, positive -> tested.
    num_errors = 0
    num_skip = 0
    num_calculated = 0
    for output in outputs:
        # NOTE(review): reads AsyncResult's private _value instead of the
        # public .get(); .get() would re-raise worker exceptions, so this
        # silently treats a crashed worker like a return value — verify
        # which behavior is intended.
        _value = output._value
        if _value < 0:
            num_errors += 1
        elif _value == 0:
            num_skip += 1
        else:
            num_calculated += 1

    logger.info("Finished testing HPOlib runs.")
    logger.info("Errors: %d Skipped: %d Tested: %d" %
                (num_errors, num_skip, num_calculated))

    # TODO: do we need a teardown for testing?
    fn_teardown = config.get("HPOLIB", "function_teardown")
    if fn_teardown:
        fn_teardown_output = os.path.join(os.getcwd(),
                                          "function_teardown.out")
        runsolver_cmd = runsolver_wrapper._make_runsolver_command(
            config, fn_teardown_output)
        teardown_cmd = runsolver_cmd + " " + fn_teardown
        # NOTE(review): same read-mode /dev/null handle leak as in the
        # setup branch above.
        runsolver_output = open("/dev/null")
        runsolver_wrapper._run_command_with_shell(teardown_cmd,
                                                  runsolver_output)

    # Re-open the experiment to stamp the end time, then release it.
    trials = Experiment.Experiment(expt_dir=".",
                                   expt_name=experiment_directory_prefix +
                                   optimizer)
    trials.endtime.append(time.time())
    trials._save_jobs()
    del trials
    logger.info("Finished HPOlib-testbest.")
    return 0
def test_get_trial_index_cv(self):
    """get_trial_index with cross-validation (folds=5).

    While a configuration still has unevaluated folds, every lookup for it
    must return the trial index assigned at fold 0.  Once all five folds are
    complete, requesting the same configuration again must open a brand-new
    trial.  Call order is preserved exactly from the original test.
    """
    # Remove leftovers from previous runs so indices start at 0.
    for leftover in ("test_get_trial_index.pkl",
                     "test_get_trial_index.pkl.lock"):
        try:
            os.remove(leftover)
        except OSError:
            pass

    experiment = Experiment.Experiment(".", "test_get_trial_index", folds=5)
    parameters = [{"x": "1"}, {"x": "2"}, {"x": "3"}, {"x": "4"}, {"x": "5"}]

    # The first two configurations are evaluated on all five folds; each
    # later fold must be routed to the index handed out at fold 0.
    full_indices = []
    for expected, params in enumerate(parameters[:2]):
        index = optimization_interceptor.get_trial_index(
            experiment, 0, params)
        self.assertEqual(index, expected)
        experiment.set_one_fold_running(index, 0)
        experiment.set_one_fold_complete(index, 0, 1, 1)
        for fold in range(1, 5):
            self.assertEqual(
                index,
                optimization_interceptor.get_trial_index(
                    experiment, fold, params))
            experiment.set_one_fold_running(index, fold)
            experiment.set_one_fold_complete(index, fold, 1, 1)
        full_indices.append(index)

    # The remaining three configurations only get fold 0 evaluated.
    partial_indices = []
    for offset, params in enumerate(parameters[2:]):
        index = optimization_interceptor.get_trial_index(
            experiment, 0, params)
        self.assertEqual(index, offset + 2)
        experiment.set_one_fold_running(index, 0)
        experiment.set_one_fold_complete(index, 0, 1, 1)
        partial_indices.append(index)

    # Partially evaluated trials keep their index for pending folds.
    self.assertEqual(
        partial_indices[0],
        optimization_interceptor.get_trial_index(
            experiment, 3, parameters[2]))
    self.assertEqual(
        partial_indices[2],
        optimization_interceptor.get_trial_index(
            experiment, 4, parameters[4]))

    # Since params1 were already evaluated on every fold, this must be a
    # new trial index.
    fresh_index = optimization_interceptor.get_trial_index(
        experiment, 0, parameters[1])
    self.assertEqual(fresh_index, 5)
def test_get_arg_best_no_results(self):
    """get_arg_best must raise ValueError when no fold has been evaluated."""
    experiment = Experiment.Experiment(".", "test_exp", folds=2)
    # Add ten jobs but never run any fold, so there is no result at all.
    for value in range(10):
        experiment.add_job({"x": value})
    self.assertRaises(ValueError, experiment.get_arg_best)