def main(arguments, parameters, fold):
    """Evaluate one fold through the dispatcher named in the experiment config.

    The dispatcher module name is read from the ``HPOLIB:dispatcher`` option
    (a trailing ``.py`` is stripped), imported from ``HPOlib.dispatcher`` and
    its ``dispatch(cfg, fold, parameters)`` function is called.

    :param arguments: not used inside this function; kept so the signature
        stays compatible with existing callers.
    :param parameters: handed unchanged to the dispatcher.
    :param fold: handed unchanged to the dispatcher.
    :return: tuple ``(status, wallclock_time, result, additional_data)``.
        If an ``ImportError`` is raised the tuple is
        ``("CRASHED", 0., nan, "")`` and an error is logged.
    """
    cfg = wrapping_util.load_experiment_config_file()
    dispatch_function_name = cfg.get("HPOLIB", "dispatcher")
    # Raw string: "\." inside a plain literal is an invalid escape sequence.
    dispatch_function_name = re.sub(r"(\.py)$", "", dispatch_function_name)

    try:
        dispatch_function = importlib.import_module(
            "HPOlib.dispatcher.%s" % dispatch_function_name)
        # NOTE(review): an ImportError raised *inside* dispatch() is also
        # caught below and reported as an invalid dispatcher name.
        additional_data, result, status, wallclock_time = \
            dispatch_function.dispatch(cfg, fold, parameters)
    except ImportError:
        additional_data = ""
        result = float("NaN")
        status = "CRASHED"
        wallclock_time = 0.
        logger.error("Invalid value %s for HPOLIB:dispatcher",
                     dispatch_function_name)

    return status, wallclock_time, result, additional_data
def main(job_id, params):
    """Spearmint entry point: set up logging, then evaluate ``params``.

    :param job_id: part of the Spearmint calling convention; not used here.
    :param params: parameter configuration forwarded to
        ``command_line_function``.
    :return: whatever ``command_line_function`` returns for ``params``.
    """
    config = load_experiment_config_file()
    level = config.getint("HPOLIB", "HPOlib_loglevel")

    logging.basicConfig(
        format='[%(levelname)s] [%(asctime)s:%(name)s] %(message)s',
        datefmt='%H:%M:%S')
    logger.setLevel(level)

    # The evaluation itself is delegated to the HPOlib interceptor module.
    return command_line_function(params, "HPOlib.optimization_interceptor")
def main():
    """SMAC entry point: set up logging, evaluate one fold, print the result.

    The fold and seed come from ``parse_command_line()`` and the parameters
    from ``get_parameters()``. The outcome is printed as a formatted return
    string with status ``"SAT"`` (presumably the line SMAC reads back --
    TODO confirm).

    :return: the result value reported by ``command_line_function``.
    """
    config = load_experiment_config_file()
    level = config.getint("HPOLIB", "HPOlib_loglevel")

    logging.basicConfig(
        format='[%(levelname)s] [%(asctime)s:%(name)s] %(message)s',
        datefmt='%H:%M:%S')
    logger.setLevel(level)

    fold, seed = parse_command_line()
    params = get_parameters()
    result, runtime = command_line_function(
        params, fold, "HPOlib.optimization_interceptor")

    # Single parenthesised argument: same output as the print statement.
    print(format_return_string("SAT", runtime, 1, result, seed, ""))
    return result
def main(*args, **kwargs):
    """Run a full cross validation for one parameter configuration.

    The parameter dict is taken from the ``params`` keyword argument if it is
    present, otherwise from the first positional argument that is a dict
    (dict subclasses included). Start and end of the cross validation are
    recorded in the experiment file.

    :param args: positional arguments; searched for a dict when no ``params``
        keyword is given.
    :param kwargs: keyword arguments; only ``params`` is read.
    :return: the value returned by ``do_cv`` for the flattened parameters.
    """
    # NOTE(review): logged at CRITICAL although it looks like debug output.
    logger.critical('args: %s kwargs: %s', str(args), str(kwargs))

    if 'params' in kwargs:
        params = kwargs['params']
    else:
        # isinstance() also accepts dict subclasses (e.g. OrderedDict), which
        # an exact type comparison would wrongly skip.
        params = next((arg for arg in args if isinstance(arg, dict)), None)

    if params is None:
        logger.critical(
            "No parameter dict found in cv.py.\n"
            "args: %s\n kwargs: %s", args, kwargs)
        # TODO: Hack for TPE and AUTOWeka
        params = args

    # Load the experiment to do time-keeping
    cv_starttime = time.time()
    experiment = load_experiment_file()
    experiment.start_cv(cv_starttime)
    # Drop the reference immediately; presumably this releases the
    # experiment file for other processes -- TODO confirm.
    del experiment

    cfg = load_experiment_config_file()

    # Load number of folds
    folds = cfg.getint('HPOLIB', 'number_cv_folds')

    params = flatten_parameter_dict(params)
    res = do_cv(params, folds=folds)
    logger.info("Result: %f", res)

    # Load the experiment to do time-keeping
    experiment = load_experiment_file()
    experiment.end_cv(time.time())
    del experiment

    return res
def main(*args, **kwargs):
    """Run a full cross validation for one parameter configuration.

    The parameter dict is taken from the ``params`` keyword argument if it is
    present, otherwise from the first positional argument that is a dict
    (dict subclasses included). Start and end of the cross validation are
    recorded in the experiment file.

    :param args: positional arguments; searched for a dict when no ``params``
        keyword is given.
    :param kwargs: keyword arguments; only ``params`` is read.
    :return: the value returned by ``do_cv`` for the flattened parameters.
    """
    # NOTE(review): logged at CRITICAL although it looks like debug output.
    logger.critical('args: %s kwargs: %s', str(args), str(kwargs))

    if 'params' in kwargs:
        params = kwargs['params']
    else:
        # isinstance() also accepts dict subclasses (e.g. OrderedDict), which
        # an exact type comparison would wrongly skip.
        params = next((arg for arg in args if isinstance(arg, dict)), None)

    if params is None:
        logger.critical("No parameter dict found in cv.py.\n"
                        "args: %s\n kwargs: %s", args, kwargs)
        # TODO: Hack for TPE and AUTOWeka
        params = args

    # Load the experiment to do time-keeping
    cv_starttime = time.time()
    experiment = load_experiment_file()
    experiment.start_cv(cv_starttime)
    # Drop the reference immediately; presumably this releases the
    # experiment file for other processes -- TODO confirm.
    del experiment

    cfg = load_experiment_config_file()

    # Load number of folds
    folds = cfg.getint('HPOLIB', 'number_cv_folds')

    params = flatten_parameter_dict(params)
    res = do_cv(params, folds=folds)
    logger.info("Result: %f", res)

    # Load the experiment to do time-keeping
    experiment = load_experiment_file()
    experiment.end_cv(time.time())
    del experiment

    return res
def do_cv(params, folds=10):
    """Evaluate ``params`` on every fold and return the mean result.

    For each fold ``runsolver_wrapper.py`` (located next to this file) is run
    in a bash subprocess. Its stdout is scanned for the first line containing
    ``"Result for ParamILS: SAT"``; the seventh whitespace-separated token of
    that line (commas stripped) is taken as the fold's result.

    :param params: mapping of parameter name to value; every entry is passed
        to the wrapper as ``-<name> <value>``.
    :param folds: number of folds to evaluate (``0 .. folds - 1``).
    :return: the mean over all fold results. The configured
        ``HPOLIB:result_on_terminate`` value is returned instead when the
        number of folds that produced exactly that value reaches
        ``HPOLIB:max_crash_per_cv``, when any exception occurs during the
        cross validation, or when the mean is not finite.
    """
    logger.info("Starting Cross validation")
    sys.stdout.flush()
    optimizer = get_optimizer()
    cfg = load_experiment_config_file()

    # Store the results to hand them back to tpe and spearmint
    results = []

    try:
        logger.info("%s", params)
        param_string = " ".join(
            "-" + str(param_name) + " " + str(params[param_name])
            for param_name in params)

        # Everything below is identical for all folds, so compute it once.
        runsolver_wrapper_script = "python " + os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "runsolver_wrapper.py")
        worst_possible = cfg.getfloat("HPOLIB", "result_on_terminate")
        max_crashes = cfg.getint("HPOLIB", "max_crash_per_cv")

        for fold in range(folds):
            # "Usage: runsolver_wrapper <instancename> " + \
            # "<instancespecificinformation> <cutofftime> <cutofflength> " + \
            # "<seed> <param> <param> <param>"
            # Cutofftime, cutofflength and seed can be safely ignored since
            # they are read in runsolver_wrapper
            cmd = "%s %d %s %d %d %d %s" % \
                (runsolver_wrapper_script, fold, optimizer, 0, 0, 0,
                 param_string)
            logger.info("Calling command:\n%s", cmd)
            # NOTE(review): parameter names and values are interpolated
            # unquoted into a shell command line; this is only safe as long
            # as they come from a trusted optimizer.
            process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE, shell=True,
                                       executable="/bin/bash")
            logger.info(
                "--------------RUNNING RUNSOLVER_WRAPPER--------------")
            stdoutdata, stderrdata = process.communicate()
            if stdoutdata:
                logger.info(stdoutdata)
            if stderrdata:
                logger.error(stderrdata)

            # Read the runsolver_wrapper output
            result_string = None
            for line in stdoutdata.split("\n"):
                pos = line.find("Result for ParamILS: SAT")
                if pos != -1:
                    result_string = line[pos:]
                    result_array = result_string.split()
                    results.append(float(result_array[6].strip(",")))
                    break

            if result_string is None:
                raise NotImplementedError(
                    "No result string available or result string doesn't contain SAT"
                )

            # If a specified number of runs crashed, quit the whole cross
            # validation in order to save time. A fold counts as crashed when
            # its result equals the configured worst possible value.
            crashed_runs = sum(1 for res in results if res == worst_possible)
            if crashed_runs >= max_crashes:
                logger.warning("Aborting CV because the number of crashes "
                               "exceeds the configured max_crash_per_cv value")
                return worst_possible

        # Every fold either appended a result or raised above.
        assert len(results) == folds
        mean = np.mean(results)

    except Exception as e:
        # Deliberately broad: any failure makes the whole CV count as failed.
        logger.error(format_traceback(sys.exc_info()))
        logger.error("CV failed %s %s", sys.exc_info()[0], e)
        # np.nan instead of np.NaN: the upper-case alias is gone in NumPy 2.
        mean = np.nan

    # Do not return any kind of nan because this would break spearmint
    if not np.isfinite(mean):
        mean = float(cfg.get("HPOLIB", "result_on_terminate"))

    logger.info("Finished CV")
    return mean
def main():
    """Command-line driver that runs hyperopt (TPE or random search).

    Parses the command line, optionally changes the working directory, sets
    up logging from the experiment config, imports the search space module
    and then runs the optimization one evaluation at a time. After every
    evaluation the trials and the domain are pickled to ``state.pkl`` in the
    current working directory; with ``--restore`` they are loaded from that
    file instead of being created.

    Exits with status 1 when the search space file is missing or was not
    given.
    """
    # NOTE(review): prog/description look copy-pasted from a statistics
    # script; left unchanged because they are user-facing strings.
    prog = "python statistics.py WhatIsThis <manyPickles> WhatIsThis <manyPickles> [WhatIsThis <manyPickles>]"
    description = "Return some statistical information"

    parser = ArgumentParser(description=description, prog=prog)

    parser.add_argument("-p", "--space",
                        dest="spaceFile", help="Where is the space.py located?")
    parser.add_argument("-m", "--maxEvals",
                        dest="maxEvals", help="How many evaluations?")
    parser.add_argument("-s", "--seed", default="1",
                        dest="seed", type=int, help="Seed for the TPE algorithm")
    parser.add_argument("-r", "--restore", action="store_true",
                        dest="restore", help="When this flag is set state.pkl is restored in " +
                        "the current working directory")
    parser.add_argument("--random", default=False, action="store_true",
                        dest="random", help="Use a random search")
    parser.add_argument("--cwd", help="Change the working directory before "
                                      "optimizing.")

    # Unknown arguments are tolerated and ignored.
    args, _ = parser.parse_known_args()

    if args.cwd:
        os.chdir(args.cwd)

    cfg = load_experiment_config_file()
    log_level = cfg.getint("HPOLIB", "HPOlib_loglevel")
    logging.basicConfig(format='[%(levelname)s] [%(asctime)s:%(name)s] %('
                               'message)s', datefmt='%H:%M:%S')
    logger.setLevel(log_level)

    # A missing --space option (None) is treated like a missing file instead
    # of crashing inside os.path.exists().
    if args.spaceFile is None or not os.path.exists(args.spaceFile):
        logger.critical("Search space not found: %s" % args.spaceFile)
        sys.exit(1)

    # First remove ".py"
    space = os.path.splitext(os.path.basename(args.spaceFile))[0]

    # Then load dict searchSpace and out function cv.py
    sys.path.append("./")
    sys.path.append("")

    module = import_module(space)
    search_space = module.space

    cli_target = "HPOlib.optimization_interceptor"
    fn = partial(command_line_function, cli_target=cli_target)

    if args.random:
        # We use a random search
        tpe_with_seed = partial(hyperopt.tpe.rand.suggest, seed=int(args.seed))
        logger.info("Using Random Search")
    else:
        tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed))

    # Now run TPE, emulate fmin.fmin()
    state_filename = "state.pkl"
    if args.restore:
        # We do not need to care about the state of the trials object since
        # it is only serialized in a synchronized state, there will never be
        # a save with a running experiment
        # NOTE(review): unpickling can execute arbitrary code; state.pkl must
        # come from a trusted source.
        with open(state_filename) as fh:
            tmp_dict = cPickle.load(fh)
        domain = tmp_dict['domain']
        trials = tmp_dict['trials']
        print(trials.__dict__)
    else:
        domain = hyperopt.Domain(fn, search_space, rseed=int(args.seed))
        trials = hyperopt.Trials()
        # By this we probably lose the seed; not too critical for a restart
        with open(state_filename, "w") as fh:
            cPickle.dump({"trials": trials, "domain": domain}, fh)

    for i in range(int(args.maxEvals) + 1):
        # in exhaust, the number of evaluations is max_evals - num_done
        rval = hyperopt.FMinIter(tpe_with_seed, domain, trials, max_evals=i)
        rval.exhaust()
        # Persist the state after every step so that a run can be restored.
        with open(state_filename, "w") as fh:
            cPickle.dump({"trials": trials, "domain": domain}, fh)

    best = trials.argmin
    # Single formatted argument: same output as ``print "...", best``.
    print("Best Value found for params: %s" % (best,))
def main():
    """Run one fold of one configuration in a subprocess and record it.

    The fold and seed come from ``parse_command_line()`` and the parameters
    from ``get_parameters()``. The command built by ``make_command`` is
    executed through bash with stdout and stderr redirected to a
    ``<time>_runsolver.out`` file in the current working directory. The
    outcome parsed by ``parse_output_files`` is written to the experiment
    file, and the formatted return string is logged, printed and returned.

    If ``HPOLIB:handles_cv`` is 1 the optimizer handles cross validation
    itself (e.g. SMAC, which does not call cv.py), so the start and end of
    the cross validation are recorded here as well.

    :return: the string produced by ``format_return_string``.
    """
    cfg = wrappingUtil.load_experiment_config_file()

    # If our optimizer can handle crossvalidation we are called from the CLI
    # rather than from cv.py; to keep a sane .pkl we then have to do the CV
    # bookkeeping here.
    called_from_cv = cfg.getint("HPOLIB", "handles_cv") != 1

    # This has to be done here for SMAC, since smac does not call cv.py
    if not called_from_cv:
        cv_starttime = time.time()
        experiment = load_experiment_file()
        experiment.start_cv(cv_starttime)
        del experiment

    fold, seed = parse_command_line()
    # Side-effect: removes all additional information like log and applies
    # transformations to the parameters
    params = get_parameters()
    param_string = " ".join([key + " " + str(params[key]) for key in params])

    time_string = wrappingUtil.get_time_string()
    run_instance_output = os.path.join(os.getcwd(),
                                       time_string + "_run_instance.out")
    runsolver_output_file = os.path.join(os.getcwd(),
                                         time_string + "_runsolver.out")

    cmd = make_command(cfg, fold, param_string, run_instance_output)

    experiment = load_experiment_file()
    # Side-effect: adds a job if it is not yet in the experiments file
    trial_index = get_trial_index(experiment, fold, params)
    experiment.set_one_fold_running(trial_index, fold)
    del experiment  # release Experiment lock

    logger.debug("Calling: %s", cmd)
    # The context manager closes the output file even if starting or waiting
    # for the subprocess raises.
    with open(runsolver_output_file, "w") as fh:
        process = subprocess.Popen(cmd, stdout=fh, stderr=fh, shell=True,
                                   executable="/bin/bash")
        logger.info("-----------------------RUNNING RUNSOLVER----------------------------")
        process.wait()

    cpu_time, wallclock_time, status, result, additional_data = \
        parse_output_files(cfg, run_instance_output, runsolver_output_file)

    experiment = load_experiment_file()
    if status == "SAT":
        experiment.set_one_fold_complete(trial_index, fold, result,
                                         wallclock_time)
    elif status == "CRASHED" or status == "UNSAT":
        # A failed fold is stored with the configured worst result and is
        # still reported with status "SAT" in the return string.
        result = cfg.getfloat("HPOLIB", "result_on_terminate")
        experiment.set_one_fold_crashed(trial_index, fold, result,
                                        wallclock_time)
        status = "SAT"
    else:
        # TODO: We need a global stopping mechanism
        pass
    del experiment  # release lock

    return_string = format_return_string(status, wallclock_time, 1, result,
                                         seed, additional_data)

    if not called_from_cv:
        experiment = load_experiment_file()
        experiment.end_cv(time.time())
        del experiment

    logger.info(return_string)
    print(return_string)
    return return_string
def main():
    """Command-line driver that runs hyperopt (TPE or random search).

    Parses the command line, optionally changes the working directory, sets
    up logging from the experiment config, imports the search space module
    and then runs the optimization one evaluation at a time. After every
    evaluation the trials and the domain are pickled to ``state.pkl`` in the
    current working directory; with ``--restore`` they are loaded from that
    file instead of being created.

    Exits with status 1 when the search space file is missing or was not
    given.
    """
    # NOTE(review): prog/description look copy-pasted from a statistics
    # script; left unchanged because they are user-facing strings.
    prog = "python statistics.py WhatIsThis <manyPickles> WhatIsThis <manyPickles> [WhatIsThis <manyPickles>]"
    description = "Return some statistical information"

    parser = ArgumentParser(description=description, prog=prog)

    parser.add_argument("-p", "--space",
                        dest="spaceFile", help="Where is the space.py located?")
    parser.add_argument("-m", "--maxEvals",
                        dest="maxEvals", help="How many evaluations?")
    parser.add_argument("-s", "--seed", default="1",
                        dest="seed", type=int, help="Seed for the TPE algorithm")
    parser.add_argument(
        "-r", "--restore", action="store_true", dest="restore",
        help="When this flag is set state.pkl is restored in " +
             "the current working directory")
    parser.add_argument("--random", default=False, action="store_true",
                        dest="random", help="Use a random search")
    parser.add_argument("--cwd", help="Change the working directory before "
                                      "optimizing.")

    # Unknown arguments are tolerated and ignored.
    args, _ = parser.parse_known_args()

    if args.cwd:
        os.chdir(args.cwd)

    cfg = load_experiment_config_file()
    log_level = cfg.getint("HPOLIB", "HPOlib_loglevel")
    logging.basicConfig(format='[%(levelname)s] [%(asctime)s:%(name)s] %('
                               'message)s', datefmt='%H:%M:%S')
    logger.setLevel(log_level)

    # A missing --space option (None) is treated like a missing file instead
    # of crashing inside os.path.exists().
    if args.spaceFile is None or not os.path.exists(args.spaceFile):
        logger.critical("Search space not found: %s" % args.spaceFile)
        sys.exit(1)

    # First remove ".py"
    space = os.path.splitext(os.path.basename(args.spaceFile))[0]

    # Then load dict searchSpace and out function cv.py
    sys.path.append("./")
    sys.path.append("")

    module = import_module(space)
    search_space = module.space

    cli_target = "HPOlib.optimization_interceptor"
    fn = partial(command_line_function, cli_target=cli_target)

    if args.random:
        # We use a random search
        tpe_with_seed = partial(hyperopt.tpe.rand.suggest, seed=int(args.seed))
        logger.info("Using Random Search")
    else:
        tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed))

    # Now run TPE, emulate fmin.fmin()
    state_filename = "state.pkl"
    if args.restore:
        # We do not need to care about the state of the trials object since
        # it is only serialized in a synchronized state, there will never be
        # a save with a running experiment
        # NOTE(review): unpickling can execute arbitrary code; state.pkl must
        # come from a trusted source.
        with open(state_filename) as fh:
            tmp_dict = cPickle.load(fh)
        domain = tmp_dict['domain']
        trials = tmp_dict['trials']
        print(trials.__dict__)
    else:
        domain = hyperopt.Domain(fn, search_space, rseed=int(args.seed))
        trials = hyperopt.Trials()
        # By this we probably lose the seed; not too critical for a restart
        with open(state_filename, "w") as fh:
            cPickle.dump({"trials": trials, "domain": domain}, fh)

    for i in range(int(args.maxEvals) + 1):
        # in exhaust, the number of evaluations is max_evals - num_done
        rval = hyperopt.FMinIter(tpe_with_seed, domain, trials, max_evals=i)
        rval.exhaust()
        # Persist the state after every step so that a run can be restored.
        with open(state_filename, "w") as fh:
            cPickle.dump({"trials": trials, "domain": domain}, fh)

    best = trials.argmin
    # Single formatted argument: same output as ``print "...", best``.
    print("Best Value found for params: %s" % (best,))
def do_cv(params, folds=10):
    """Evaluate ``params`` on every fold and return the mean result.

    For each fold ``runsolver_wrapper.py`` (located next to this file) is run
    in a bash subprocess. Its stdout is scanned for the first line containing
    ``"Result for ParamILS: SAT"``; the seventh whitespace-separated token of
    that line (commas stripped) is taken as the fold's result.

    :param params: mapping of parameter name to value; every entry is passed
        to the wrapper as ``-<name> <value>``.
    :param folds: number of folds to evaluate (``0 .. folds - 1``).
    :return: the mean over all fold results. The configured
        ``HPOLIB:result_on_terminate`` value is returned instead when the
        number of folds that produced exactly that value reaches
        ``HPOLIB:max_crash_per_cv``, when any exception occurs during the
        cross validation, or when the mean is not finite.
    """
    logger.info("Starting Cross validation")
    sys.stdout.flush()
    optimizer = get_optimizer()
    cfg = load_experiment_config_file()

    # Store the results to hand them back to tpe and spearmint
    results = []

    try:
        logger.info("%s", params)
        param_string = " ".join(
            "-" + str(param_name) + " " + str(params[param_name])
            for param_name in params)

        # Everything below is identical for all folds, so compute it once.
        runsolver_wrapper_script = "python " + os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "runsolver_wrapper.py")
        worst_possible = cfg.getfloat("HPOLIB", "result_on_terminate")
        max_crashes = cfg.getint("HPOLIB", "max_crash_per_cv")

        for fold in range(folds):
            # "Usage: runsolver_wrapper <instancename> " + \
            # "<instancespecificinformation> <cutofftime> <cutofflength> " + \
            # "<seed> <param> <param> <param>"
            # Cutofftime, cutofflength and seed can be safely ignored since
            # they are read in runsolver_wrapper
            cmd = "%s %d %s %d %d %d %s" % \
                (runsolver_wrapper_script, fold, optimizer, 0, 0, 0,
                 param_string)
            logger.info("Calling command:\n%s", cmd)
            # NOTE(review): parameter names and values are interpolated
            # unquoted into a shell command line; this is only safe as long
            # as they come from a trusted optimizer.
            process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE, shell=True,
                                       executable="/bin/bash")
            logger.info("--------------RUNNING RUNSOLVER_WRAPPER--------------")
            stdoutdata, stderrdata = process.communicate()
            if stdoutdata:
                logger.info(stdoutdata)
            if stderrdata:
                logger.error(stderrdata)

            # Read the runsolver_wrapper output
            result_string = None
            for line in stdoutdata.split("\n"):
                pos = line.find("Result for ParamILS: SAT")
                if pos != -1:
                    result_string = line[pos:]
                    result_array = result_string.split()
                    results.append(float(result_array[6].strip(",")))
                    break

            if result_string is None:
                raise NotImplementedError("No result string available or result string doesn't contain SAT")

            # If a specified number of runs crashed, quit the whole cross
            # validation in order to save time. A fold counts as crashed when
            # its result equals the configured worst possible value.
            crashed_runs = sum(1 for res in results if res == worst_possible)
            if crashed_runs >= max_crashes:
                logger.warning("Aborting CV because the number of crashes "
                               "exceeds the configured max_crash_per_cv value")
                return worst_possible

        # Every fold either appended a result or raised above.
        assert len(results) == folds
        mean = np.mean(results)

    except Exception as e:
        # Deliberately broad: any failure makes the whole CV count as failed.
        logger.error(format_traceback(sys.exc_info()))
        logger.error("CV failed %s %s", sys.exc_info()[0], e)
        # np.nan instead of np.NaN: the upper-case alias is gone in NumPy 2.
        mean = np.nan

    # Do not return any kind of nan because this would break spearmint
    if not np.isfinite(mean):
        mean = float(cfg.get("HPOLIB", "result_on_terminate"))

    logger.info("Finished CV")
    return mean
def main():
    """Run one fold of one configuration in a subprocess and record it.

    The fold and seed come from ``parse_command_line()`` and the parameters
    from ``get_parameters()``. The command built by ``make_command`` is
    executed through bash with stdout and stderr redirected to a
    ``<time>_runsolver.out`` file in the current working directory. The
    outcome parsed by ``parse_output_files`` is written to the experiment
    file, and the formatted return string is logged, printed and returned.

    If ``HPOLIB:handles_cv`` is 1 the optimizer handles cross validation
    itself (e.g. SMAC, which does not call cv.py), so the start and end of
    the cross validation are recorded here as well.

    :return: the string produced by ``format_return_string``.
    """
    cfg = wrappingUtil.load_experiment_config_file()

    # If our optimizer can handle crossvalidation we are called from the CLI
    # rather than from cv.py; to keep a sane .pkl we then have to do the CV
    # bookkeeping here.
    called_from_cv = cfg.getint('HPOLIB', 'handles_cv') != 1

    # This has to be done here for SMAC, since smac does not call cv.py
    if not called_from_cv:
        cv_starttime = time.time()
        experiment = load_experiment_file()
        experiment.start_cv(cv_starttime)
        del experiment

    fold, seed = parse_command_line()
    # Side-effect: removes all additional information like log and applies
    # transformations to the parameters
    params = get_parameters()
    param_string = " ".join([key + " " + str(params[key]) for key in params])

    time_string = wrappingUtil.get_time_string()
    run_instance_output = os.path.join(os.getcwd(),
                                       time_string + "_run_instance.out")
    runsolver_output_file = os.path.join(os.getcwd(),
                                         time_string + "_runsolver.out")

    cmd = make_command(cfg, fold, param_string, run_instance_output)

    experiment = load_experiment_file()
    # Side-effect: adds a job if it is not yet in the experiments file
    trial_index = get_trial_index(experiment, fold, params)
    experiment.set_one_fold_running(trial_index, fold)
    del experiment  # release Experiment lock

    logger.debug("Calling: %s", cmd)
    # The context manager closes the output file even if starting or waiting
    # for the subprocess raises.
    with open(runsolver_output_file, "w") as fh:
        process = subprocess.Popen(cmd, stdout=fh, stderr=fh, shell=True,
                                   executable="/bin/bash")
        logger.info(
            "-----------------------RUNNING RUNSOLVER----------------------------")
        process.wait()

    cpu_time, wallclock_time, status, result, additional_data = \
        parse_output_files(cfg, run_instance_output, runsolver_output_file)

    experiment = load_experiment_file()
    if status == "SAT":
        experiment.set_one_fold_complete(trial_index, fold, result,
                                         wallclock_time)
    elif status == "CRASHED" or status == "UNSAT":
        # A failed fold is stored with the configured worst result and is
        # still reported with status "SAT" in the return string.
        result = cfg.getfloat("HPOLIB", "result_on_terminate")
        experiment.set_one_fold_crashed(trial_index, fold, result,
                                        wallclock_time)
        status = "SAT"
    else:
        # TODO: We need a global stopping mechanism
        pass
    del experiment  # release lock

    return_string = format_return_string(status, wallclock_time, 1, result,
                                         seed, additional_data)

    if not called_from_cv:
        experiment = load_experiment_file()
        experiment.end_cv(time.time())
        del experiment

    logger.info(return_string)
    print(return_string)
    return return_string