def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
    """Prepare a metalearning optimizer run.

    Parameters
    ----------
    config : loaded .cfg file
    options : options containing seed, restore_dir
    experiment_dir : experiment directory / benchmark directory
    kwargs : unused so far

    Returns
    -------
    (cmd, optimizer_dir)
    """
    time_string = wrapping_util.get_time_string()
    cmd = ""
    # Resolved for parity with the other optimizer wrappers; not used below.
    path_to_optimizer = os.path.abspath(os.path.dirname(metalearner.__file__))

    # Restoring an earlier run is not supported by this wrapper.
    if options.restore:
        raise NotImplementedError()
    optimizer_dir = os.path.join(
        experiment_dir,
        experiment_directory_prefix + "metalearn_optimizer_"
        + str(options.seed) + "_" + time_string)

    # Assemble the call first, then make sure the directory exists.
    cmd += build_metalearn_call(config, options, optimizer_dir)

    if not os.path.exists(optimizer_dir):
        os.mkdir(optimizer_dir)

    return cmd, optimizer_dir
def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
    """Set up an mlrMBO run and build its command line.

    Parameters
    ----------
    config : loaded .cfg file
    options : options containing seed, restore_dir
    experiment_dir : experiment directory / benchmark directory
    kwargs : unused so far

    Returns
    -------
    (cmd, optimizer_dir)
    """
    time_string = wrapping_util.get_time_string()
    optimizer_str = os.path.splitext(os.path.basename(__file__))[0]

    # Either reuse a restore directory or derive a fresh, timestamped one.
    if options.restore:
        if not os.path.exists(options.restore):
            raise Exception("The restore directory does not exist")
        optimizer_dir = options.restore
    else:
        run_name = (experiment_directory_prefix + optimizer_str + "_"
                    + str(options.seed) + "_" + time_string)
        optimizer_dir = os.path.join(experiment_dir, run_name)

    if not os.path.exists(optimizer_dir):
        os.mkdir(optimizer_dir)

    # The mlrMBO search space must live in the benchmark directory.
    parent_space = os.path.join(experiment_dir, optimizer_str, "parset.R")
    if not os.path.exists(parent_space):
        raise Exception("mlrMBO search space not found. Searched at %s." % parent_space)

    cmd = build_mlrmbo_call(config, options, optimizer_dir, parent_space)

    logger.info("### INFORMATION ##############################################################################################")
    logger.info("# You're running %70s #" % config.get('mlrMBO', 'path_to_optimizer'))
    logger.info("# Parset file %70s #" % parent_space)
    logger.info("##############################################################################################################")
    return cmd, optimizer_dir
def main(config, options, experiment_dir, **kwargs):
    """Set up a gridsearch run and build its command line.

    Parameters
    ----------
    config : loaded .cfg file
    options : options containing seed, restore_dir
    experiment_dir : experiment directory / benchmark directory
    kwargs : unused so far

    Returns
    -------
    (cmd, optimizer_dir)
    """
    time_string = wrapping_util.get_time_string()
    cmd = ""
    path_to_optimizer = os.path.abspath(os.path.dirname(gridsearch.__file__))
    # BUG FIX: optimizer_str was used below without ever being assigned
    # (NameError at runtime); derive it from this module's filename the same
    # way the sibling optimizer wrappers do.
    optimizer_str = os.path.splitext(os.path.basename(__file__))[0]

    # Find experiment directory; restore is not supported for gridsearch.
    if options.restore:
        raise NotImplementedError()
    else:
        optimizer_dir = os.path.join(
            experiment_dir,
            optimizer_str + "_" + str(options.seed) + "_" + time_string)

    # Build call
    cmd += build_gridsearch_call(config, options, optimizer_dir)

    # Set up experiment directory
    if not os.path.exists(optimizer_dir):
        os.mkdir(optimizer_dir)

    params = config.get('GRIDSEARCH', 'params')
    # Copy (symlink) the SMAC search space into the run directory.
    if not os.path.exists(os.path.join(optimizer_dir, params)):
        os.symlink(os.path.join(experiment_dir, optimizer_str, params),
                   os.path.join(optimizer_dir, params))
    return cmd, optimizer_dir
def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
    """Set up a metalearn optimizer experiment and return its command line.

    config is the loaded .cfg file, options carries seed/restore settings,
    experiment_dir is the benchmark directory; kwargs is currently unused.
    Returns (cmd, optimizer_dir).
    """
    time_string = wrapping_util.get_time_string()
    cmd = ""
    # Kept for symmetry with the other optimizer wrappers; unused here.
    path_to_optimizer = os.path.abspath(os.path.dirname(metalearner.__file__))

    if options.restore:
        # This wrapper cannot resume a previous run.
        raise NotImplementedError()

    dir_name = "".join([experiment_directory_prefix, "metalearn_optimizer_",
                        str(options.seed), "_", time_string])
    optimizer_dir = os.path.join(experiment_dir, dir_name)

    # Build the call before creating the directory.
    cmd += build_metalearn_call(config, options, optimizer_dir)

    if not os.path.exists(optimizer_dir):
        os.mkdir(optimizer_dir)

    return cmd, optimizer_dir
def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
    """Set up a run of the LR optimizer.

    config: loaded .cfg file; options: seed/restore settings;
    experiment_dir: experiment / benchmark directory; kwargs: unused so far.
    Returns (cmd, optimizer_dir).
    """
    time_string = wrappingUtil.get_time_string()
    cmd = ''

    # Make the optimizer importable for child processes and for this one.
    optimizer_path = config.get('LR', 'path_to_optimizer')
    if 'PYTHONPATH' not in os.environ:
        os.environ['PYTHONPATH'] = optimizer_path
    else:
        os.environ['PYTHONPATH'] = (optimizer_path + os.pathsep
                                    + os.environ['PYTHONPATH'])
    sys.path.append(optimizer_path)

    optimizer_str = os.path.splitext(os.path.basename(__file__))[0]

    # Reuse the restore directory, or derive a fresh timestamped one.
    if options.restore:
        if not os.path.exists(options.restore):
            raise Exception('The restore directory does not exist')
        optimizer_dir = options.restore
    else:
        optimizer_dir = os.path.join(
            experiment_dir,
            experiment_directory_prefix + optimizer_str + '_'
            + str(options.seed) + '_' + time_string)

    cmd = build_lr_call(config, options, optimizer_dir)

    if not os.path.exists(optimizer_dir):
        os.mkdir(optimizer_dir)

    # Resolve the search space: an absolute path wins, then the benchmark copy.
    space = config.get('LR', 'space')
    abs_space = os.path.abspath(space)
    parent_space = os.path.join(experiment_dir, optimizer_str, space)
    if os.path.exists(abs_space):
        space = abs_space
    elif os.path.exists(parent_space):
        space = parent_space
    else:
        raise Exception('LR search space not found. Searched at %s and '
                        '%s' % (abs_space, parent_space))

    # Symlink the hyperopt search space into the run directory if missing.
    link_target = os.path.join(optimizer_dir, os.path.basename(space))
    if not os.path.exists(link_target):
        os.symlink(os.path.join(experiment_dir, optimizer_str, space),
                   link_target)

    return cmd, optimizer_dir
def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
    """Prepare an LR optimizer experiment.

    Returns the command line to run and the directory the run lives in.
    """
    stamp = wrappingUtil.get_time_string()
    cmd = ''

    # Expose the optimizer to subprocesses (PYTHONPATH) and to us (sys.path).
    if 'PYTHONPATH' in os.environ:
        os.environ['PYTHONPATH'] = (config.get('LR', 'path_to_optimizer')
                                    + os.pathsep + os.environ['PYTHONPATH'])
    else:
        os.environ['PYTHONPATH'] = config.get('LR', 'path_to_optimizer')
    sys.path.append(config.get('LR', 'path_to_optimizer'))

    optimizer_str = os.path.splitext(os.path.basename(__file__))[0]

    if options.restore:
        if not os.path.exists(options.restore):
            raise Exception('The restore directory does not exist')
        optimizer_dir = options.restore
    else:
        suffix = '_'.join([optimizer_str, str(options.seed), stamp])
        optimizer_dir = os.path.join(experiment_dir,
                                     experiment_directory_prefix + suffix)

    cmd = build_lr_call(config, options, optimizer_dir)

    if not os.path.exists(optimizer_dir):
        os.mkdir(optimizer_dir)

    # Search-space lookup order: absolute path, then the benchmark's copy.
    space = config.get('LR', 'space')
    abs_space = os.path.abspath(space)
    parent_space = os.path.join(experiment_dir, optimizer_str, space)
    if os.path.exists(abs_space):
        space = abs_space
    elif os.path.exists(parent_space):
        space = parent_space
    else:
        raise Exception('LR search space not found. Searched at %s and '
                        '%s' % (abs_space, parent_space))

    # Link the hyperopt search space into the experiment directory.
    basename = os.path.basename(space)
    if not os.path.exists(os.path.join(optimizer_dir, basename)):
        os.symlink(os.path.join(experiment_dir, optimizer_str, space),
                   os.path.join(optimizer_dir, basename))

    return cmd, optimizer_dir
def setUp(self):
    """Resolve the HPOlib tree, list the optimizers under test, extend PATH."""
    # Shadow __file__ locally so paths resolve relative to this test module.
    __file__ = inspect.getfile(TestOptimizers)
    self.hpolib_dir = os.path.join(os.path.dirname(__file__), "..", "..")
    self.optimizer_dir = os.path.join(self.hpolib_dir, "optimizers")
    self.benchmarks_dir = os.path.join(self.hpolib_dir, "benchmarks")
    # Add new optimizers only in this kind of format and keep in mind that
    # everything after the last '/' is treated as the optimizer name
    self.optimizers = ["smac/smac_2_06_01",
                       "smac/smac_2_08_00",
                       "spearmint/spearmint_april2013",
                       "tpe/hyperopt",
                       "tpe/random"]
    self.experiment_dir_prefix = wrapping_util.get_time_string()
    # The tests shell out to runsolver, so it must be findable on PATH.
    runsolver_path = os.path.join(self.hpolib_dir, "runsolver/src")
    os.environ["PATH"] = os.environ["PATH"] + os.pathsep + runsolver_path
def main(self, config, options, experiment_dir, **kwargs):
    """Prepare the experiment directory and the optimizer command line.

    The directory name is derived from the optimizer name, the seed and a
    timestamp; custom_setup() then materialises it with optimizer-specific
    settings, and build_call() produces the shell command to execute.

    Parameters
    ----------
    config : loaded .cfg file
    options : options containing seed, restore_dir
    experiment_dir : experiment directory / benchmark directory
    kwargs : unused so far

    Returns
    -------
    cmd : command used to run the optimizer with all its parameter settings
    optimizer_dir : directory path where the experiment is run
    """
    time_string = wrapping_util.get_time_string()
    optimizer_str = self.optimizer_dir
    optimizer_dir = os.path.join(
        experiment_dir,
        "_".join([optimizer_str, str(options.seed), time_string]))

    # Let the optimizer-specific hook create and populate the directory.
    optimizer_dir = self.custom_setup(config, options, experiment_dir,
                                      optimizer_dir)

    cmd = self.build_call(config, options, optimizer_dir)

    self.logger.info("### INFORMATION ############################################################################")
    self.logger.info("# You're running %35s" %
                     config.get(self.optimizer_name, 'path_to_optimizer'))
    self.logger.info("optimization dir %s" % optimizer_dir)
    self.logger.info("optimization str %s" % optimizer_str)
    self.logger.info("##########################################################################################\n")
    return cmd, optimizer_dir
def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
    """Set up a ConfigurationRunner experiment and build its command line.

    Parameters
    ----------
    config : loaded .cfg file
    options : options containing seed, restore_dir
    experiment_dir : experiment directory / benchmark directory
    kwargs : unused so far

    Returns
    -------
    (cmd, optimizer_dir)
    """
    time_string = wrapping_util.get_time_string()
    path_to_optimizer = os.path.abspath(os.path.dirname(__file__))
    # BUG FIX: optimizer_str was used below without ever being assigned,
    # which raised a NameError at runtime; derive it from the module
    # filename the same way the other optimizer wrappers do.
    optimizer_str = os.path.splitext(os.path.basename(__file__))[0]

    # Find experiment directory
    if options.restore:
        raise NotImplementedError("Restore is not implemented for the "
                                  "ConfigurationRunner")
    else:
        # NOTE: unlike the other wrappers the seed is not part of the name.
        optimizer_dir = os.path.join(
            experiment_dir,
            experiment_directory_prefix + optimizer_str + "_" + time_string)

    # Set up experiment directory
    if not os.path.exists(optimizer_dir):
        os.mkdir(optimizer_dir)

    # Resolve the configurations file: absolute path first, then the
    # benchmark's copy.
    space = config.get('ConfigurationRunner', "configurations")
    abs_space = os.path.abspath(space)
    parent_space = os.path.join(experiment_dir, optimizer_str, space)
    if os.path.exists(abs_space):
        space = abs_space
    elif os.path.exists(parent_space):
        space = parent_space
    else:
        raise Exception(
            "Configurations for the ConfigurationRunner not found. "
            "Searched at %s and "
            "%s" % (abs_space, parent_space))

    # Symlink the configurations file into the run directory if missing.
    if not os.path.exists(os.path.join(optimizer_dir, os.path.basename(space))):
        os.symlink(os.path.join(experiment_dir, optimizer_str, space),
                   os.path.join(optimizer_dir, os.path.basename(space)))

    # Build call
    cmd = build_call(config, options, optimizer_dir)
    return cmd, optimizer_dir
def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
    """Set up a ConfigurationRunner experiment and build its command line.

    config: loaded .cfg file; options: seed/restore settings;
    experiment_dir: experiment / benchmark directory; kwargs: unused so far.
    Returns (cmd, optimizer_dir).
    """
    time_string = wrapping_util.get_time_string()
    path_to_optimizer = os.path.abspath(os.path.dirname(__file__))
    # BUG FIX: optimizer_str was referenced without being defined (NameError);
    # derive it from the module filename as the sibling wrappers do.
    optimizer_str = os.path.splitext(os.path.basename(__file__))[0]

    # Find experiment directory
    if options.restore:
        raise NotImplementedError("Restore is not implemented for the "
                                  "ConfigurationRunner")
    else:
        optimizer_dir = os.path.join(experiment_dir,
                                     experiment_directory_prefix
                                     + optimizer_str + "_" + time_string)

    # Set up experiment directory
    if not os.path.exists(optimizer_dir):
        os.mkdir(optimizer_dir)

    # Locate the configurations file (absolute path, then benchmark copy).
    space = config.get('ConfigurationRunner', "configurations")
    abs_space = os.path.abspath(space)
    parent_space = os.path.join(experiment_dir, optimizer_str, space)
    if os.path.exists(abs_space):
        space = abs_space
    elif os.path.exists(parent_space):
        space = parent_space
    else:
        raise Exception("Configurations for the ConfigurationRunner not found. "
                        "Searched at %s and "
                        "%s" % (abs_space, parent_space))

    # Symlink the configurations into the run directory if missing.
    if not os.path.exists(os.path.join(optimizer_dir, os.path.basename(space))):
        os.symlink(os.path.join(experiment_dir, optimizer_str, space),
                   os.path.join(optimizer_dir, os.path.basename(space)))

    # Build call
    cmd = build_call(config, options, optimizer_dir)
    return cmd, optimizer_dir
def main(self, config, options, experiment_dir, **kwargs):
    """Create the run directory name, set it up and assemble the optimizer call.

    Parameters
    ----------
    config : loaded .cfg file
    options : options containing seed, restore_dir
    experiment_dir : experiment directory / benchmark directory
    kwargs : unused so far

    Returns
    -------
    cmd : command line that launches the optimizer
    optimizer_dir : directory path where the experiment is run
    """
    ts = wrapping_util.get_time_string()
    opt_name = self.optimizer_dir
    run_dir = os.path.join(experiment_dir,
                           opt_name + "_" + str(options.seed) + "_" + ts)

    # Optimizer-specific preparation of the run directory.
    run_dir = self.custom_setup(config, options, experiment_dir, run_dir)

    # Command line for the optimizer process.
    cmd = self.build_call(config, options, run_dir)

    self.logger.info("### INFORMATION ############################################################################")
    self.logger.info("# You're running %35s" %
                     config.get(self.optimizer_name, 'path_to_optimizer'))
    self.logger.info("optimization dir %s" % run_dir)
    self.logger.info("optimization str %s" % opt_name)
    self.logger.info("##########################################################################################\n")
    return cmd, run_dir
def dispatch(cfg, fold, params, test=False):
    """Run the target algorithm once through runsolver and parse its output.

    Returns (additional_data, result, status, wallclock_time).
    """
    param_string = " ".join("-" + key + " " + str(params[key])
                            for key in params)

    time_string = wrapping_util.get_time_string()
    cwd = os.getcwd()
    run_instance_output = os.path.join(cwd, time_string + "_run_instance.out")
    runsolver_output_file = os.path.join(cwd, time_string + "_runsolver.out")

    cmd = make_command(cfg, fold, param_string, run_instance_output, test=test)

    # Execute, timing the run and capturing runsolver's output.
    starttime = time.time()
    with open(runsolver_output_file, "w") as fh:
        _run_command_with_shell(cmd, fh)
    endtime = time.time()

    with open(run_instance_output, "r") as fh:
        run_instance_content = fh.readlines()
    with open(runsolver_output_file, "r") as fh:
        runsolver_output_content = fh.readlines()

    cpu_time, wallclock_time, status, result, additional_data = parse_output(
        cfg, run_instance_content, runsolver_output_content,
        measured_time=endtime - starttime)

    # Only clean up output files after a successful run.
    if status == "SAT" and cfg.getboolean("HPOLIB",
                                          "remove_target_algorithm_output"):
        os.remove(run_instance_output)
        os.remove(runsolver_output_file)

    if cfg.getboolean("HPOLIB", "store_target_algorithm_calls"):
        store_target_algorithm_calls(
            path=os.path.join(os.getcwd(), "target_algorithm_calls.csv"),
            wallclock_time=wallclock_time, result=result,
            additional_data=additional_data, call=cmd)

    return additional_data, result, status, wallclock_time
def dispatch(cfg, fold, params, test=False):
    """Execute one target-algorithm run via runsolver and collect the results.

    Returns (additional_data, result, status, wallclock_time).
    """
    pairs = ["-" + name + " " + str(params[name]) for name in params]
    param_string = " ".join(pairs)

    stamp = wrapping_util.get_time_string()
    instance_out = os.path.join(os.getcwd(), stamp + "_run_instance.out")
    runsolver_out = os.path.join(os.getcwd(), stamp + "_runsolver.out")

    cmd = make_command(cfg, fold, param_string, instance_out, test=test)

    # Time the run while runsolver output is streamed to its log file.
    begin = time.time()
    log_fh = open(runsolver_out, "w")
    _run_command_with_shell(cmd, log_fh)
    log_fh.close()
    end = time.time()

    with open(instance_out, "r") as fh:
        instance_lines = fh.readlines()
    with open(runsolver_out, "r") as fh:
        runsolver_lines = fh.readlines()

    cpu_time, wallclock_time, status, result, additional_data = \
        parse_output(cfg, instance_lines, runsolver_lines,
                     measured_time=end - begin)

    if status == "SAT":
        # Remove the per-run output files only when configured to do so.
        if cfg.getboolean("HPOLIB", "remove_target_algorithm_output"):
            os.remove(instance_out)
            os.remove(runsolver_out)

    if cfg.getboolean("HPOLIB", "store_target_algorithm_calls"):
        store_target_algorithm_calls(
            path=os.path.join(os.getcwd(), "target_algorithm_calls.csv"),
            wallclock_time=wallclock_time,
            result=result,
            additional_data=additional_data,
            call=cmd)

    return additional_data, result, status, wallclock_time
def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
    """Set up a TPE (hyperopt) run and return the command line to execute.

    Parameters
    ----------
    config : loaded .cfg file
    options : options containing seed, restore_dir
    experiment_dir : experiment directory / benchmark directory
    kwargs : unused so far

    Returns
    -------
    (cmd, optimizer_dir)
    """
    time_string = wrapping_util.get_time_string()
    cmd = ""

    # Add path_to_optimizer to PYTHONPATH and to sys.path. Only for HYPEROPT.
    if 'PYTHONPATH' not in os.environ:
        os.environ['PYTHONPATH'] = config.get('TPE', 'path_to_optimizer')
    else:
        os.environ['PYTHONPATH'] = config.get('TPE', 'path_to_optimizer') + \
            os.pathsep + os.environ['PYTHONPATH']
    sys.path.append(config.get('TPE', 'path_to_optimizer'))

    optimizer_str = os.path.splitext(os.path.basename(__file__))[0]

    # (Removed a large block of dead, commented-out version-detection code.)

    # Find experiment directory
    if options.restore:
        if not os.path.exists(options.restore):
            raise Exception("The restore directory does not exist")
        optimizer_dir = options.restore
    else:
        optimizer_dir = os.path.join(
            experiment_dir,
            experiment_directory_prefix + optimizer_str + "_" +
            str(options.seed) + "_" + time_string)

    # Build call
    cmd += build_tpe_call(config, options, optimizer_dir)

    # Set up experiment directory
    if not os.path.exists(optimizer_dir):
        os.mkdir(optimizer_dir)

    # Resolve the search space: absolute path first, then the benchmark copy.
    space = config.get('TPE', 'space')
    abs_space = os.path.abspath(space)
    parent_space = os.path.join(experiment_dir, optimizer_str, space)
    if os.path.exists(abs_space):
        space = abs_space
    elif os.path.exists(parent_space):
        space = parent_space
    else:
        # BUG FIX: this string literal was broken in two by a stray newline,
        # making the statement a syntax error; rejoined via implicit
        # concatenation.
        raise Exception("TPE search space not found. Searched at %s and "
                        "%s" % (abs_space, parent_space))

    # Copy (symlink) the hyperopt search space into the run directory.
    if not os.path.exists(os.path.join(optimizer_dir, os.path.basename(space))):
        os.symlink(os.path.join(experiment_dir, optimizer_str, space),
                   os.path.join(optimizer_dir, os.path.basename(space)))

    # Report which hyperopt installation will actually be used.
    import hyperopt
    path_to_loaded_optimizer = os.path.abspath(
        os.path.dirname(os.path.dirname(hyperopt.__file__)))

    logger.info("### INFORMATION ################################################################")
    logger.info("# You are running: #")
    logger.info("# %76s #" % path_to_loaded_optimizer)
    if not os.path.samefile(path_to_loaded_optimizer,
                            config.get('TPE', 'path_to_optimizer')):
        logger.warning("# BUT hyperopt_august2013_modDefault.cfg says:")
        logger.warning("# %76s #" % config.get('TPE', 'path_to_optimizer'))
        logger.warning("# Found a global hyperopt version. This installation will be used! #")
    else:
        logger.info("# To reproduce our results you need version 0.0.3.dev, which can be found here:#")
        logger.info("%s" % version_info)
        logger.info("# A newer version might be available, but not yet built in. #")
    logger.info("################################################################################")
    return cmd, optimizer_dir
def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
    """Set up a SMAC run: experiment dir, search-space link, instance files.

    Parameters
    ----------
    config : loaded .cfg file
    options : options containing seed, restore_dir
    experiment_dir : experiment directory / benchmark directory
    kwargs : unused so far

    Returns
    -------
    (cmd, optimizer_dir)
    """
    time_string = wrapping_util.get_time_string()
    optimizer_str = os.path.splitext(os.path.basename(__file__))[0]

    # Find experiment directory
    if options.restore:
        if not os.path.exists(options.restore):
            raise Exception("The restore directory does not exist")
        optimizer_dir = options.restore
    else:
        optimizer_dir = os.path.join(
            experiment_dir,
            experiment_directory_prefix + optimizer_str + "_" +
            str(options.seed) + "_" + time_string)

    # Build call
    cmd = build_smac_call(config, options, optimizer_dir)

    # Set up experiment directory
    if not os.path.exists(optimizer_dir):
        os.mkdir(optimizer_dir)

    # TODO: This can cause huge problems when the files are located
    # somewhere else?
    space = config.get('SMAC', "p")
    abs_space = os.path.abspath(space)
    parent_space = os.path.join(experiment_dir, optimizer_str, space)
    if os.path.exists(abs_space):
        space = abs_space
    elif os.path.exists(parent_space):
        space = parent_space
    else:
        # BUG FIX: this string literal was broken in two by a stray newline,
        # making the statement a syntax error; rejoined via implicit
        # concatenation.
        raise Exception("SMAC search space not found. Searched at %s and "
                        "%s" % (abs_space, parent_space))

    # Symlink the smac search space into the run directory if missing.
    if not os.path.exists(os.path.join(optimizer_dir, os.path.basename(space))):
        os.symlink(os.path.join(experiment_dir, optimizer_str, space),
                   os.path.join(optimizer_dir, os.path.basename(space)))

    # Create the instance information: one pseudo-instance per CV fold.
    # Use context managers so the files are closed even if a write fails.
    number_cv_folds = config.getint('HPOLIB', 'number_cv_folds')
    with open(os.path.join(optimizer_dir, 'train.txt'), "w") as fh:
        for i in range(number_cv_folds):
            fh.write(str(i) + "\n")
    with open(os.path.join(optimizer_dir, 'test.txt'), "w") as fh:
        for i in range(number_cv_folds):
            fh.write(str(i) + "\n")
    # Create an empty scenario file (populated by SMAC's own options).
    open(os.path.join(optimizer_dir, "scenario.txt"), "w").close()

    logger.info("### INFORMATION ################################################################")
    logger.info("# You're running %40s #" % config.get('SMAC', 'path_to_optimizer'))
    for v in version_info:
        logger.info("# %76s #" % v)
    logger.info("# A newer version might be available, but not yet built in. #")
    logger.info("# Please use this version only to reproduce our results on automl.org #")
    logger.info("################################################################################")
    return cmd, optimizer_dir
def main():
    """
    If we are not called from cv we are called from the CLI. This means the
    optimizer itself handles crossvalidation (smac). To keep a nice .pkl we
    have to do some bookkeeping here.

    Returns the formatted return string that is also printed for the caller.
    """
    cfg = wrappingUtil.load_experiment_config_file()
    called_from_cv = True
    if cfg.getint("HPOLIB", "handles_cv") == 1:
        # If our optimizer can handle crossvalidation, we are called from
        # the CLI and must do the cv bookkeeping here ourselves.
        called_from_cv = False

    # This has to be done here for SMAC, since smac does not call cv.py.
    if not called_from_cv:
        cv_starttime = time.time()
        experiment = load_experiment_file()
        experiment.start_cv(cv_starttime)
        del experiment  # release Experiment lock

    fold, seed = parse_command_line()
    # Side-effect: removes all additional information like log and applies
    # transformations to the parameters
    params = get_parameters()
    param_string = " ".join([key + " " + str(params[key]) for key in params])

    time_string = wrappingUtil.get_time_string()
    run_instance_output = os.path.join(os.getcwd(),
                                       time_string + "_run_instance.out")
    runsolver_output_file = os.path.join(os.getcwd(),
                                         time_string + "_runsolver.out")
    cmd = make_command(cfg, fold, param_string, run_instance_output)

    fh = open(runsolver_output_file, "w")
    experiment = load_experiment_file()
    # Side-effect: adds a job if it is not yet in the experiments file
    trial_index = get_trial_index(experiment, fold, params)
    experiment.set_one_fold_running(trial_index, fold)
    del experiment  # release Experiment lock

    logger.debug("Calling: %s" % cmd)
    process = subprocess.Popen(cmd, stdout=fh, stderr=fh, shell=True,
                               executable="/bin/bash")
    logger.info("-----------------------RUNNING RUNSOLVER----------------------------")
    process.wait()
    fh.close()

    cpu_time, wallclock_time, status, result, additional_data = \
        parse_output_files(cfg, run_instance_output, runsolver_output_file)

    experiment = load_experiment_file()
    if status == "SAT":
        # BUG FIX: this call's argument list was split by a stray newline in
        # the source; rejoined into one statement.
        experiment.set_one_fold_complete(trial_index, fold, result,
                                         wallclock_time)
    elif status == "CRASHED" or status == "UNSAT":
        result = cfg.getfloat("HPOLIB", "result_on_terminate")
        experiment.set_one_fold_crashed(trial_index, fold, result,
                                        wallclock_time)
        status = "SAT"
    else:
        # TODO: We need a global stopping mechanism
        pass
    del experiment  # release lock

    return_string = format_return_string(status, wallclock_time, 1, result,
                                         seed, additional_data)

    if not called_from_cv:
        experiment = load_experiment_file()
        experiment.end_cv(time.time())
        del experiment

    logger.info(return_string)
    # BUG FIX: Python-2-only "print return_string" statement replaced with a
    # call form that is valid in both Python 2 and 3.
    print(return_string)
    return return_string
def build_call(self, config, options, optimizer_dir):
    """Assemble the SMAC command line from the configuration.

    Parameters
    ----------
    config : loaded .cfg file
    options : options containing seed, restore
    optimizer_dir : directory the optimizer runs in

    Returns
    -------
    str : the full command line used to launch SMAC
    """
    import HPOlib
    call = config.get('SMAC', 'path_to_optimizer') + "/smac"
    call = " ".join([call, '--numRun', str(options.seed),
                     '--scenario-file', os.path.join(optimizer_dir, 'scenario.txt'),
                     '--cutoffTime', config.get('SMAC', 'cutoff_time'),
                     # The instance file does interfere with state restoration,
                     # it will only be loaded if no state is restored (look
                     # further down in the code)
                     # '--instanceFile', config.get('SMAC', 'instanceFile'),
                     '--intraInstanceObj', config.get('SMAC', 'intra_instance_obj'),
                     '--runObj', config.get('SMAC', 'run_obj'),
                     # '--testInstanceFile', config.get('SMAC', 'testInstanceFile'),
                     '--algoExec', self.get_algo_exec(),
                     '--execDir', optimizer_dir,
                     '-p', os.path.join(optimizer_dir,
                                        os.path.basename(config.get('SMAC', 'p'))),
                     # The experiment dir MUST not be specified when restarting,
                     # it is set further down in the code
                     # '--experimentDir', optimizer_dir,
                     '--numIterations', config.get('SMAC', 'num_iterations'),
                     '--totalNumRunsLimit', config.get('SMAC', 'total_num_runs_limit'),
                     '--outputDirectory', optimizer_dir,
                     '--numConcurrentAlgoExecs',
                     config.get('SMAC', 'num_concurrent_algo_execs'),
                     # '--runGroupName', config.get('SMAC', 'runGroupName'),
                     '--maxIncumbentRuns', config.get('SMAC', 'max_incumbent_runs'),
                     '--retryTargetAlgorithmRunCount',
                     config.get('SMAC', 'retry_target_algorithm_run_count'),
                     '--intensification-percentage',
                     config.get('SMAC', 'intensification_percentage'),
                     '--initial-incumbent', config.get('SMAC', 'initial_incumbent'),
                     '--rf-split-min', config.get('SMAC', 'rf_split_min'),
                     '--validation', config.get('SMAC', 'validation'),
                     '--runtime-limit', config.get('SMAC', 'runtime_limit'),
                     '--exec-mode', config.get('SMAC', 'exec_mode'),
                     '--rf-num-trees', config.get('SMAC', 'rf_num_trees'),
                     '--continous-neighbours',
                     config.get('SMAC', 'continous_neighbours')])

    # BUG FIX: the join separator in the else-branch below was corrupted by a
    # stray newline inside the string literal ('" \n ".join'); restored to a
    # single-space separator matching every other flag append in this method.
    if config.getboolean('SMAC', 'save_runs_every_iteration'):
        call = " ".join([call, '--save-runs-every-iteration true'])
    else:
        call = " ".join([call, '--save-runs-every-iteration false'])

    if config.getboolean('SMAC', 'deterministic'):
        call = " ".join([call, '--deterministic true'])

    # Adaptive capping only makes sense when optimizing runtime.
    if config.getboolean('SMAC', 'adaptive_capping') and \
            config.get('SMAC', 'run_obj') == "RUNTIME":
        call = " ".join([call, '--adaptiveCapping true'])

    if config.getboolean('SMAC', 'rf_full_tree_bootstrap'):
        call = " ".join([call, '--rf-full-tree-bootstrap true'])

    if options.restore:
        # Point SMAC at the saved scenario and give the restored run its own
        # run group so outputs do not collide with the original run.
        state_run = self._get_state_run(optimizer_dir)
        restore_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), os.getcwd(), state_run)
        call = " ".join([call, "--restore-scenario", restore_path])
        call = " ".join([call, "--rungroup restore_%s_" %
                         wrapping_util.get_time_string()])
    else:
        call = " ".join([call, '--instanceFile',
                         os.path.join(optimizer_dir, 'train.txt'),
                         '--testInstanceFile',
                         os.path.join(optimizer_dir, 'test.txt')])
    return call
def main():
    """Start an optimization of the HPOlib. For documentation see the
    comments inside this function and the general HPOlib documentation.

    Phases: parse CLI args -> set up logging -> locate/load the optimizer
    plugin -> build its command -> bookkeeping in the experiment pickle ->
    run the optimizer as a subprocess, forwarding its output, until it
    finishes or the total time limit triggers a staged shutdown.
    """
    args, unknown_arguments = use_arg_parser()

    # Resolve the experiment directory from --working-dir, --restore or cwd.
    if args.working_dir:
        experiment_dir = args.working_dir
    elif args.restore:
        args.restore = os.path.abspath(args.restore) + "/"
        experiment_dir = args.restore
    else:
        experiment_dir = os.getcwd()

    formatter = logging.Formatter('[%(levelname)s] [%(asctime)s:%(name)s] %('
                                  'message)s', datefmt='%H:%M:%S')
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)
    hpolib_logger.addHandler(handler)
    hpolib_logger.setLevel(1)

    # First of all print the infodevel
    if IS_DEVELOPMENT:
        logger.critical(INFODEVEL)

    # NOTE(review): use_arg_parser() was already called above; this second
    # call re-parses the command line — confirm whether it is intentional.
    args, unknown_arguments = use_arg_parser()

    # Convert the path to the optimizer to be an absolute path, which is
    # necessary later when we change the working directory
    optimizer = args.optimizer
    # NOTE(review): stray debug output; under Python 2 this prints a tuple.
    print("opti:", optimizer)
    if not os.path.isabs(optimizer):
        relative_path = optimizer
        optimizer = os.path.abspath(optimizer)
        logger.info("Converting relative optimizer path %s to absolute "
                    "optimizer path %s.", relative_path, optimizer)
    os.chdir(experiment_dir)
    experiment_dir = os.getcwd()

    check_before_start.check_first(experiment_dir)

    # Now we can safely import non standard things
    import numpy as np
    import HPOlib.Experiment as Experiment  # Wants numpy and scipy

    # Check how many optimizer versions are present and if all dependencies
    # are installed also dynamically load optimizer obj
    optimizer_version, opt_obj = check_before_start.check_optimizer(optimizer)

    logger.warning("You called -o %s, I am using optimizer defined in "
                   "%sDefault.cfg", optimizer, optimizer_version)
    optimizer = os.path.basename(optimizer_version)

    config = wrapping_util.get_configuration(experiment_dir,
                                             optimizer_version,
                                             unknown_arguments, opt_obj)

    # DO NOT LOG UNTIL HERE UNLESS SOMETHING DRAMATIC HAS HAPPENED!!!
    loglevel = config.getint("HPOLIB", "HPOlib_loglevel")
    hpolib_logger.setLevel(loglevel)
    if args.silent:
        hpolib_logger.setLevel(60)
    if args.verbose:
        hpolib_logger.setLevel(10)

    # Saving the config file is down further at the bottom, as soon as we get
    # hold of the new optimizer directory
    # wrapping_dir = os.path.dirname(os.path.realpath(__file__))

    # Load optimizer
    try:
        optimizer_dir = os.path.dirname(os.path.realpath(optimizer_version))
        # NOTE(review): the first argument to imp.load_source is the module
        # *name*; passing the directory path here looks odd — confirm.
        optimizer_module = imp.load_source(optimizer_dir,
                                           optimizer_version + ".py")
    except (ImportError, IOError):
        logger.critical("Optimizer module %s not found", optimizer)
        import traceback
        logger.critical(traceback.format_exc())
        sys.exit(1)

    # So the optimizer module can acces the seed from the config and
    config.set("HPOLIB", "seed", str(args.seed))
    experiment_directory_prefix = config.get("HPOLIB",
                                             "experiment_directory_prefix")

    # Delegate to the optimizer plugin: it creates its run directory and
    # returns the shell command to execute.
    optimizer_call, optimizer_dir_in_experiment = \
        opt_obj.main(config=config, options=args,
                     experiment_dir=experiment_dir)
    # experiment_directory_prefix=experiment_directory_prefix)
    cmd = optimizer_call

    # Start the server for logging from subprocesses here, because its port must
    # be written to the config file.
    logging_host = config.get("HPOLIB", "logging_host")
    if logging_host:
        logging_receiver_thread = None
        default_logging_port = DEFAULT_TCP_LOGGING_PORT

        # Scan upwards for a free port; the first one that binds wins.
        for logging_port in range(default_logging_port, 65535):
            try:
                logging_receiver = logging_server.LoggingReceiver(
                    host=logging_host, port=logging_port,
                    handler=logging_server.LogRecordStreamHandler)
                logging_receiver_thread = Thread(
                    target=logging_receiver.serve_forever)
                logging_receiver_thread.daemon = True
                logger.info('%s started at %s' % (
                    logging_receiver.__class__.__name__,
                    logging_receiver.server_address))
                logging_receiver_thread.start()
                break
            # TODO I did not find any useful documentation about which Exceptions
            # I should catch here...
            except Exception as e:
                logger.debug(e)
                logger.debug(e.message)

        if logging_receiver_thread is None:
            logger.critical("Could not create the logging server. Going to shut "
                            "down.")
            sys.exit(1)

        config.set("HPOLIB", "logging_port", str(logging_port))

    # Persist the (augmented) configuration next to the optimizer output.
    with open(os.path.join(optimizer_dir_in_experiment, "config.cfg"), "w") as f:
        config.set("HPOLIB", "is_not_original_config_file", "True")
        wrapping_util.save_config_to_file(f, config, write_nones=True)

    # initialize/reload pickle file
    if args.restore:
        try:
            os.remove(os.path.join(optimizer_dir_in_experiment,
                                   optimizer + ".pkl.lock"))
        except OSError:
            pass
    folds = config.getint('HPOLIB', 'number_cv_folds')
    trials = Experiment.Experiment(expt_dir=optimizer_dir_in_experiment,
                                   expt_name=experiment_directory_prefix + optimizer,
                                   folds=folds,
                                   max_wallclock_time=config.get('HPOLIB',
                                                                 'cpu_limit'),
                                   title=args.title)
    trials.optimizer = optimizer_version

    optimizer_output_file = os.path.join(
        optimizer_dir_in_experiment,
        optimizer + wrapping_util.get_time_string() + "_" +
        str(args.seed) + ".out")
    if args.restore:
        # Restore the state of a previous run via the optimizer plugin and
        # trim the experiment pickle to the restored runs.
        # noinspection PyBroadException
        try:
            restored_runs = optimizer_module.restore(
                config=config,
                optimizer_dir=optimizer_dir_in_experiment,
                cmd=cmd)
        except:
            logger.critical("Could not restore runs for %s", args.restore)
            import traceback
            logger.critical(traceback.format_exc())
            sys.exit(1)

        logger.info("Restored %d runs", restored_runs)
        trials.remove_all_but_first_runs(restored_runs)
        fh = open(optimizer_output_file, "a")
        fh.write("#" * 80 + "\n" + "Restart! Restored %d runs.\n" % restored_runs)
        fh.close()

        # Close the dangling CV interval from the interrupted run before
        # opening a new one.
        if len(trials.endtime) < len(trials.starttime):
            trials.endtime.append(trials.cv_endtime[-1])
        trials.starttime.append(time.time())
    else:
        trials.starttime.append(time.time())
    # noinspection PyProtectedMember
    trials._save_jobs()
    del trials  # release the Experiment pickle lock before spawning children
    sys.stdout.flush()

    # Run call
    if args.printcmd:
        logger.info(cmd)
        return 0
    else:
        # Create a second formatter and handler to customize the optimizer
        # output
        optimization_formatter = logging.Formatter(
            '[%(levelname)s] [%(asctime)s:%(optimizer)s] %(message)s',
            datefmt='%H:%M:%S')
        optimization_handler = logging.StreamHandler(sys.stdout)
        optimization_handler.setFormatter(optimization_formatter)
        optimization_logger = logging.getLogger(optimizer)
        optimization_logger.addHandler(optimization_handler)
        optimizer_loglevel = config.getint("HPOLIB", "optimizer_loglevel")
        optimization_logger.setLevel(optimizer_loglevel)

        # Use a flag which is set to true as soon as all children are
        # supposed to be killed
        exit_ = wrapping_util.Exit()
        signal.signal(signal.SIGTERM, exit_.signal_callback)
        signal.signal(signal.SIGABRT, exit_.signal_callback)
        signal.signal(signal.SIGINT, exit_.signal_callback)
        signal.signal(signal.SIGHUP, exit_.signal_callback)

        # Change into the current experiment directory
        # Some optimizer might expect this
        dir_before_exp = os.getcwd()

        temporary_output_dir = config.get("HPOLIB",
                                          "temporary_output_directory")
        if temporary_output_dir:
            # Work in a scratch directory (e.g. node-local disk) and copy
            # results back at the end.
            last_part = os.path.split(optimizer_dir_in_experiment)[1]
            temporary_output_dir = os.path.join(temporary_output_dir, last_part)

            # Replace any occurence of the path in the command
            cmd = cmd.replace(optimizer_dir_in_experiment, temporary_output_dir)
            optimizer_output_file = optimizer_output_file.replace(
                optimizer_dir_in_experiment, temporary_output_dir)

            shutil.copytree(optimizer_dir_in_experiment, temporary_output_dir)

            # shutil.rmtree does not work properly with NFS
            # https://github.com/hashdist/hashdist/issues/113
            # Idea from https://github.com/ahmadia/hashdist/
            for rmtree_iter in range(5):
                try:
                    shutil.rmtree(optimizer_dir_in_experiment)
                    break
                except OSError, e:
                    time.sleep(rmtree_iter)

            optimizer_dir_in_experiment = temporary_output_dir

        # call target_function.setup()
        fn_setup = config.get("HPOLIB", "function_setup")
        if fn_setup:
            # if temporary_output_dir:
            #     logger.critical("The options 'temporary_output_directory' "
            #                     "and 'function_setup' cannot be used "
            #                     "together.")
            #     sys.exit(1)

            fn_setup_output = os.path.join(optimizer_dir_in_experiment,
                                           "function_setup.out")
            runsolver_cmd = runsolver_wrapper._make_runsolver_command(
                config, fn_setup_output)
            setup_cmd = runsolver_cmd + " " + fn_setup
            # runsolver_output = subprocess.STDOUT
            runsolver_output = open("/dev/null")
            runsolver_wrapper._run_command_with_shell(setup_cmd,
                                                      runsolver_output)

        os.chdir(optimizer_dir_in_experiment)

        logger.info(cmd)

        output_file = optimizer_output_file
        fh = open(output_file, "a")
        cmd = shlex.split(cmd)
        # NOTE(review): stray Python 2 debug print of the tokenized command.
        print cmd

        # See man 7 credentials for the meaning of a process group id
        # This makes wrapping.py useable with SGEs default behaviour,
        # where qdel sends a SIGKILL to a whole process group
        # logger.info(os.getpid())
        # os.setpgid(os.getpid(), os.getpid())    # same as os.setpgid(0, 0)
        # TODO: figure out why shell=True was removed in commit f47ac4bb3ffe7f70b795d50c0828ca7e109d2879
        # maybe it has something todo with the previous behaviour where a
        # session id was set...
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)

        global child_process_pid
        child_process_pid = proc.pid
        process = psutil.Process(os.getpid())

        logger.info("-----------------------RUNNING----------------------------------")
        # http://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python
        # How often is the experiment pickle supposed to be opened?
        if config.get("HPOLIB", "total_time_limit"):
            optimizer_end_time = time.time() + config.getint("HPOLIB",
                                                             "total_time_limit")
        else:
            optimizer_end_time = sys.float_info.max

        # Shutdown escalation state: SIGINT -> SIGTERM -> SIGKILL, each
        # stage 5 seconds after the previous one (see loop below).
        sent_SIGINT = False
        sent_SIGINT_time = np.inf
        sent_SIGTERM = False
        sent_SIGTERM_time = np.inf
        sent_SIGKILL = False
        sent_SIGKILL_time = np.inf
        # NOTE(review): children_to_kill appears unused in this function.
        children_to_kill = list()

        # Pump a pipe into a queue so the main loop can poll it non-blocking.
        def enqueue_output(out, queue):
            for line in iter(out.readline, b''):
                queue.put(line)
            out.close()

        stderr_queue = Queue()
        stdout_queue = Queue()
        stderr_thread = Thread(target=enqueue_output,
                               args=(proc.stderr, stderr_queue))
        stdout_thread = Thread(target=enqueue_output,
                               args=(proc.stdout, stdout_queue))
        stderr_thread.daemon = True
        stdout_thread.daemon = True
        stderr_thread.start()
        stdout_thread.start()
        if not (args.verbose or args.silent):
            logger.info('Optimizer runs with PID: %d', proc.pid)
            logger.info('We start in directory %s', os.getcwd())
        while True:
            # this implements the total runtime limit
            if time.time() > optimizer_end_time and not sent_SIGINT:
                logger.info("Reached total_time_limit, going to shutdown.")
                exit_.true()

            # necessary, otherwise HPOlib-run takes 100% of one processor
            time.sleep(0.25)

            # Drain both output queues: mirror every line to the output file
            # and to the optimizer logger.
            try:
                while True:
                    line = stdout_queue.get_nowait()
                    fh.write(line)
                    fh.flush()
                    optimization_logger.info(line.replace("\n", ""),
                                             extra={'optimizer': optimizer})
            except Empty:
                pass

            try:
                while True:
                    line = stderr_queue.get_nowait()
                    fh.write(line)
                    fh.flush()
                    optimization_logger.error(line.replace("\n", ""),
                                              extra={'optimizer': optimizer})
            except Empty:
                pass

            ret = proc.poll()
            if ret is not None:
                # This does not include wrapping.py
                children = process.children()
                if len(children) == 0:
                    break
            # TODO: what happens if we have a ret but something is still
            # running?

            if exit_.get_exit() == True and not sent_SIGINT:
                logger.critical("Shutdown procedure: Sending SIGINT")
                wrapping_util.kill_processes(signal.SIGINT)
                sent_SIGINT_time = time.time()
                sent_SIGINT = True

            if exit_.get_exit() == True and not sent_SIGTERM and time.time() \
                    > sent_SIGINT_time + 5:
                logger.critical("Shutdown procedure: Sending SIGTERM")
                wrapping_util.kill_processes(signal.SIGTERM)
                sent_SIGTERM_time = time.time()
                sent_SIGTERM = True

            if exit_.get_exit() == True and not sent_SIGKILL and time.time() \
                    > sent_SIGTERM_time + 5:
                logger.critical("Shutdown procedure: Sending SIGKILL")
                wrapping_util.kill_processes(signal.SIGKILL)
                sent_SIGKILL_time = time.time()
                sent_SIGKILL = True

        logger.info("-----------------------END--------------------------------------")
        ret = proc.returncode
        logger.info("Finished with return code: %d", ret)
        del proc
        fh.close()

        # Change back into to directory
        os.chdir(dir_before_exp)

        # call target_function.setup()
        fn_teardown = config.get("HPOLIB", "function_teardown")
        if fn_teardown:
            # if temporary_output_dir:
            #     logger.critical("The options 'temporary_output_directory' "
            #                     "and 'function_teardown' cannot be used "
            #                     "together.")
            #     sys.exit(1)

            fn_teardown_output = os.path.join(optimizer_dir_in_experiment,
                                              "function_teardown.out")
            runsolver_cmd = runsolver_wrapper._make_runsolver_command(
                config, fn_teardown_output)
            teardown_cmd = runsolver_cmd + " " + fn_teardown
            # runsolver_output = subprocess.STDOUT
            runsolver_output = open("/dev/null")
            runsolver_wrapper._run_command_with_shell(teardown_cmd,
                                                      runsolver_output)

        if temporary_output_dir:
            # We cannot be sure that the directory
            # optimizer_dir_in_experiment in dir_before_exp got deleted
            # properly, therefore we append an underscore to the end of the
            # filename
            last_part = os.path.split(optimizer_dir_in_experiment)[1]
            new_dir = os.path.join(dir_before_exp, last_part)
            try:
                shutil.copytree(optimizer_dir_in_experiment, new_dir)
            except OSError as e:
                new_dir += "_"
                shutil.copytree(optimizer_dir_in_experiment, new_dir)

            # shutil.rmtree does not work properly with NFS
            # https://github.com/hashdist/hashdist/issues/113
            # Idea from https://github.com/ahmadia/hashdist/
            for rmtree_iter in range(5):
                try:
                    shutil.rmtree(optimizer_dir_in_experiment)
                    break
                except OSError, e:
                    time.sleep(rmtree_iter)
            optimizer_dir_in_experiment = new_dir
def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
    """Set up a TPE/hyperopt run directory and return (cmd, optimizer_dir)."""
    # config: Loaded .cfg file
    # options: Options containing seed, restore_dir,
    # experiment_dir: Experiment directory/Benchmark_directory
    # **kwargs: Nothing so far
    time_string = wrapping_util.get_time_string()
    cmd = ""

    # Add path_to_optimizer to PYTHONPATH and to sys.path
    # Only for HYPEROPT
    # (PYTHONPATH is prepended so subprocesses pick up the same hyperopt.)
    if not 'PYTHONPATH' in os.environ:
        os.environ['PYTHONPATH'] = config.get('TPE', 'path_to_optimizer')
    else:
        os.environ['PYTHONPATH'] = config.get('TPE', 'path_to_optimizer') + \
            os.pathsep + os.environ['PYTHONPATH']
    sys.path.append(config.get('TPE', 'path_to_optimizer'))

    # Plugin name derived from this file's name, e.g. used for directory names.
    optimizer_str = os.path.splitext(os.path.basename(__file__))[0]

    # TODO: Check whether we might need this again
    # SYSTEM_WIDE = 0
    # AUGUST_2013_MOD = 1
    # try:
    #     import hyperopt
    #     version = SYSTEM_WIDE
    # except ImportError:
    #     try:
    #         cmd += "export PYTHONPATH=$PYTHONPATH:" + os.path.dirname(os.path.abspath(__file__)) + \
    #             "/optimizers/hyperopt_august2013_mod\n"
    #         import optimizers.hyperopt_august2013_mod.hyperopt as hyperopt
    #     except ImportError, e:
    #         import HPOlib.optimizers.hyperopt_august2013_mod.hyperopt as hyperopt
    #     version = AUGUST_2013_MOD

    # Find experiment directory
    if options.restore:
        if not os.path.exists(options.restore):
            raise Exception("The restore directory does not exist")
        optimizer_dir = options.restore
    else:
        optimizer_dir = os.path.join(experiment_dir,
                                     experiment_directory_prefix +
                                     optimizer_str + "_" +
                                     str(options.seed) + "_" + time_string)

    # Build call
    cmd += build_tpe_call(config, options, optimizer_dir)

    # Set up experiment directory
    if not os.path.exists(optimizer_dir):
        os.mkdir(optimizer_dir)
        # Locate the search space: first as given (relative to cwd), then
        # inside <experiment_dir>/<optimizer_str>/.
        space = config.get('TPE', 'space')
        abs_space = os.path.abspath(space)
        parent_space = os.path.join(experiment_dir, optimizer_str, space)
        if os.path.exists(abs_space):
            space = abs_space
        elif os.path.exists(parent_space):
            space = parent_space
        else:
            raise Exception("TPE search space not found. Searched at %s and "
                            "%s" % (abs_space, parent_space))

        # Copy the hyperopt search space
        # NOTE(review): `space` may already be absolute at this point, in
        # which case os.path.join ignores the first two components — verify.
        if not os.path.exists(os.path.join(optimizer_dir,
                                           os.path.basename(space))):
            os.symlink(os.path.join(experiment_dir, optimizer_str, space),
                       os.path.join(optimizer_dir, os.path.basename(space)))

    # Import whatever hyperopt is reachable now (sys.path was extended above)
    # and report which installation actually got loaded.
    import hyperopt
    # import HPOlib.optimizers.hyperopt_august2013_mod.hyperopt as hyperopt
    path_to_loaded_optimizer = os.path.abspath(
        os.path.dirname(os.path.dirname(hyperopt.__file__)))

    logger.info("### INFORMATION ################################################################")
    logger.info("# You are running: #")
    logger.info("# %76s #" % path_to_loaded_optimizer)
    if not os.path.samefile(path_to_loaded_optimizer,
                            config.get('TPE', 'path_to_optimizer')):
        # A different (e.g. globally installed) hyperopt shadowed the
        # configured one; warn the user which one is actually used.
        logger.warning("# BUT hyperopt_august2013_modDefault.cfg says:")
        logger.warning("# %76s #" % config.get('TPE', 'path_to_optimizer'))
        logger.warning("# Found a global hyperopt version. This installation will be used! #")
    else:
        logger.info("# To reproduce our results you need version 0.0.3.dev, which can be found here:#")
        logger.info("%s" % version_info)
        logger.info("# A newer version might be available, but not yet built in. #")
    logger.info("################################################################################")
    return cmd, optimizer_dir
def build_call(self, config, options, optimizer_dir):
    """Build the complete SMAC command line for one run.

    All fixed flags are read from the 'SMAC' section of `config`; boolean
    switches are appended as explicit "flag true/false" words, and the
    restore/fresh-run tail depends on `options.restore`. Returns the whole
    command as one space-separated string.
    """
    import HPOlib

    smac_get = config.get  # shorthand; every fixed flag reads section 'SMAC'
    parts = [
        smac_get('SMAC', 'path_to_optimizer') + "/smac",
        '--numRun', str(options.seed),
        '--scenario-file', os.path.join(optimizer_dir, 'scenario.txt'),
        '--cutoffTime', smac_get('SMAC', 'cutoff_time'),
        # The instance file does interfere with state restoration, it will only
        # be loaded if no state is restored (look further down in the code
        # '--instanceFile', config.get('SMAC', 'instanceFile'),
        '--intraInstanceObj', smac_get('SMAC', 'intra_instance_obj'),
        '--runObj', smac_get('SMAC', 'run_obj'),
        # '--testInstanceFile', config.get('SMAC', 'testInstanceFile'),
        '--algoExec', self.get_algo_exec(),
        '--execDir', optimizer_dir,
        '-p', os.path.join(optimizer_dir,
                           os.path.basename(smac_get('SMAC', 'p'))),
        # The experiment dir MUST not be specified when restarting, it is set
        # further down in the code
        # '--experimentDir', optimizer_dir,
        '--numIterations', smac_get('SMAC', 'num_iterations'),
        '--totalNumRunsLimit', smac_get('SMAC', 'total_num_runs_limit'),
        '--outputDirectory', optimizer_dir,
        '--numConcurrentAlgoExecs', smac_get('SMAC', 'num_concurrent_algo_execs'),
        # '--runGroupName', config.get('SMAC', 'runGroupName'),
        '--maxIncumbentRuns', smac_get('SMAC', 'max_incumbent_runs'),
        '--retryTargetAlgorithmRunCount',
        smac_get('SMAC', 'retry_target_algorithm_run_count'),
        '--intensification-percentage',
        smac_get('SMAC', 'intensification_percentage'),
        '--initial-incumbent', smac_get('SMAC', 'initial_incumbent'),
        '--rf-split-min', smac_get('SMAC', 'rf_split_min'),
        '--validation', smac_get('SMAC', 'validation'),
        '--runtime-limit', smac_get('SMAC', 'runtime_limit'),
        '--exec-mode', smac_get('SMAC', 'exec_mode'),
        '--rf-num-trees', smac_get('SMAC', 'rf_num_trees'),
        '--continous-neighbours', smac_get('SMAC', 'continous_neighbours'),
    ]

    # Boolean switches; SMAC takes the value as a separate word.
    if config.getboolean('SMAC', 'save_runs_every_iteration'):
        parts.append('--save-runs-every-iteration true')
    else:
        parts.append('--save-runs-every-iteration false')

    if config.getboolean('SMAC', 'deterministic'):
        parts.append('--deterministic true')

    # Adaptive capping is only meaningful when optimizing runtime.
    if config.getboolean('SMAC', 'adaptive_capping') and \
            config.get('SMAC', 'run_obj') == "RUNTIME":
        parts.append('--adaptiveCapping true')

    if config.getboolean('SMAC', 'rf_full_tree_bootstrap'):
        parts.append('--rf-full-tree-bootstrap true')

    if options.restore:
        # Point SMAC at the saved state of the interrupted run.
        state_run = self._get_state_run(optimizer_dir)
        restore_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            os.getcwd(), state_run)
        parts.append("--restore-scenario")
        parts.append(restore_path)
        parts.append("--rungroup restore_%s_" % wrapping_util.get_time_string())
    else:
        # Fresh run: hand over the generated train/test instance files.
        parts.append('--instanceFile')
        parts.append(os.path.join(optimizer_dir, 'train.txt'))
        parts.append('--testInstanceFile')
        parts.append(os.path.join(optimizer_dir, 'test.txt'))

    return " ".join(parts)
def main():
    """
    If we are not called from cv means we are called from CLI. This means
    the optimizer itself handles crossvalidation (smac). To keep a nice .pkl we
    have to do some bookkeeping here

    Runs one target-algorithm evaluation through runsolver, records its
    start/completion (or crash) in the experiment pickle, and returns the
    formatted result string the optimizer parses.
    """
    cfg = wrappingUtil.load_experiment_config_file()
    called_from_cv = True
    if cfg.getint('HPOLIB', 'handles_cv') == 1:
        # If Our Optimizer can handle crossvalidation,
        # we are called from CLI. To keep a sane nice .pkl
        # we have to do some bookkeeping here
        called_from_cv = False

    # This has to be done here for SMAC, since smac does not call cv.py
    if not called_from_cv:
        cv_starttime = time.time()
        experiment = load_experiment_file()
        experiment.start_cv(cv_starttime)
        del experiment  # release the Experiment lock

    fold, seed = parse_command_line()
    # Side-effect: removes all additional information like log and applies
    # transformations to the parameters
    params = get_parameters()
    param_string = " ".join([key + " " + str(params[key]) for key in params])

    # Unique per-call output files, named by timestamp.
    time_string = wrappingUtil.get_time_string()
    run_instance_output = os.path.join(os.getcwd(),
                                       time_string + "_run_instance.out")
    runsolver_output_file = os.path.join(os.getcwd(),
                                         time_string + "_runsolver.out")
    cmd = make_command(cfg, fold, param_string, run_instance_output)

    fh = open(runsolver_output_file, "w")
    experiment = load_experiment_file()
    # Side-effect: adds a job if it is not yet in the experiments file
    trial_index = get_trial_index(experiment, fold, params)
    experiment.set_one_fold_running(trial_index, fold)
    del experiment  # release Experiment lock
    logger.debug("Calling: %s" % cmd)
    #sys.stdout.write(cmd + "\n")
    #sys.stdout.flush()

    # Run the target algorithm under runsolver; stdout/stderr go to the
    # runsolver output file.
    process = subprocess.Popen(cmd, stdout=fh, stderr=fh, shell=True,
                               executable="/bin/bash")
    logger.info(
        "-----------------------RUNNING RUNSOLVER----------------------------")
    process.wait()
    fh.close()

    cpu_time, wallclock_time, status, result, additional_data = \
        parse_output_files(cfg, run_instance_output, runsolver_output_file)

    # Record the outcome; crashes are booked with the configured penalty
    # result and then reported as "SAT" so the optimizer keeps going.
    experiment = load_experiment_file()
    if status == "SAT":
        experiment.set_one_fold_complete(trial_index, fold, result,
                                         wallclock_time)
    elif status == "CRASHED" or status == "UNSAT":
        result = cfg.getfloat("HPOLIB", "result_on_terminate")
        experiment.set_one_fold_crashed(trial_index, fold, result,
                                        wallclock_time)
        status = "SAT"
    else:
        # TODO: We need a global stopping mechanism
        pass
    del experiment  # release lock

    return_string = format_return_string(status, wallclock_time, 1, result,
                                         seed, additional_data)

    if not called_from_cv:
        experiment = load_experiment_file()
        experiment.end_cv(time.time())
        del experiment

    logger.info(return_string)
    # NOTE(review): Python 2 print — the optimizer reads this line from stdout.
    print return_string
    return return_string
def main(config, options, experiment_dir, experiment_directory_prefix, **kwargs):
    """Prepare a SMAC run directory and return its shell command.

    config                      -- loaded .cfg file
    options                     -- parsed options; uses `seed` and `restore`
    experiment_dir              -- experiment/benchmark directory
    experiment_directory_prefix -- prefix for a newly created run directory
    kwargs                      -- unused so far

    Returns a (cmd, optimizer_dir) tuple.
    """
    stamp = wrapping_util.get_time_string()
    opt_name = os.path.splitext(os.path.basename(__file__))[0]

    # Either reuse the directory we restore from, or derive a fresh name of
    # the form <prefix><optimizer>_<seed>_<timestamp>.
    if options.restore:
        if not os.path.exists(options.restore):
            raise Exception("The restore directory does not exist")
        optimizer_dir = options.restore
    else:
        run_dir_name = experiment_directory_prefix + opt_name + "_" + \
            str(options.seed) + "_" + stamp
        optimizer_dir = os.path.join(experiment_dir, run_dir_name)

    # Build call
    cmd = build_smac_call(config, options, optimizer_dir)

    # First-time setup: create the directory, link the search space and
    # write the instance files SMAC expects.
    if not os.path.exists(optimizer_dir):
        os.mkdir(optimizer_dir)
        # TODO: This can cause huge problems when the files are located
        # somewhere else?
        space = config.get('SMAC', "p")
        abs_candidate = os.path.abspath(space)
        shared_candidate = os.path.join(experiment_dir, opt_name, space)
        if os.path.exists(abs_candidate):
            space = abs_candidate
        elif os.path.exists(shared_candidate):
            space = shared_candidate
        else:
            raise Exception("SMAC search space not found. Searched at %s and "
                            "%s" % (abs_candidate, shared_candidate))

        link_name = os.path.join(optimizer_dir, os.path.basename(space))
        if not os.path.exists(link_name):
            os.symlink(os.path.join(experiment_dir, opt_name, space),
                       link_name)

        # Copy the smac search space and create the instance information:
        # one fold index per line, identical for train and test.
        for instance_file in ('train.txt', 'test.txt'):
            with open(os.path.join(optimizer_dir, instance_file), "w") as fh:
                for fold in range(config.getint('HPOLIB', 'number_cv_folds')):
                    fh.write(str(fold) + "\n")

        # SMAC expects the scenario file to exist; an empty one suffices here.
        with open(os.path.join(optimizer_dir, "scenario.txt"), "w"):
            pass

    logger.info(
        "### INFORMATION ################################################################"
    )
    logger.info("# You're running %40s #" %
                config.get('SMAC', 'path_to_optimizer'))
    for v in version_info:
        logger.info("# %76s #" % v)
    logger.info(
        "# A newer version might be available, but not yet built in. #"
    )
    logger.info(
        "# Please use this version only to reproduce our results on automl.org #"
    )
    logger.info(
        "################################################################################"
    )
    return cmd, optimizer_dir
def wrapAutoweka(params, **kwargs): """ This functions wraps the autoweka software to use it in the BBoM workflow """ # Look for spearmint calling wrappingAutoweka if len(params.keys()) > 700: params = reduce_params(params) print "Params:", params # The values Xmx (java memory) and the number 9000 (150 minutes) are from # the AutoWEKA paper. The 15 minutes for feature search are specified in the # parameter file # algo = "/usr/lib/jvm/java-7-openjdk-amd64/jre/bin/java" -Xmx1000m -cp # pathToAW autoweka.smac.SMACWrapper -prop datasetString=testArff= # testarff__COLONESCAPE__:type=trainTestArff__COLONESCAPE__: # trainArff=trainarff: # instanceGenerator=autoweka.instancegenerators.CrossValidation: # resultMetric=errorRate # -prop executionMode=SMAC:initialIncumbent=RANDOM:initialN=1 -wrapper # We do not necessarily need testarff, javamemory, trainingtime, pathToAW if "testarff" not in kwargs: kwargs["testarff"] = "__DUMMY__" else: kwargs["testarff"] = os.path.abspath(kwargs["testarff"]) if not os.path.exists(kwargs["testarff"]): raise ValueError("%s does not exist, please correct --testarff" % kwargs["testarff"]) if "javamemory" not in kwargs: kwargs["javamemory"] = "3000" if "trainingtime" not in kwargs: kwargs["trainingtime"] = "9000" if "pathToAw" not in kwargs: path_to_aw = os.path.join(os.path.dirname(os.path.realpath(__file__)), "/autoweka-0.5/autoweka.jar") else: path_to_aw = os.path.abspath(kwargs["pathToAw"]) if not os.path.exists(path_to_aw): raise ValueError("%s does not exist, please corect --pathToAw") # But we do need trainarff, folds, fold if "trainarff" not in kwargs: raise ValueError("Please set --trainarff") else: kwargs["trainarff"] = os.path.abspath(kwargs["trainarff"]) if not os.path.exists(kwargs["trainarff"]): raise ValueError("%s does not exist, please correct --trainarff" % kwargs["trainarff"]) if not "folds" in kwargs: raise ValueError("--folds not specified") if not "fold" in kwargs: raise ValueError("--fold not specified") # Build the 
command cmd = "".join(['java -Xmx%sm -cp "%s" ' % (kwargs["javamemory"], path_to_aw), "autoweka.smac.SMACWrapper -prop ", "datasetString=testArff=%s__COLONESCAPE__:" % kwargs['testarff'], "type=trainTestArff__COLONESCAPE__:", "trainArff=%s:" % kwargs['trainarff'], "instanceGenerator=autoweka.instancegenerators.CrossValidation:" "resultMetric=errorRate ", "-prop executionMode=SMAC:", "initialIncumbent=RANDOM:", "initialN=1 ", "-wrapper seed=0:", "numFolds=%s:fold=%s " % (kwargs['folds'], kwargs['fold']), "0 %s 2147483647 -1 " % kwargs["trainingtime"]]) cmd += ("%s" % (" ".join("-%s %s" % (i, params[i]) for i in params.keys()))) time_string = wrapping_util.get_time_string() output_file = os.path.join(os.getcwd(), time_string + "_autoweka.out") print "\nRunning this command:\n%s" % cmd print "Saving to %s" % output_file fh = open(output_file, "a") process = subprocess.Popen(cmd, stdout=fh, stderr=fh, shell=True, executable="/bin/bash") ret = process.wait() fh.close() # Read the run_instance output file fh = open(output_file, "r") content = fh.readlines() fh.close() result = 100 sat = '' for line in content: pos = line.find("Result for ParamILS:") if pos != -1: line = line.split(",") result = float(line[3].strip()) sat = line[0].split(":")[1].strip() break if sat == "SAT": os.remove(output_file) print result return result else: raise Exception(("Autoweka crashed, have a look at %s" % (output_file,)))
def main(): """Start an optimization of the HPOlib. For documentation see the comments inside this function and the general HPOlib documentation.""" args, unknown_arguments = use_arg_parser() if args.working_dir: experiment_dir = args.working_dir elif args.restore: args.restore = os.path.abspath(args.restore) + "/" experiment_dir = args.restore else: experiment_dir = os.getcwd() formatter = logging.Formatter( '[%(levelname)s] [%(asctime)s:%(name)s] %(' 'message)s', datefmt='%H:%M:%S') handler = logging.StreamHandler(sys.stdout) handler.setFormatter(formatter) hpolib_logger.addHandler(handler) hpolib_logger.setLevel(1) # First of all print the infodevel if IS_DEVELOPMENT: logger.critical(INFODEVEL) args, unknown_arguments = use_arg_parser() # Convert the path to the optimizer to be an absolute path, which is # necessary later when we change the working directory optimizer = args.optimizer print("opti:", optimizer) if not os.path.isabs(optimizer): relative_path = optimizer optimizer = os.path.abspath(optimizer) logger.info( "Converting relative optimizer path %s to absolute " "optimizer path %s.", relative_path, optimizer) os.chdir(experiment_dir) experiment_dir = os.getcwd() check_before_start.check_first(experiment_dir) # Now we can safely import non standard things import numpy as np import HPOlib.Experiment as Experiment # Wants numpy and scipy # Check how many optimizer versions are present and if all dependencies # are installed also dynamically load optimizer obj optimizer_version, opt_obj = check_before_start.check_optimizer(optimizer) logger.warning( "You called -o %s, I am using optimizer defined in " "%sDefault.cfg", optimizer, optimizer_version) optimizer = os.path.basename(optimizer_version) config = wrapping_util.get_configuration(experiment_dir, optimizer_version, unknown_arguments, opt_obj) # DO NOT LOG UNTIL HERE UNLESS SOMETHING DRAMATIC HAS HAPPENED!!! 
loglevel = config.getint("HPOLIB", "HPOlib_loglevel") hpolib_logger.setLevel(loglevel) if args.silent: hpolib_logger.setLevel(60) if args.verbose: hpolib_logger.setLevel(10) # Saving the config file is down further at the bottom, as soon as we get # hold of the new optimizer directory # wrapping_dir = os.path.dirname(os.path.realpath(__file__)) # Load optimizer try: optimizer_dir = os.path.dirname(os.path.realpath(optimizer_version)) optimizer_module = imp.load_source(optimizer_dir, optimizer_version + ".py") except (ImportError, IOError): logger.critical("Optimizer module %s not found", optimizer) import traceback logger.critical(traceback.format_exc()) sys.exit(1) # So the optimizer module can acces the seed from the config and config.set("HPOLIB", "seed", str(args.seed)) experiment_directory_prefix = config.get("HPOLIB", "experiment_directory_prefix") optimizer_call, optimizer_dir_in_experiment = \ opt_obj.main(config=config, options=args, experiment_dir=experiment_dir) # experiment_directory_prefix=experiment_directory_prefix) cmd = optimizer_call # Start the server for logging from subprocesses here, because its port must # be written to the config file. logging_host = config.get("HPOLIB", "logging_host") if logging_host: logging_receiver_thread = None default_logging_port = DEFAULT_TCP_LOGGING_PORT for logging_port in range(default_logging_port, 65535): try: logging_receiver = logging_server.LoggingReceiver( host=logging_host, port=logging_port, handler=logging_server.LogRecordStreamHandler) logging_receiver_thread = Thread( target=logging_receiver.serve_forever) logging_receiver_thread.daemon = True logger.info('%s started at %s' % (logging_receiver.__class__.__name__, logging_receiver.server_address)) logging_receiver_thread.start() break # TODO I did not find any useful documentation about which Exceptions # I should catch here... 
except Exception as e: logger.debug(e) logger.debug(e.message) if logging_receiver_thread is None: logger.critical( "Could not create the logging server. Going to shut " "down.") sys.exit(1) config.set("HPOLIB", "logging_port", str(logging_port)) with open(os.path.join(optimizer_dir_in_experiment, "config.cfg"), "w") as f: config.set("HPOLIB", "is_not_original_config_file", "True") wrapping_util.save_config_to_file(f, config, write_nones=True) # initialize/reload pickle file if args.restore: try: os.remove( os.path.join(optimizer_dir_in_experiment, optimizer + ".pkl.lock")) except OSError: pass folds = config.getint('HPOLIB', 'number_cv_folds') trials = Experiment.Experiment( expt_dir=optimizer_dir_in_experiment, expt_name=experiment_directory_prefix + optimizer, folds=folds, max_wallclock_time=config.get('HPOLIB', 'cpu_limit'), title=args.title) trials.optimizer = optimizer_version optimizer_output_file = os.path.join( optimizer_dir_in_experiment, optimizer + wrapping_util.get_time_string() + "_" + str(args.seed) + ".out") if args.restore: # noinspection PyBroadException try: restored_runs = optimizer_module.restore( config=config, optimizer_dir=optimizer_dir_in_experiment, cmd=cmd) except: logger.critical("Could not restore runs for %s", args.restore) import traceback logger.critical(traceback.format_exc()) sys.exit(1) logger.info("Restored %d runs", restored_runs) trials.remove_all_but_first_runs(restored_runs) fh = open(optimizer_output_file, "a") fh.write("#" * 80 + "\n" + "Restart! 
Restored %d runs.\n" % restored_runs) fh.close() if len(trials.endtime) < len(trials.starttime): trials.endtime.append(trials.cv_endtime[-1]) trials.starttime.append(time.time()) else: trials.starttime.append(time.time()) # noinspection PyProtectedMember trials._save_jobs() del trials sys.stdout.flush() # Run call if args.printcmd: logger.info(cmd) return 0 else: # Create a second formatter and handler to customize the optimizer # output optimization_formatter = logging.Formatter( '[%(levelname)s] [%(asctime)s:%(optimizer)s] %(message)s', datefmt='%H:%M:%S') optimization_handler = logging.StreamHandler(sys.stdout) optimization_handler.setFormatter(optimization_formatter) optimization_logger = logging.getLogger(optimizer) optimization_logger.addHandler(optimization_handler) optimizer_loglevel = config.getint("HPOLIB", "optimizer_loglevel") optimization_logger.setLevel(optimizer_loglevel) # Use a flag which is set to true as soon as all children are # supposed to be killed exit_ = wrapping_util.Exit() signal.signal(signal.SIGTERM, exit_.signal_callback) signal.signal(signal.SIGABRT, exit_.signal_callback) signal.signal(signal.SIGINT, exit_.signal_callback) signal.signal(signal.SIGHUP, exit_.signal_callback) # Change into the current experiment directory # Some optimizer might expect this dir_before_exp = os.getcwd() temporary_output_dir = config.get("HPOLIB", "temporary_output_directory") if temporary_output_dir: last_part = os.path.split(optimizer_dir_in_experiment)[1] temporary_output_dir = os.path.join(temporary_output_dir, last_part) # Replace any occurence of the path in the command cmd = cmd.replace(optimizer_dir_in_experiment, temporary_output_dir) optimizer_output_file = optimizer_output_file.replace( optimizer_dir_in_experiment, temporary_output_dir) shutil.copytree(optimizer_dir_in_experiment, temporary_output_dir) # shutil.rmtree does not work properly with NFS # https://github.com/hashdist/hashdist/issues/113 # Idea from 
https://github.com/ahmadia/hashdist/ for rmtree_iter in range(5): try: shutil.rmtree(optimizer_dir_in_experiment) break except OSError, e: time.sleep(rmtree_iter) optimizer_dir_in_experiment = temporary_output_dir # call target_function.setup() fn_setup = config.get("HPOLIB", "function_setup") if fn_setup: # if temporary_output_dir: # logger.critical("The options 'temporary_output_directory' " # "and 'function_setup' cannot be used " # "together.") # sys.exit(1) fn_setup_output = os.path.join(optimizer_dir_in_experiment, "function_setup.out") runsolver_cmd = runsolver_wrapper._make_runsolver_command( config, fn_setup_output) setup_cmd = runsolver_cmd + " " + fn_setup # runsolver_output = subprocess.STDOUT runsolver_output = open("/dev/null") runsolver_wrapper._run_command_with_shell(setup_cmd, runsolver_output) os.chdir(optimizer_dir_in_experiment) logger.info(cmd) output_file = optimizer_output_file fh = open(output_file, "a") cmd = shlex.split(cmd) print cmd # See man 7 credentials for the meaning of a process group id # This makes wrapping.py useable with SGEs default behaviour, # where qdel sends a SIGKILL to a whole process group # logger.info(os.getpid()) # os.setpgid(os.getpid(), os.getpid()) # same as os.setpgid(0, 0) # TODO: figure out why shell=True was removed in commit f47ac4bb3ffe7f70b795d50c0828ca7e109d2879 # maybe it has something todo with the previous behaviour where a # session id was set... proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) global child_process_pid child_process_pid = proc.pid process = psutil.Process(os.getpid()) logger.info( "-----------------------RUNNING----------------------------------") # http://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python # How often is the experiment pickle supposed to be opened? 
if config.get("HPOLIB", "total_time_limit"): optimizer_end_time = time.time() + config.getint( "HPOLIB", "total_time_limit") else: optimizer_end_time = sys.float_info.max sent_SIGINT = False sent_SIGINT_time = np.inf sent_SIGTERM = False sent_SIGTERM_time = np.inf sent_SIGKILL = False sent_SIGKILL_time = np.inf children_to_kill = list() def enqueue_output(out, queue): for line in iter(out.readline, b''): queue.put(line) out.close() stderr_queue = Queue() stdout_queue = Queue() stderr_thread = Thread(target=enqueue_output, args=(proc.stderr, stderr_queue)) stdout_thread = Thread(target=enqueue_output, args=(proc.stdout, stdout_queue)) stderr_thread.daemon = True stdout_thread.daemon = True stderr_thread.start() stdout_thread.start() if not (args.verbose or args.silent): logger.info('Optimizer runs with PID: %d', proc.pid) logger.info('We start in directory %s', os.getcwd()) while True: # this implements the total runtime limit if time.time() > optimizer_end_time and not sent_SIGINT: logger.info("Reached total_time_limit, going to shutdown.") exit_.true() # necessary, otherwise HPOlib-run takes 100% of one processor time.sleep(0.25) try: while True: line = stdout_queue.get_nowait() fh.write(line) fh.flush() optimization_logger.info(line.replace("\n", ""), extra={'optimizer': optimizer}) except Empty: pass try: while True: line = stderr_queue.get_nowait() fh.write(line) fh.flush() optimization_logger.error(line.replace("\n", ""), extra={'optimizer': optimizer}) except Empty: pass ret = proc.poll() if ret is not None: # This does not include wrapping.py children = process.children() if len(children) == 0: break # TODO: what happens if we have a ret but something is still # running? 
if exit_.get_exit() == True and not sent_SIGINT: logger.critical("Shutdown procedure: Sending SIGINT") wrapping_util.kill_processes(signal.SIGINT) sent_SIGINT_time = time.time() sent_SIGINT = True if exit_.get_exit() == True and not sent_SIGTERM and time.time() \ > sent_SIGINT_time + 5: logger.critical("Shutdown procedure: Sending SIGTERM") wrapping_util.kill_processes(signal.SIGTERM) sent_SIGTERM_time = time.time() sent_SIGTERM = True if exit_.get_exit() == True and not sent_SIGKILL and time.time() \ > sent_SIGTERM_time + 5: logger.critical("Shutdown procedure: Sending SIGKILL") wrapping_util.kill_processes(signal.SIGKILL) sent_SIGKILL_time = time.time() sent_SIGKILL = True logger.info( "-----------------------END--------------------------------------") ret = proc.returncode logger.info("Finished with return code: %d", ret) del proc fh.close() # Change back into to directory os.chdir(dir_before_exp) # call target_function.setup() fn_teardown = config.get("HPOLIB", "function_teardown") if fn_teardown: # if temporary_output_dir: # logger.critical("The options 'temporary_output_directory' " # "and 'function_teardown' cannot be used " # "together.") # sys.exit(1) fn_teardown_output = os.path.join(optimizer_dir_in_experiment, "function_teardown.out") runsolver_cmd = runsolver_wrapper._make_runsolver_command( config, fn_teardown_output) teardown_cmd = runsolver_cmd + " " + fn_teardown # runsolver_output = subprocess.STDOUT runsolver_output = open("/dev/null") runsolver_wrapper._run_command_with_shell(teardown_cmd, runsolver_output) if temporary_output_dir: # We cannot be sure that the directory # optimizer_dir_in_experiment in dir_before_exp got deleted # properly, therefore we append an underscore to the end of the # filename last_part = os.path.split(optimizer_dir_in_experiment)[1] new_dir = os.path.join(dir_before_exp, last_part) try: shutil.copytree(optimizer_dir_in_experiment, new_dir) except OSError as e: new_dir += "_" shutil.copytree(optimizer_dir_in_experiment, 
new_dir) # shutil.rmtree does not work properly with NFS # https://github.com/hashdist/hashdist/issues/113 # Idea from https://github.com/ahmadia/hashdist/ for rmtree_iter in range(5): try: shutil.rmtree(optimizer_dir_in_experiment) break except OSError, e: time.sleep(rmtree_iter) optimizer_dir_in_experiment = new_dir