            if correct_folder:
                del_inds.append(j)
                cmd_path = dir_path
                break

        for j in sorted(del_inds, reverse=True):
            del result_dirs[j]

        if cmd_path is None:
            warn('Command %d could not be matched to a result directory.' % i)
            missing_jobs.append(cmd)
            continue

        ### Read performance summary.
        try:
            performance_dict = hpsearch._SUMMARY_PARSER_HANDLE(cmd_path, i)
        except:
            #traceback.print_exc(file=sys.stdout)
            warn('Could not read performance summary from command %d.' % i)
            missing_jobs.append(cmd)
            to_be_deleted.append(cmd_path)
            continue

        # Delete underperforming runs.
        if args.performance_criteria is not None:
            if not hpsearch._PERFORMANCE_EVAL_HANDLE(performance_dict,
                    args.performance_criteria):
                to_be_deleted.append(cmd_path)

        # Check whether jobs have finished properly.
        assert 'finished' in performance_dict
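
# The following is a minimal sketch (not part of the original script) of a
# performance-evaluation handle as consumed via
# `hpsearch._PERFORMANCE_EVAL_HANDLE` above. It assumes the summary parser
# returns a dict mapping keys to lists of strings, which matches accesses
# like `performance_dict['finished'][0]` elsewhere in this file. The key
# 'acc_final' is a hypothetical placeholder for the actual performance key.
def _example_performance_eval(performance_dict, performance_criteria):
    """Hypothetical handle: keep runs whose final score reaches the
    threshold given by ``performance_criteria``."""
    return float(performance_dict['acc_final'][0]) >= performance_criteria
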
def run(ref_module, results_dir='./out/random_seeds', config=None,
        ignore_kwds=None, forced_params=None):
    """Run the script.

    Args:
        ref_module (str): Name of the reference module which contains the
            hyperparameter search config that can be modified to gather
            random seeds.
        results_dir (str, optional): The path where to store the results.
        config: The Namespace object containing argument names and values.
            If provided, all random seeds will be gathered from zero, with
            no reference run.
        ignore_kwds (list, optional): The list of keywords in the config
            file to exclude from the grid.
        forced_params (dict, optional): Dict of key-value pairs specifying
            hyperparameter values that should be fixed across runs.
    """
    if ignore_kwds is None:
        ignore_kwds = []
    if forced_params is None:
        forced_params = {}

    ### Parse the command-line arguments.
    parser = argparse.ArgumentParser(description= \
        'Gathering random seeds for the specified experiment.')
    parser.add_argument('--out_dir', type=str, default='',
                        help='The output directory of the run or runs. ' +
                             'For single runs, the configuration will be ' +
                             'loaded and run with different seeds. ' +
                             'For multiple runs, i.e. results of ' +
                             'hyperparameter searches, the configuration ' +
                             'leading to the best mean final accuracy ' +
                             'will be selected and run with different ' +
                             'seeds. Default: %(default)s.')
    parser.add_argument('--config_name', type=str,
                        default='hpsearch_random_seeds.py',
                        help='The name of the hpsearch config file. Since ' +
                             'multiple random seed gathering experiments ' +
                             'might be running in parallel, it is ' +
                             'important that this file has a unique name ' +
                             'for each experiment. Default: %(default)s.')
    parser.add_argument('--config_pickle', type=str, default='',
                        help='The path to a pickle file containing a run ' +
                             'config that will be loaded.')
    parser.add_argument('--num_seeds', type=int, default=10,
                        help='The number of different random seeds.')
    # FIXME `None` is not a valid default value.
    parser.add_argument('--seeds_list', type=str, default=None,
                        help='The list of seeds to use. If specified, ' +
                             '"num_seeds" will be ignored.')
    parser.add_argument('--vary_data_seed', action='store_true',
                        help='If activated, "data_random_seed"s are set ' +
                             'equal to "random_seed"s. Otherwise only ' +
                             '"random_seed"s are varied.')
    parser.add_argument('--num_tot_hours', type=int, metavar='N',
                        default=120,
                        help='If "run_cluster" is activated, then this ' +
                             'option determines the maximum number of ' +
                             'hours the entire search may run on the ' +
                             'cluster. Default: %(default)s.')
    # FIXME Arguments below are copied from hpsearch.
    parser.add_argument('--run_cluster', action='store_true',
                        help='This option would produce jobs for a GPU ' +
                             'cluster running a job scheduler (see option ' +
                             '"scheduler").')
    parser.add_argument('--scheduler', type=str, default='lsf',
                        choices=['lsf', 'slurm'],
                        help='The job scheduler used on the cluster. ' +
                             'Default: %(default)s.')
    parser.add_argument('--num_jobs', type=int, metavar='N', default=8,
                        help='If "run_cluster" is activated, then this ' +
                             'option determines the maximum number of ' +
                             'jobs that can be submitted in parallel. ' +
                             'Default: %(default)s.')
    parser.add_argument('--num_hours', type=int, metavar='N', default=24,
                        help='If "run_cluster" is activated, then this ' +
                             'option determines the maximum number of ' +
                             'hours a job may run on the cluster. ' +
                             'Default: %(default)s.')
    parser.add_argument('--resources', type=str,
                        default='"rusage[mem=8000, ngpus_excl_p=1]"',
                        help='If "run_cluster" is activated and ' +
                             '"scheduler" is "lsf", then this option ' +
                             'determines the resources assigned to each ' +
                             'job in the hyperparameter search (option -R ' +
                             'of bsub). Default: %(default)s.')
    parser.add_argument('--slurm_mem', type=str, default='8G',
                        help='If "run_cluster" is activated and ' +
                             '"scheduler" is "slurm", then this value ' +
                             'will be passed as argument "mem" of ' +
                             '"sbatch". An empty string means that "mem" ' +
                             'will not be specified. ' +
                             'Default: %(default)s.')
    parser.add_argument('--slurm_gres', type=str, default='gpu:1',
                        help='If "run_cluster" is activated and ' +
                             '"scheduler" is "slurm", then this value ' +
                             'will be passed as argument "gres" of ' +
                             '"sbatch". An empty string means that "gres" ' +
                             'will not be specified. ' +
                             'Default: %(default)s.')
    parser.add_argument('--slurm_partition', type=str, default='',
                        help='If "run_cluster" is activated and ' +
                             '"scheduler" is "slurm", then this value ' +
                             'will be passed as argument "partition" of ' +
                             '"sbatch". An empty string means that ' +
                             '"partition" will not be specified. ' +
                             'Default: %(default)s.')
    parser.add_argument('--slurm_qos', type=str, default='',
                        help='If "run_cluster" is activated and ' +
                             '"scheduler" is "slurm", then this value ' +
                             'will be passed as argument "qos" of ' +
                             '"sbatch". An empty string means that "qos" ' +
                             'will not be specified. ' +
                             'Default: %(default)s.')
    parser.add_argument('--slurm_constraint', type=str, default='',
                        help='If "run_cluster" is activated and ' +
                             '"scheduler" is "slurm", then this value ' +
                             'will be passed as argument "constraint" of ' +
                             '"sbatch". An empty string means that ' +
                             '"constraint" will not be specified. ' +
                             'Default: %(default)s.')
    parser.add_argument('--visible_gpus', type=str, default='',
                        help='If "run_cluster" is NOT activated, then ' +
                             'this option determines the CUDA devices ' +
                             'visible to the hyperparameter search. A ' +
                             'string of comma separated integers is ' +
                             'expected. If the list is empty, then all ' +
                             'GPUs of the machine are used. If "-1" is ' +
                             'given, the jobs will be executed ' +
                             'sequentially and not assigned to a ' +
                             'particular GPU. Default: %(default)s.')
    parser.add_argument('--allowed_load', type=float, default=0.5,
                        help='If "run_cluster" is NOT activated, then ' +
                             'this option determines the maximum load a ' +
                             'GPU may have such that another process may ' +
                             'start on it. The relative load is ' +
                             'specified, i.e., a number between 0 and 1. ' +
                             'Default: %(default)s.')
    parser.add_argument('--allowed_memory', type=float, default=0.5,
                        help='If "run_cluster" is NOT activated, then ' +
                             'this option determines the maximum memory ' +
                             'usage a GPU may have such that another ' +
                             'process may start on it. The relative ' +
                             'memory usage is specified, i.e., a number ' +
                             'between 0 and 1. Default: %(default)s.')
    parser.add_argument('--sim_startup_time', type=int, metavar='N',
                        default=60,
                        help='If "run_cluster" is NOT activated, then ' +
                             'this option determines the startup time of ' +
                             'simulations. If a job was assigned to a ' +
                             'GPU, then this time (in seconds) has to ' +
                             'pass before options "allowed_load" and ' +
                             '"allowed_memory" are checked to decide ' +
                             'whether a new process can be sent to a ' +
                             'GPU. Default: %(default)s.')
    parser.add_argument('--max_num_jobs_per_gpu', type=int, metavar='N',
                        default=1,
                        help='If "run_cluster" is NOT activated, then ' +
                             'this option determines the maximum number ' +
                             'of jobs per GPU that can be submitted in ' +
                             'parallel. Note, this script does not ' +
                             'validate whether other processes are ' +
                             'already assigned to a GPU. ' +
                             'Default: %(default)s.')
    cmd_args = parser.parse_args()

    out_dir = cmd_args.out_dir
    if cmd_args.out_dir == '' and cmd_args.config_pickle != '':
        with open(cmd_args.config_pickle, "rb") as f:
            config = pickle.load(f)

    # Either a config or an experiment folder needs to be provided.
    assert config is not None or cmd_args.out_dir != ''
    if cmd_args.out_dir == '':
        out_dir = config.out_dir

    # Make sure that the provided hpsearch config file name does not exist.
    config_name = cmd_args.config_name
    if config_name[-3:] != '.py':
        config_name = config_name + '.py'
    if os.path.exists(config_name):
        overwrite = input('The config file "%s" ' % config_name + \
            'already exists! Do you want to overwrite the file? [y/n] ')
        if overwrite not in ['yes', 'y', 'Y']:
            exit()

    # The following ensures that we can safely use `basename` later on.
    out_dir = os.path.normpath(out_dir)

    ### Create directory for results.
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    # Define a subfolder for the current random seed runs.
    results_dir = os.path.join(results_dir, os.path.basename(out_dir))
    print('Random seeds will be gathered in folder %s.' % results_dir)
    if os.path.exists(results_dir):
        # If random seeds have been gathered already, simply get the
        # results for publication.
        write_seeds_summary(results_dir)

        raise RuntimeError('Output directory %s already exists! ' % \
            results_dir + 'Seems like random seeds have already been ' +
            'gathered.')

    ### Get the experiments config.
    num_seeds = cmd_args.num_seeds
    if config is None:
        # Check if the current directory corresponds to a single run or not.
        # FIXME quick and dirty solution to figure out whether it's a
        # single run.
        single_run = False
        if not os.path.exists(os.path.join(out_dir, 'search_results.csv')) \
                and not os.path.exists(os.path.join(out_dir,
                    'postprocessing_results.csv')):
            single_run = True

        # Get the configuration.
        if single_run:
            config = get_single_run_config(out_dir)
            best_out_dir = out_dir
        else:
            config, best_out_dir = get_hpsearch_config(out_dir)

        # Since we already have a reference run, we can run one seed less.
        num_seeds -= 1

    if cmd_args.seeds_list is not None:
        seeds_list = misc.str_to_ints(cmd_args.seeds_list)
        cmd_args.num_seeds = len(seeds_list)
    else:
        seeds_list = list(range(num_seeds))

    # Replace config values provided via `forced_params`.
    if len(forced_params.keys()) > 0:
        for kwd, value in forced_params.items():
            setattr(config, kwd, value)

    ### Write down the hp search grid module in its own file.
    ref_module_basename = ref_module[[i for i, e in \
        enumerate(ref_module) if e == '.'][-1]+1:]
    ref_module_path = ref_module[:[i for i, e in \
        enumerate(ref_module) if e == '.'][-1]+1]
    shutil.copy(ref_module_basename + '.py', config_name)

    # Define the kwds to be added to the grid.
    kwds = list(vars(config).keys())
    for kwd in ignore_kwds:
        if kwd in kwds:
            kwds.remove(kwd)

    # Remove old grid and write new grid, and remove conditions.
    grid_loc = delete_object_from_text(config_name, 'grid', '{', '}')
    random_seeds = write_new_grid_to_text(config_name, config, grid_loc, \
        seeds_list, cmd_args, kwds=kwds)
    cond_loc = delete_object_from_text(config_name, 'conditions', '[', ']')
    write_new_conditions_to_text(config_name, cond_loc, random_seeds,
                                 cmd_args)

    ### Run the hpsearch code with different random seeds.
    hpsearch_module = ref_module_path + config_name[:-3]
    cmd_str = get_command_line(hpsearch_module, results_dir, cmd_args)
    print(cmd_str)

    if cmd_args.run_cluster and cmd_args.scheduler == 'slurm':
        # FIXME hacky solution to write SLURM job script.
        # FIXME might be wrong to give the same `slurm_qos` to the
        # hpsearch, as the job might have to run much longer.
        job_script_fn = hpsearch._write_slurm_script(Namespace(**{
                'num_hours': cmd_args.num_tot_hours,
                'slurm_mem': '8G',
                'slurm_gres': '',
                'slurm_partition': cmd_args.slurm_partition,
                'slurm_qos': cmd_args.slurm_qos,
                'slurm_constraint': cmd_args.slurm_constraint,
            }), cmd_str, 'random_seeds')

        cmd_str = 'sbatch %s' % job_script_fn
        print('We will execute command "%s".' % cmd_str)

    # Execute the program.
    print('Starting gathering random seeds...')
    ret = call(cmd_str, shell=True, executable='/bin/bash')
    print('Call finished with return code %d.' % ret)

    ### Add results of the reference run to our results folder.
    new_best_out_dir = os.path.join(results_dir, os.path.basename(out_dir))
    copy_tree(best_out_dir, new_best_out_dir)

    ### Store results of given run in CSV file.
    # FIXME Extremely ugly solution.
    imported_grid_module = importlib.import_module(hpsearch_module)
    hpsearch._read_config(imported_grid_module)

    results_file = os.path.join(results_dir, 'search_results.csv')

    cmd_dict = dict()
    for k in kwds:
        cmd_dict[k] = getattr(config, k)

    # Get training results.
    performance_dict = hpsearch._SUMMARY_PARSER_HANDLE(new_best_out_dir, -1)
    for k, v in performance_dict.items():
        cmd_dict[k] = v

    # Create or update the CSV file summarizing all runs.
    panda_frame = pd.DataFrame.from_dict(cmd_dict)
    if os.path.isfile(results_file):
        old_frame = pd.read_csv(results_file, sep=';')
        panda_frame = pd.concat([old_frame, panda_frame], sort=True)
    panda_frame.to_csv(results_file, sep=';', index=False)

    # Create a text file aggregating all results for publication.
    write_seeds_summary(results_dir)
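
# Example (hypothetical usage, not part of the original script): assuming a
# wrapper script `gather_seeds.py` that calls
# `run('probe.hpsearch_config_template')`, where `probe.hpsearch_config_template`
# stands for an actual grid config module, seeds for a finished
# hyperparameter search in "./out/hpsearch" could be gathered via
#
#     python3 gather_seeds.py --out_dir=./out/hpsearch --num_seeds=20
#
# All CLI flags used above are defined by this function's argument parser.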
def write_seeds_summary(results_dir, summary_keys, summary_sem,
                        summary_precs, ret_seeds=False, summary_fn=None,
                        seeds_summary_fn='seeds_summary_text.txt'):
    """Write the mean and std (resp. SEM) across all seeds to a text file.

    Args:
        results_dir (str): The results directory.
        summary_keys (list): See argument ``summary_keys`` of function
            :func:`run`.
        summary_sem (bool): See argument ``summary_sem`` of function
            :func:`run`.
        summary_precs (list or int, optional): See argument
            ``summary_precs`` of function :func:`run`.
        ret_seeds (bool, optional): If activated, the random seeds of all
            considered runs are returned as a list.
        summary_fn (str, optional): If given, this will determine the name
            of the summary file within individual runs.
        seeds_summary_fn (str, optional): The name to give to the summary
            file across all seeds.
    """
    random_seeds = []

    if summary_precs is None:
        summary_precs = 2
    if isinstance(summary_precs, int):
        summary_precs = [summary_precs] * len(summary_keys)
    else:
        assert len(summary_keys) == len(summary_precs)

    # Iterate over all result folders.
    score_dict = {}
    n_scores = 0
    for k in summary_keys:
        score_dict[k] = []
    seed_dirs = []
    seed_dir_prefix = {}
    for i, sim_dir in enumerate(os.listdir(results_dir)):
        sim_path = os.path.join(results_dir, sim_dir)
        if not os.path.isdir(sim_path):
            continue

        try:
            performance_dict = hpsearch._SUMMARY_PARSER_HANDLE(sim_path, i,
                summary_fn=summary_fn)
        except:
            warn('Cannot read results from simulation "%s"!' % sim_dir)
            continue

        has_finished = int(performance_dict['finished'][0])
        if not has_finished:
            warn('Simulation "%s" did not finish!' % sim_dir)
            continue

        n_scores += 1
        for k in summary_keys:
            score_dict[k].append(float(performance_dict[k][0]))

        if ret_seeds:
            sim_config = get_single_run_config(sim_path)
            random_seeds.append(sim_config.random_seed)

        seed_dirs.append(sim_path)
        if sim_dir.count('_') == 2:
            prefix = sim_dir[:sim_dir.rfind('_')]
            if prefix not in seed_dir_prefix.keys():
                seed_dir_prefix[prefix] = [sim_path, 1]
            else:
                seed_dir_prefix[prefix][1] += 1
        else:
            seed_dir_prefix[sim_dir] = [sim_path, 1]

    # In case the gathering has been started from an existing seed, we can
    # try to determine its path. Note, this might turn out difficult if
    # only single seeds have been gathered or the seed gathering has been
    # started multiple times.
    original_seed_path = None
    nunique = 0
    for k, v in seed_dir_prefix.items():
        if v[1] == 1:
            original_seed_path = v[0]
            nunique += 1
    if nunique > 1:
        original_seed_path = None

    if n_scores == 0:
        raise RuntimeError('No results found!')

    score_means = {}
    score_devs = {}
    # Get averages across seeds.
    for k in summary_keys:
        score_means[k] = np.mean(score_dict[k])
        score_devs[k] = np.std(score_dict[k])
        if summary_sem:
            score_devs[k] /= np.sqrt(n_scores)

    # Write into a summary text file.
    filename = os.path.join(results_dir, seeds_summary_fn)
    with open(filename, "w") as f:
        for i, k in enumerate(summary_keys):
            p = summary_precs[i]
            f.write(('%s (mean +/- %s): %.'+str(p)+'f +- %.'+str(p)+'f\n') \
                % (k, 'sem' if summary_sem else 'std', score_means[k],
                   score_devs[k]))
        f.write('Number of seeds: %i \n\n' % n_scores)

        f.write('Publication tables style: \n')
        f.write('%s \n' % summary_keys)
        tab_str = ''
        for i, k in enumerate(summary_keys):
            if i > 0:
                tab_str += ' & '
            p = summary_precs[i]
            tab_str += ('%.'+str(p)+'f $\\pm$ %.'+str(p)+'f ') \
                % (score_means[k], score_devs[k])
        f.write('%s \n\n' % tab_str)

    return random_seeds if ret_seeds else None
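
# Example (hypothetical usage): aggregating the seeds gathered in
# "./out/random_seeds/my_run", reporting mean and SEM of a performance key
# 'acc_final' (a placeholder for the key defined in the grid module) with
# 3 decimals:
#
#     write_seeds_summary('./out/random_seeds/my_run', ['acc_final'],
#                         True, 3)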
def run(grid_module=None, results_dir='./out/random_seeds', config=None,
        ignore_kwds=None, forced_params=None, summary_keys=None,
        summary_sem=False, summary_precs=None, hpmod_path=None):
    """Run the script.

    Args:
        grid_module (str, optional): Name of the reference module which
            contains the hyperparameter search config that can be modified
            to gather random seeds.
        results_dir (str, optional): The path where the hpsearch should
            store its results.
        config: The Namespace object containing argument names and values.
            If provided, all random seeds will be gathered from zero, with
            no reference run.
        ignore_kwds (list, optional): A list of keywords in the config file
            to exclude from the grid.
        forced_params (dict, optional): Dict of key-value pairs specifying
            hyperparameter values that should be fixed across runs.
        summary_keys (list, optional): If provided, the mean and std of
            these summary keys will be written by function
            :func:`write_seeds_summary`. Otherwise, the performance key
            defined in ``grid_module`` will be used.
        summary_sem (bool): Whether the SEM or SD should be calculated in
            function :func:`write_seeds_summary`.
        summary_precs (list or int, optional): The precision with which the
            summary statistics according to ``summary_keys`` should be
            listed.
        hpmod_path (str, optional): If the hpsearch doesn't reside in the
            same directory as the calling script, then we need to know
            from where to start the hpsearch.
    """
    if ignore_kwds is None:
        ignore_kwds = []
    if forced_params is None:
        forced_params = {}

    ### Parse the command-line arguments.
    parser = argparse.ArgumentParser(description= \
        'Gathering random seeds for the specified experiment.')
    parser.add_argument('--seeds_dir', type=str, default='',
                        help='If provided, all other arguments (except ' +
                             '"grid_module") are ignored! ' +
                             'This is supposed to be the output folder of ' +
                             'a random seed gathering experiment. If ' +
                             'provided, the results (for different seeds) ' +
                             'within this directory are gathered and ' +
                             'written to a human-readable text file.')
    parser.add_argument('--run_dir', type=str, default='',
                        help='The output directory of a simulation or a ' +
                             'hyperparameter search. ' +
                             'For single runs, the configuration will be ' +
                             'loaded and run with different seeds. ' +
                             'For multiple runs, i.e. results of ' +
                             'hyperparameter searches, the configuration ' +
                             'leading to the best performance will be ' +
                             'selected and run with different seeds.')
    parser.add_argument('--config_name', type=str,
                        default='hpsearch_random_seeds',
                        help='A name for this call of gathering random ' +
                             'seeds. As multiple gatherings might be ' +
                             'running in parallel, it is important that ' +
                             'this name is unique for each experiment. ' +
                             'Default: %(default)s.')
    parser.add_argument('--grid_module', type=str, default=grid_module,
                        help='See CLI argument "grid_module" of ' +
                             'hyperparameter search script "hpsearch". ' +
                             ('Default: %(default)s.' \
                              if grid_module is not None else ''))
    parser.add_argument('--num_seeds', type=int, default=10,
                        help='The number of different random seeds.')
    parser.add_argument('--seeds_list', type=str, default='',
                        help='The list of seeds to use. If specified, ' +
                             '"num_seeds" will be ignored.')
    parser.add_argument('--vary_data_seed', action='store_true',
                        help='If activated, "data_random_seed"s are set ' +
                             'equal to "random_seed"s. Otherwise only ' +
                             '"random_seed"s are varied.')
    parser.add_argument('--start_gathering', action='store_true',
                        help='If activated, the actual gathering of ' +
                             'random seeds is started via the ' +
                             '"hpsearch.py" script.')

    # Arguments only required if `start_gathering`.
    hpgroup = parser.add_argument_group('Hpsearch call options')
    hpgroup.add_argument('--hps_num_hours', type=int, metavar='N',
                         default=24,
                         help='If "run_cluster" is activated, then this ' +
                              'option determines the maximum number of ' +
                              'hours the entire search may run on the ' +
                              'cluster. Default: %(default)s.')
    hpgroup.add_argument('--hps_resources', type=str,
                         default='"rusage[mem=8000]"',
                         help='If "run_cluster" is activated and ' +
                              '"scheduler" is "lsf", then this option ' +
                              'determines the resources assigned to the ' +
                              'entire hyperparameter search (option -R ' +
                              'of bsub). Default: %(default)s.')
    hpgroup.add_argument('--hps_slurm_mem', type=str, default='8G',
                         help='See option "slurm_mem". This argument ' +
                              'affects the hyperparameter search itself. ' +
                              'Default: %(default)s.')

    rsgroup = parser.add_argument_group('Random seed hpsearch options')
    hpsearch.hpsearch_cli_arguments(rsgroup, show_out_dir=False,
                                    show_grid_module=False)

    cmd_args = parser.parse_args()

    grid_module = cmd_args.grid_module
    if grid_module is None:
        raise ValueError('"grid_module" needs to be specified.')
    grid_module = importlib.import_module(grid_module)
    hpsearch._read_config(grid_module, require_perf_eval_handle=True)

    if summary_keys is None:
        summary_keys = [hpsearch._PERFORMANCE_KEY]

    ####################################################
    ### Aggregate results of random seed experiments ###
    ####################################################
    if len(cmd_args.seeds_dir):
        print('Writing seed summary ...')
        write_seeds_summary(cmd_args.seeds_dir, summary_keys, summary_sem,
                            summary_precs)
        exit(0)

    #######################################################
    ### Create hp config grid for random seed gathering ###
    #######################################################
    if len(cmd_args.seeds_list) > 0:
        seeds_list = misc.str_to_ints(cmd_args.seeds_list)
        cmd_args.num_seeds = len(seeds_list)
    else:
        seeds_list = list(range(cmd_args.num_seeds))

    if config is not None and cmd_args.run_dir != '':
        raise ValueError('"run_dir" may not be specified if a ' +
                         'configuration is provided directly.')

    # The directory in which the hpsearch results should be written. Will
    # only be specified if the `config` is read from a finished simulation.
    hpsearch_dir = None

    # Get config if not provided.
    if config is None:
        if not os.path.exists(cmd_args.run_dir):
            raise_error = True
            # FIXME hacky solution.
            if cmd_args.run_cwd != '':
                tmp_dir = os.path.join(cmd_args.run_cwd, cmd_args.run_dir)
                if os.path.exists(tmp_dir):
                    cmd_args.run_dir = tmp_dir
                    raise_error = False
            if raise_error:
                raise ValueError('Directory "%s" does not exist!' % \
                                 cmd_args.run_dir)

        # FIXME A bit of a shady decision.
        single_run = False
        if os.path.exists(os.path.join(cmd_args.run_dir, 'config.pickle')):
            single_run = True

        # Get the configuration.
        if single_run:
            config = get_single_run_config(cmd_args.run_dir)
            run_dir = cmd_args.run_dir
        else:
            config, run_dir = get_best_hpsearch_config(cmd_args.run_dir)

        # We should already have one random seed.
        try:
            performance_dict = hpsearch._SUMMARY_PARSER_HANDLE(run_dir, -1)
            has_finished = int(performance_dict['finished'][0])
            if not has_finished:
                raise Exception()
            use_run = True
        except:
            use_run = False

        if use_run:
            # The following ensures that we can safely use `basename`
            # later on.
            run_dir = os.path.normpath(run_dir)

            if not os.path.isabs(results_dir):
                if os.path.isdir(cmd_args.run_cwd):
                    results_dir = os.path.join(cmd_args.run_cwd,
                                               results_dir)
            results_dir = os.path.abspath(results_dir)
            hpsearch_dir = os.path.join(results_dir,
                                        os.path.basename(run_dir))

            if os.path.exists(hpsearch_dir):
                # TODO attempt to write summary and exclude existing seeds.
                warn('Folder "%s" already exists.' % hpsearch_dir)
                print('Attempting to aggregate random seed results ...')
                gathered_seeds = write_seeds_summary(hpsearch_dir,
                    summary_keys, summary_sem, summary_precs,
                    ret_seeds=True)
                if len(gathered_seeds) >= len(seeds_list):
                    print('Already enough seeds have been gathered!')
                    exit(0)
                for gs in gathered_seeds:
                    if gs in seeds_list:
                        seeds_list.remove(gs)
                    else:
                        ignored_seed = seeds_list.pop()
                        if len(cmd_args.seeds_list) > 0:
                            print('Seed %d is ignored as seed %d already ' \
                                  % (ignored_seed, gs) + 'exists.')
            else:
                os.makedirs(hpsearch_dir)

                # We utilize the already existing random seed.
                shutil.copytree(run_dir, os.path.join(hpsearch_dir,
                    os.path.basename(run_dir)))

                if config.random_seed in seeds_list:
                    seeds_list.remove(config.random_seed)
                else:
                    ignored_seed = seeds_list.pop()
                    if len(cmd_args.seeds_list) > 0:
                        print('Seed %d is ignored as seed %d already ' \
                              'exists.' % (ignored_seed,
                                           config.random_seed))

    print('%d random seeds will be gathered!' % len(seeds_list))

    ### Which attributes of the `config` should be ignored?
    # We never set the output directory.
    if hpsearch._OUT_ARG not in ignore_kwds:
        ignore_kwds.append(hpsearch._OUT_ARG)

    for kwd in ignore_kwds:
        delattr(config, kwd)

    ### Replace config values provided via `forced_params`.
    if len(forced_params.keys()) > 0:
        for kwd, value in forced_params.items():
            setattr(config, kwd, value)

    ### Get a filename for where to store the search grid.
    config_dn, config_bn = os.path.split(cmd_args.config_name)
    if len(config_dn) == 0:
        # No relative path given, store only temporary.
        config_dn = tempfile.gettempdir()
    else:
        config_dn = os.path.abspath(config_dn)
    config_fn_prefix = os.path.splitext(config_bn)[0]
    config_name = os.path.join(config_dn, config_fn_prefix + '.pickle')
    if os.path.exists(config_name):
        if len(config_dn) > 0:
            overwrite = input('The config file "%s" ' % config_name + \
                'already exists! Do you want to overwrite the file? [y/n] ')
            if overwrite not in ['yes', 'y', 'Y']:
                exit(1)
        else:
            # Get random temporary filename.
            config_name_temp = tempfile.NamedTemporaryFile( \
                prefix=config_fn_prefix, suffix=".pickle")
            print('Search grid "%s" already exists, using name "%s" ' \
                  'instead!' % (config_name, config_name_temp.name))
            config_name = config_name_temp.name
            config_name_temp.close()

    ### Build and store hpconfig for random seed gathering!
    grid, conditions = build_grid_and_conditions(cmd_args, config,
                                                 seeds_list)

    rseed_config = {
        'grid': grid,
        'conditions': conditions
    }
    with open(config_name, 'wb') as f:
        pickle.dump(rseed_config, f)

    ### Gather random seeds.
    if cmd_args.start_gathering:
        cmd_str = get_hpsearch_call(cmd_args, len(seeds_list), config_name,
                                    hpsearch_dir=hpsearch_dir)
        print(cmd_str)

        ### Start hpsearch.
        if hpmod_path is not None:
            backup_curr_path = os.getcwd()
            os.chdir(hpmod_path)
        if cmd_args.run_cluster and cmd_args.scheduler == 'slurm':
            # FIXME hacky solution to write SLURM job script.
            # FIXME might be wrong to give the same `slurm_qos` to the
            # hpsearch, as the job might have to run much longer.
            job_script_fn = hpsearch._write_slurm_script(Namespace(**{
                    'num_hours': cmd_args.hps_num_hours,
                    'slurm_mem': cmd_args.hps_slurm_mem,
                    'slurm_gres': 'gpu:0',
                    'slurm_partition': cmd_args.slurm_partition,
                    'slurm_qos': cmd_args.slurm_qos,
                    'slurm_constraint': cmd_args.slurm_constraint,
                }), cmd_str, 'random_seeds')

            cmd_str = 'sbatch %s' % job_script_fn
            print('We will execute command "%s".' % cmd_str)

        # Execute the program.
        print('Starting gathering random seeds...')
        ret = call(cmd_str, shell=True, executable='/bin/bash')
        print('Call finished with return code %d.' % ret)
        if hpmod_path is not None:
            os.chdir(backup_curr_path)

        # If we run the hpsearch on the cluster, then we just submitted a
        # job and the search didn't actually run yet.
        if not cmd_args.run_cluster and hpsearch_dir is not None:
            write_seeds_summary(hpsearch_dir, summary_keys, summary_sem,
                                summary_precs)

        print('Random seed gathering finished successfully!')
        exit(0)

    ### Random seeds not gathered yet - finalize program.
    if hpsearch_dir is not None:
        print('IMPORTANT: At least one random seed has already been ' +
              'gathered! Please ensure that the hpsearch forces the ' +
              'correct output path.')

    print('Below is a possible hpsearch call:')
    call_appendix = ''
    if hpsearch_dir is not None:
        call_appendix = '--force_out_dir --dont_force_new_dir ' + \
            '--out_dir=%s' % hpsearch_dir
    print()
    print('python3 hpsearch.py --grid_module=%s --grid_config=%s %s' % \
          (cmd_args.grid_module, config_name, call_appendix))
    print()

    # We print the individual paths to allow easy parsing via `awk` and
    # `xargs`.
    if hpsearch_dir is None:
        print('Below is the "grid_module" name and the path to the ' +
              '"grid_config".')
        print(cmd_args.grid_module, config_name)
    else:
        print('Below is the "grid_module" name, the path to the ' +
              '"grid_config" and the output path that should be used for ' +
              'the hpsearch.')
        print(cmd_args.grid_module, config_name, hpsearch_dir)
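
# Example (hypothetical CLI usage): build the seed grid from the best run
# of a finished hyperparameter search and start gathering right away,
# assuming this module is executed as `gather_random_seeds.py` and the grid
# module is `probe.hpsearch_config`:
#
#     python3 gather_random_seeds.py --grid_module=probe.hpsearch_config \
#         --run_dir=./out/hpsearch --num_seeds=10 --start_gathering
#
# Without "--start_gathering", the script only writes the pickled grid and
# prints the "hpsearch.py" call shown above.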
def get_best_hpsearch_config(out_dir):
    """Load the config file from the best run of a hyperparameter search.

    This function loads the results of the hyperparameter search and
    selects the configuration that led to the best performance score.

    Args:
        out_dir (str): The path to the hpsearch result folder.

    Returns:
        (tuple): Tuple containing:

        - **config**: The config of the best run.
        - **best_out_dir**: The path to the best run.
    """
    run_dirs = os.listdir(out_dir)
    if 'TO_BE_DELETED' in run_dirs:
        # Postprocessing has marked some runs for deletion!
        run_dirs.remove('TO_BE_DELETED')
        # Keep the entries relative to `out_dir` such that the path join
        # below remains valid.
        run_dirs.extend([os.path.join('TO_BE_DELETED', d) for d in
            os.listdir(os.path.join(out_dir, 'TO_BE_DELETED'))])

    curr_best_dir = None
    curr_best_score = None
    for i, sim_dir in enumerate(run_dirs):
        sim_path = os.path.join(out_dir, sim_dir)
        if not os.path.isdir(sim_path):
            continue
        if not os.path.exists(os.path.join(sim_path,
                                           hpsearch._SUMMARY_FILENAME)):
            # No result summary in folder.
            continue

        try:
            performance_dict = hpsearch._SUMMARY_PARSER_HANDLE(sim_path, i)
        except:
            #warn('Cannot read results from simulation "%s"!' % sim_dir)
            continue

        has_finished = int(performance_dict['finished'][0])
        if not has_finished:
            #warn('Simulation "%s" did not finish!' % sim_dir)
            continue

        curr_score = float(performance_dict[hpsearch._PERFORMANCE_KEY][0])
        if curr_best_dir is None:
            curr_best_dir = sim_path
            curr_best_score = curr_score
        elif hpsearch._PERFORMANCE_SORT_ASC:
            if curr_score < curr_best_score:
                curr_best_dir = sim_path
                curr_best_score = curr_score
        else:
            if curr_score > curr_best_score:
                curr_best_dir = sim_path
                curr_best_score = curr_score

    if curr_best_dir is None:
        raise RuntimeError('Did not find any finished run!')

    return get_single_run_config(curr_best_dir), curr_best_dir
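
# Example (hypothetical usage): retrieving the winning configuration of a
# finished search stored in "./out/hpsearch":
#
#     config, best_dir = get_best_hpsearch_config('./out/hpsearch')
#     print('Best run stored in "%s".' % best_dir)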