def __init__(self, folderpath, create_if_notexists=False):
    """Open the memo stored in `folderpath` and rebuild its key indexes.

    The folder is created through `tb_fs.create_folder` (without aborting
    when it already exists); `create_if_notexists` is forwarded as
    `create_parent_folders`. Every `file_config-<name>.json` found in the
    folder is registered in `key_to_filename`, and every
    `memo_config-<name>.json` in `key_to_foldername`, keyed by
    `self._key_from_config` applied to the JSON content.
    """
    self.folderpath = folderpath
    self.key_to_filename = {}
    self.key_to_foldername = {}
    self.key_to_memo = {}

    tb_fs.create_folder(
        folderpath,
        abort_if_exists=False,
        create_parent_folders=create_if_notexists)

    # config file prefix -> index that records the entries of that kind.
    suffix = '.json'
    index_by_prefix = (
        ('file_config-', self.key_to_filename),  # for the files.
        ('memo_config-', self.key_to_foldername),  # for the sub-memos.
    )

    # initialize the memo based on the state of the memo folder.
    for path in tb_fs.list_files(folderpath):
        basename = tb_fs.path_last_element(path)
        if not basename.endswith(suffix):
            continue
        for prefix, index in index_by_prefix:
            if basename.startswith(prefix):
                stem = basename[len(prefix):-len(suffix)]
                config = tb_io.read_jsonfile(path)
                index[self._key_from_config(config)] = stem
                break
def map_experiment_folder(experiment_folderpath, fn):
    """Apply `fn` to each `cfg<i>` folder of an experiment.

    The number of configurations is taken to be the number of direct
    subfolders whose name starts with 'cfg'; the folders visited are then
    `cfg0` ... `cfg<n-1>` (contiguous numbering is assumed, the listed
    names themselves are not used).

    Args:
        experiment_folderpath: Path of the experiment folder.
        fn: Callable taking a configuration folder path.

    Returns:
        A tuple `(ps, rs)` of two index-aligned lists: the configuration
        folder paths and the corresponding values returned by `fn`.
    """
    fo_paths = tb_fs.list_folders(
        experiment_folderpath, recursive=False, use_relative_paths=False)
    num_exps = sum(
        1 for p in fo_paths
        if tb_fs.path_last_element(p).startswith('cfg'))

    ps = []
    rs = []
    # `range` rather than `xrange`: the latter does not exist on Python 3.
    for i in range(num_exps):
        p = tb_fs.join_paths([experiment_folderpath, 'cfg%d' % i])
        rs.append(fn(p))
        ps.append(p)
    return (ps, rs)
def summarize_results(d):
    """Collapse the per-epoch entries of results dict `d` into summary values.

    `d` is modified in place and also returned. The epoch with the highest
    'dev_accuracy' is selected; 'dev_accuracy' and 'test_accuracy' are
    replaced by their value at that epoch, 'epoch_mins' by its mean and
    'epoch_mbs' by its max. The keys 'dev_accuracy_best_epoch',
    'dev_accuracy_at_20', 'cfg_name' and 'cfg_id' are added, and 'used_lr'
    is removed.
    """
    best_epoch = np.argmax(d['dev_accuracy'])

    # additional information for inspection.
    d['dev_accuracy_best_epoch'] = best_epoch
    # must be computed before 'dev_accuracy' is collapsed to a scalar below.
    d['dev_accuracy_at_20'] = np.max(d['dev_accuracy'][:20])
    cfg_name = tb_fs.path_last_element(d['out_folder'])
    d['cfg_name'] = cfg_name
    d['cfg_id'] = int(cfg_name.lstrip('cfg'))

    # keep only the values at the selected epoch.
    for key in ('dev_accuracy', 'test_accuracy'):
        d[key] = d[key][best_epoch]

    d['epoch_mins'] = np.mean(d['epoch_mins'])
    d['epoch_mbs'] = np.max(d['epoch_mbs'])
    del d['used_lr']
    return d
def __init__(self, folderpath, create_if_notexists=False):
    """Open the memo stored in `folderpath` and rebuild its key index.

    The folder is created through `tb_fs.create_folder` (without aborting
    when it already exists); `create_if_notexists` is forwarded as
    `create_parent_folders`. Every `config-<name>.json` file found in the
    folder is registered in `key_to_filename`, keyed by
    `self._key_from_config` applied to the JSON content.
    """
    self.folderpath = folderpath
    self.key_to_filename = {}

    tb_fs.create_folder(
        folderpath,
        abort_if_exists=False,
        create_parent_folders=create_if_notexists)

    prefix = 'config-'
    suffix = '.json'

    # initialize the memo based on the state of the folder.
    for path in tb_fs.list_files(folderpath):
        basename = tb_fs.path_last_element(path)
        if not (basename.startswith(prefix) and basename.endswith(suffix)):
            continue
        stem = basename[len(prefix):-len(suffix)]
        config = tb_io.read_jsonfile(path)
        self.key_to_filename[self._key_from_config(config)] = stem
def create_runall_script(experiment_folderpath):
    """Write an executable `run.sh` that runs every configuration in order.

    The number of configurations is the number of direct subfolders of
    `experiment_folderpath` whose name starts with 'cfg'. The generated
    script has one line per configuration, each being the path to
    `cfg<i>/run.sh` for i in 0..n-1 (contiguous numbering is assumed).
    The script is written to `<experiment_folderpath>/run.sh` and the
    execute bits for user, group and others are added to its mode.

    Args:
        experiment_folderpath: Path of the experiment folder.
    """
    fo_names = tb_fs.list_folders(
        experiment_folderpath, recursive=False, use_relative_paths=True)
    num_exps = sum(
        1 for n in fo_names
        if tb_fs.path_last_element(n).startswith('cfg'))

    # creating the script.
    sc_lines = ['#!/bin/bash']
    # `range` rather than `xrange`: the latter does not exist on Python 3.
    sc_lines.extend(
        tb_fs.join_paths([experiment_folderpath, "cfg%d" % i, 'run.sh'])
        for i in range(num_exps))

    # creating the run all script.
    out_filepath = tb_fs.join_paths([experiment_folderpath, 'run.sh'])
    tb_io.write_textfile(out_filepath, sc_lines, with_newline=True)

    # keep the existing permission bits and add execute for everyone.
    st = os.stat(out_filepath)
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    os.chmod(out_filepath, st.st_mode | exec_bits)
def create_runall_script_with_parallelization(experiment_folderpath):
    """Write an executable `run.sh` that can split configurations over workers.

    The generated bash script accepts
    `run.sh [worker_id num_workers] [--force-rerun]`. Without the worker
    arguments it behaves as worker 0 of 1. A worker runs `cfg<i>/run.sh`
    for every i with `i % num_workers == worker_id`, skipping
    configurations that already have a `results.json` unless
    `--force-rerun` is given.

    The number of configurations is the number of direct subfolders of
    `experiment_folderpath` whose name starts with 'cfg' (contiguous
    numbering cfg0..cfg<n-1> is assumed). The script is written to
    `<experiment_folderpath>/run.sh` and the execute bits for user, group
    and others are added to its mode.

    Args:
        experiment_folderpath: Path of the experiment folder.
    """
    fo_names = tb_fs.list_folders(
        experiment_folderpath, recursive=False, use_relative_paths=True)
    num_exps = sum(
        1 for n in fo_names
        if tb_fs.path_last_element(n).startswith('cfg'))

    usage_line = ' echo "Usage: run.sh [worker_id num_workers] [--force-rerun]"'
    # "cfg$i" is left for bash to expand inside the generated loop.
    results_filepath = tb_fs.join_paths(
        [experiment_folderpath, "cfg$i", 'results.json'])
    run_filepath = tb_fs.join_paths(
        [experiment_folderpath, "cfg$i", 'run.sh'])

    # creating the script.
    sc_lines = [
        '#!/bin/bash',
        # The two bounds must be OR-ed: with `&&` the guard could never
        # fire and calls with more than 3 arguments were silently accepted.
        'if [ "$#" -lt 0 ] || [ "$#" -gt 3 ]; then',
        usage_line,
        ' exit 1',
        'fi',
        'force_rerun=0',
        'if [ $# -eq 0 ] || [ $# -eq 1 ]; then',
        ' worker_id=0',
        ' num_workers=1',
        ' if [ $# -eq 1 ]; then',
        ' if [ "$1" != "--force-rerun" ]; then',
        usage_line,
        ' exit 1',
        ' else',
        ' force_rerun=1',
        ' fi',
        ' fi',
        'else',
        ' worker_id=$1',
        ' num_workers=$2',
        ' if [ $# -eq 3 ]; then',
        ' if [ "$3" != "--force-rerun" ]; then',
        usage_line,
        ' exit 1',
        ' else',
        ' force_rerun=1',
        ' fi',
        ' fi',
        'fi',
        'if [ $num_workers -le $worker_id ] || [ $worker_id -lt 0 ]; then',
        ' echo "Invalid call: requires 0 <= worker_id < num_workers."',
        ' exit 1',
        'fi',
        # Blank separator line; a missing comma used to merge it into 'fi'.
        '',
        'num_exps=%d' % num_exps,
        'i=0',
        'while [ $i -lt $num_exps ]; do',
        ' if [ $(($i % $num_workers)) -eq $worker_id ]; then',
        ' if [ ! -f %s ] || [ $force_rerun -eq 1 ]; then' % results_filepath,
        ' echo cfg$i',
        ' %s' % run_filepath,
        ' fi',
        ' fi',
        ' i=$(($i + 1))',
        'done',
    ]

    # creating the run all script.
    out_filepath = tb_fs.join_paths([experiment_folderpath, 'run.sh'])
    tb_io.write_textfile(out_filepath, sc_lines, with_newline=True)

    # keep the existing permission bits and add execute for everyone.
    st = os.stat(out_filepath)
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    os.chmod(out_filepath, st.st_mode | exec_bits)
def create_experiment_folder(
        main_filepath,
        argname_lst,
        argval_lst_lst,
        output_folderpath_argname,
        all_experiments_folderpath,
        readme,
        experiment_name=None,
        code_folderpath=None,
        capture_output=False,
        profile_run=False):
    """Create the folder structure and run scripts for an experiment.

    A new folder named `experiment_name` is created inside
    `all_experiments_folderpath` (the name is obtained from
    `get_available_filename(..., "exp")` when not given). It receives:

    * a copy of `code_folderpath`, if provided; in that case
      `main_filepath` is re-rooted under the experiment folder;
    * `config.json` with the arguments of this call;
    * one `cfg<i>` folder per entry of `argval_lst_lst`, each with a
      `run.sh` produced by `create_run_script` and a `config.json` mapping
      argument names to values (the configuration folder itself is passed
      as the value of `output_folderpath_argname`);
    * a top-level `run.sh` produced by
      `create_runall_script_with_parallelization`.

    When `capture_output` / `profile_run` are set, `cfg<i>/output.txt` /
    `cfg<i>/profile.txt` are passed to `create_run_script` as
    `output_filepath` / `profile_filepath`.

    Returns:
        The path of the created experiment folder.
    """
    # NOTE: several local names below are looked up by name through
    # locals(); do not rename them without updating the key lists.
    assert tb_fs.folder_exists(all_experiments_folderpath)
    if experiment_name is not None:
        assert not tb_fs.path_exists(
            tb_fs.join_paths([all_experiments_folderpath, experiment_name]))

    # create the main folder where things for the experiment will be.
    if experiment_name is None:
        experiment_name = get_available_filename(
            all_experiments_folderpath, "exp")
    experiment_folderpath = tb_fs.join_paths(
        [all_experiments_folderpath, experiment_name])
    tb_fs.create_folder(experiment_folderpath)

    # copy the code to the experiment folder.
    if code_folderpath is not None:
        copied_code_folderpath = tb_fs.join_paths(
            [experiment_folderpath,
             tb_fs.path_last_element(code_folderpath)])
        tb_fs.copy_folder(
            code_folderpath,
            copied_code_folderpath,
            ignore_hidden_files=True,
            ignore_hidden_folders=True,
            ignore_file_exts=['.pyc'])
        # change main_filepath to use that new code.
        main_filepath = tb_fs.join_paths(
            [experiment_folderpath, main_filepath])

    # NOTE: no data copying for now because it often does not make much
    # sense; the name is kept only so that it shows up in config.json.
    data_folderpath = None  # TODO: remove later.

    # write the config for the experiment.
    experiment_config = tb_ut.subset_dict_via_selection(locals(), [
        'main_filepath', 'argname_lst', 'argval_lst_lst',
        'output_folderpath_argname', 'all_experiments_folderpath', 'readme',
        'experiment_name', 'code_folderpath', 'data_folderpath',
        'capture_output', 'profile_run'
    ])
    tb_io.write_jsonfile(
        experiment_config,
        tb_fs.join_paths([experiment_folderpath, 'config.json']))

    # generate the executables for each configuration; the output folder
    # is passed to the main file as one extra (last) argument.
    argname_lst = list(argname_lst) + [output_folderpath_argname]
    for cfg_index, cfg_values in enumerate(argval_lst_lst):
        cfg_folderpath = tb_fs.join_paths(
            [experiment_folderpath, "cfg%d" % cfg_index])
        tb_fs.create_folder(cfg_folderpath)

        # create the script
        argvalue_lst = list(cfg_values) + [cfg_folderpath]
        run_script_kwargs = tb_ut.subset_dict_via_selection(
            locals(), ['argname_lst', 'argvalue_lst', 'main_filepath'])
        run_script_kwargs['script_filepath'] = tb_fs.join_paths(
            [cfg_folderpath, 'run.sh'])
        if capture_output:
            run_script_kwargs['output_filepath'] = tb_fs.join_paths(
                [cfg_folderpath, 'output.txt'])
        if profile_run:
            run_script_kwargs['profile_filepath'] = tb_fs.join_paths(
                [cfg_folderpath, 'profile.txt'])
        create_run_script(**run_script_kwargs)

        # write a config file for each configuration
        tb_io.write_jsonfile(
            tb_ut.create_dict(argname_lst, argvalue_lst),
            tb_fs.join_paths([cfg_folderpath, 'config.json']))

    create_runall_script_with_parallelization(experiment_folderpath)
    return experiment_folderpath