def create_run_script(
        main_filepath,
        argname_lst,
        argvalue_lst,
        script_filepath,
        # entry_folderpath=None,
        output_filepath=None,
        profile_filepath=None):
    """Write an executable bash script at ``script_filepath`` that runs
    ``main_filepath`` with the given argument names and values.

    The script starts with ``set -e`` so any failing command aborts it.
    The call lines themselves are produced by ``generate_call_lines``.
    """
    lines = ['#!/bin/bash', 'set -e']
    # # change into the entry folder if provided.
    # if entry_folderpath is not None:
    #     lines += ['cd %s' % entry_folderpath]
    # call the main function; pick out only the relevant locals by name.
    lines.extend(
        generate_call_lines(**tb_ut.subset_dict_via_selection(
            locals(), [
                'main_filepath', 'argname_lst', 'argvalue_lst',
                'output_filepath', 'profile_filepath'
            ])))
    # # change back to the previous folder if I change to some other folder.
    # if entry_folderpath is not None:
    #     lines += ['cd -']
    tb_io.write_textfile(script_filepath, lines, with_newline=True)
    # add run permissions for user, group, and others.
    current_mode = os.stat(script_filepath).st_mode
    os.chmod(script_filepath,
             current_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
def write_server_run_script():
    """Generate a SLURM run script for the 'bridges' cluster and mark it
    executable.

    NOTE(review): this function reads several free variables that are not
    parameters or visible module globals in this chunk — ``servertype``,
    ``jobtype``, ``remote_folderpath``, ``main_relfilepath``, and
    ``local_folderpath``. Confirm they are defined in the enclosing scope
    before calling.
    """
    assert servertype == 'bridges'
    # NOTE: to edit according to the configuration needed.
    jobname = jobtype
    time_budget_in_hours = 48  # max 48 hours
    mem_budget_in_gb = 16
    partition_name = 'GPU-shared'
    num_cpus = 1  # probably ask a CPU for each GPU (or more if you have data loaders)
    num_gpus = 1  # up to 4 if k80, up to 2 if p100
    gpu_type = 'k80'  # in ['k80', 'p100']

    # convert the budgets into the units SLURM expects.
    mem_in_mb = tb_rs.convert_between_byte_units(
        mem_budget_in_gb, src_units='gigabytes', dst_units='megabytes')
    time_in_minutes = tb_lg.convert_between_time_units(
        time_budget_in_hours, src_units='hours', dst_units='minutes')

    script_header = [
        '#!/bin/bash',
        '#SBATCH --nodes=1',
        '#SBATCH --partition=%s' % partition_name,
        '#SBATCH --cpus-per-task=%d' % num_cpus,
        '#SBATCH --gres=gpu:%s:%d' % (gpu_type, num_gpus),
        '#SBATCH --mem=%dM' % mem_in_mb,
        '#SBATCH --time=%d' % time_in_minutes,
        '#SBATCH --job-name=%s' % jobname,
    ]
    # NOTE: changes to the environment can be put in the run script.
    script_body = [
        'module load tensorflow/1.5_gpu',
        'PYTHONPATH=%s:$PYTHONPATH' % remote_folderpath,
        'python -u %s > log_%s.txt' % (main_relfilepath, jobname)
    ]

    script_filepath = tb_fs.join_paths([local_folderpath, "run.sh"])
    tb_io.write_textfile(script_filepath, script_header + [''] + script_body)
    subprocess.check_output(['chmod', '+x', script_filepath])
def create_project_folder(folderpath, project_name, initialize_git_repo=False):
    """Scaffold a standard project layout under ``folderpath/project_name``.

    Creates the typical directories (package dir, analyses, data,
    experiments, notes, temp), empty starter code files, note files,
    readme placeholders, and a .gitignore; optionally initializes a git
    repository with an initial commit.
    """
    fn = lambda xs: tb_fs.join_paths([folderpath, project_name] + xs)

    tb_fs.create_folder(fn([]))
    # typical directories
    for dirname in [
            project_name, "analyses", "data", "experiments", "notes", "temp"
    ]:
        tb_fs.create_folder(fn([dirname]))

    # code files (in order): data, preprocessing, model definition, model
    # training, model evaluation, main to generate the results with different
    # relevant parameters, setting up different experiments, analyze the
    # results and generate plots and tables.
    for code_filename in [
            "__init__.py", "data.py", "preprocess.py", "model.py", "train.py",
            "evaluate.py", "main.py", "experiment.py", "analyze.py"
    ]:
        tb_fs.create_file(fn([project_name, code_filename]))

    # add an empty script that can be used to download data.
    tb_fs.create_file(fn(["data", "download_data.py"]))

    # common notes to keep around.
    for notes_filename in ["journal.txt", "reading_list.txt", "todos.txt"]:
        tb_fs.create_file(fn(["notes", notes_filename]))

    # placeholders
    tb_io.write_textfile(
        fn(["experiments", "readme.txt"]),
        ["All experiments will be placed under this folder."])
    tb_io.write_textfile(fn(["temp", "readme.txt"]), [
        "Here lie temporary files that are relevant or useful for the project "
        "but that are not kept under version control."
    ])
    tb_io.write_textfile(fn(["analyses", "readme.txt"]), [
        "Here lie files containing information extracted from the "
        "results of the experiments. Tables and plots are typical examples."
    ])

    # typical git ignore file.
    tb_io.write_textfile(
        fn([".gitignore"]),
        ["data", "experiments", "temp", "*.pyc", "*.pdf", "*.aux"])

    if initialize_git_repo:
        subprocess.call("cd %s && git init && git add -f .gitignore * && "
                        "git commit -a -m \"Initial commit for %s.\" && cd -" %
                        (fn([]), project_name),
                        shell=True)
def create_runall_script(experiment_folderpath):
    """Write an executable ``run.sh`` at ``experiment_folderpath`` that runs
    each configuration's ``cfg<i>/run.sh`` in sequence.

    The number of configurations is inferred by counting the folders whose
    last path element starts with ``cfg``.
    """
    fo_names = tb_fs.list_folders(
        experiment_folderpath, recursive=False, use_relative_paths=True)
    num_exps = len(
        [n for n in fo_names if tb_fs.path_last_element(n).startswith('cfg')])

    # creating the script.
    sc_lines = ['#!/bin/bash']
    # BUG FIX: the original used `xrange`, which does not exist on Python 3
    # (the rest of this file uses `print()` as a function, i.e. Python 3).
    sc_lines += [
        tb_fs.join_paths([experiment_folderpath, "cfg%d" % i, 'run.sh'])
        for i in range(num_exps)
    ]

    # creating the run all script.
    out_filepath = tb_fs.join_paths([experiment_folderpath, 'run.sh'])
    tb_io.write_textfile(out_filepath, sc_lines, with_newline=True)
    # add run permissions for user, group, and others.
    st = os.stat(out_filepath)
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    os.chmod(out_filepath, st.st_mode | exec_bits)
def create_runall_script_with_parallelization(experiment_folderpath):
    """Write an executable ``run.sh`` at ``experiment_folderpath`` that runs
    the configurations, optionally sharded across workers.

    The generated script accepts ``[worker_id num_workers] [--force-rerun]``:
    worker ``worker_id`` runs every ``num_workers``-th configuration, and
    configurations with an existing ``results.json`` are skipped unless
    ``--force-rerun`` is passed.
    """
    fo_names = tb_fs.list_folders(
        experiment_folderpath, recursive=False, use_relative_paths=True)
    num_exps = len(
        [n for n in fo_names if tb_fs.path_last_element(n).startswith('cfg')])

    # creating the script.
    sc_lines = [
        '#!/bin/bash',
        # BUG FIX: the original guard used `&&` ([ $# -lt 0 ] && [ $# -gt 3 ]),
        # which can never be true, so invalid argument counts were not caught.
        'if [ "$#" -lt 0 ] || [ "$#" -gt 3 ]; then',
        '    echo "Usage: run.sh [worker_id num_workers] [--force-rerun]"',
        '    exit 1',
        'fi',
        'force_rerun=0',
        'if [ $# -eq 0 ] || [ $# -eq 1 ]; then',
        '    worker_id=0',
        '    num_workers=1',
        '    if [ $# -eq 1 ]; then',
        '        if [ "$1" != "--force-rerun" ]; then',
        '            echo "Usage: run.sh [worker_id num_workers] [--force-rerun]"',
        '            exit 1',
        '        else',
        '            force_rerun=1',
        '        fi',
        '    fi',
        'else',
        '    worker_id=$1',
        '    num_workers=$2',
        '    if [ $# -eq 3 ]; then',
        '        if [ "$3" != "--force-rerun" ]; then',
        '            echo "Usage: run.sh [worker_id num_workers] [--force-rerun]"',
        '            exit 1',
        '        else',
        '            force_rerun=1',
        '        fi',
        '    fi',
        'fi',
        'if [ $num_workers -le $worker_id ] || [ $worker_id -lt 0 ]; then',
        '    echo "Invalid call: requires 0 <= worker_id < num_workers."',
        '    exit 1',
        # BUG FIX: a missing comma here ('fi' '') silently concatenated the
        # two strings, dropping the intended blank line from the script.
        'fi',
        '',
        'num_exps=%d' % num_exps,
        'i=0',
        'while [ $i -lt $num_exps ]; do',
        '    if [ $(($i % $num_workers)) -eq $worker_id ]; then',
        # skip configurations that already produced results.json, unless
        # --force-rerun was requested.
        '        if [ ! -f %s ] || [ $force_rerun -eq 1 ]; then' %
        tb_fs.join_paths([experiment_folderpath, "cfg$i", 'results.json']),
        '            echo cfg$i',
        '            %s' %
        tb_fs.join_paths([experiment_folderpath, "cfg$i", 'run.sh']),
        '        fi',
        '    fi',
        '    i=$(($i + 1))',
        'done'
    ]
    # creating the run all script.
    out_filepath = tb_fs.join_paths([experiment_folderpath, 'run.sh'])
    tb_io.write_textfile(out_filepath, sc_lines, with_newline=True)
    # add run permissions for user, group, and others.
    st = os.stat(out_filepath)
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    os.chmod(out_filepath, st.st_mode | exec_bits)
def create_table_from_experiment(experiment_name,
                                 rows,
                                 columns,
                                 values,
                                 abort_if_incomplete_configs=True,
                                 use_checkpoints=False,
                                 single_row_multitable=False,
                                 print_to_terminal=True,
                                 max_column_width=10**9,
                                 abort_if_different_keys=True):
    """Build pivot tables from the results of ``experiments/<experiment_name>``
    and write them to ``analyses/<experiment_name>/results.{txt,csv}``.

    ``rows``/``columns``/``values`` select the pivot layout. If any CoNLL
    metric name appears in ``values``, precision/recall/FB1 are computed by
    piping each prediction file through the conlleval script.
    """
    _, xs = explore_experiment('experiments/%s' % experiment_name,
                               use_checkpoints)

    # split configurations from results, dropping unfinished runs.
    cfgs = []
    res = []
    for (c, r) in xs:
        if r is not None:
            cfgs.append(c)
            res.append(r)
        else:
            assert not abort_if_incomplete_configs
    xs = tb_ut.zip_toggle([cfgs, res])

    ks = keys_with_variation(cfgs)
    c = dict(cfgs[0])
    for k in ks:
        c.pop(k)
    # NOTE(review): this only works if keys_with_variation returns a dict
    # (list.pop takes an index, not a key) — confirm against its definition.
    ks.pop('out_folder')
    print("***%s***" % experiment_name)
    pprint(ks)
    print()

    ds = [summarize_results(tb_ut.merge_dicts(x)) for x in xs]

    # if CoNLL-style metrics were requested, compute them from the prediction
    # files via the conlleval script.
    if any(v in values for v in [
            'dev_precision', 'dev_recall', 'dev_fb1', 'test_precision',
            'test_recall', 'test_fb1'
    ]):

        def _extract_fn(fpath):
            # BUG FIX: check_output returns bytes on Python 3; decode before
            # string operations.
            out = subprocess.check_output(
                ["cat %s | data/conll_2000/conlleval.txt" % fpath],
                shell=True).decode()
            res_line = out.split('\n')[1]
            # BUG FIX: map() returns a lazy iterator on Python 3, so the
            # original `map(...)[1:]` raised TypeError; materialize first.
            # (Also removed the unused local `f1` the original computed.)
            p, r, fb1 = [
                0.01 * float(x.split(': ')[1])
                for x in res_line.split('%; ')
            ][1:]
            return p, r, fb1

        # add the test and dev performances to the file.
        for d in ds:
            (d['dev_precision'], d['dev_recall'], d['dev_fb1']) = _extract_fn(
                tb_fs.join_paths([d['out_folder'], 'pred_dev.txt']))
            (d['test_precision'], d['test_recall'],
             d['test_fb1']) = _extract_fn(
                 tb_fs.join_paths([d['out_folder'], 'pred_test.txt']))
            # this is the final, last run for conll2000
            fpath = tb_fs.join_paths([d['out_folder'], 'final_pred_test.txt'])
            if tb_fs.file_exists(fpath):
                (d['final_test_precision'], d['final_test_recall'],
                 d['final_test_fb1']) = _extract_fn(fpath)

    df = tb_ut.create_dataframe(ds, abort_if_different_keys)

    # shorten the names appropriately.
    df = df.rename(columns={k: k[:max_column_width] for k in rows})
    rows = [k[:max_column_width] for k in rows]

    # determines the table layout: one pivot table per value, or a single
    # multi-value table.
    if not single_row_multitable:
        ts = [
            df.pivot_table(index=rows, columns=columns, values=[v])
            for v in values
        ]
    else:
        ts = [
            df.pivot_table(index=rows, columns=columns, values=values)
            #.sort_values('dev_accuracy', ascending=False)
        ]

    tb_fs.create_folder(
        'analyses/%s' % experiment_name, abort_if_exists=False)
    s_c = pformat(c)
    ss_df = [
        t.to_string(float_format=get_float_formatter(2, 100.0)) for t in ts
    ]

    lines = [s_c]
    for s in ss_df:
        lines.append('')
        lines.append(s)

    if print_to_terminal:
        # print to terminal
        for s in lines:
            print(s)

    # write to file
    tb_io.write_textfile('analyses/%s/results.txt' % experiment_name, lines)
    tb_io.write_csvfile(
        ds,
        'analyses/%s/results.csv' % experiment_name,
        sort_keys=True,
        abort_if_different_keys=abort_if_different_keys)