def hpc(args, cism_driver, data_dir, test_dict):
    """
    Run commands for high performance computers (HPCs).
    """
    # ----------------------
    # Setup all run commands
    # ----------------------
    platform_key = args.platform.split("-")[0]
    platform_dict = dicts.hpc_dict[platform_key]
    if 'RUN_CMD' in platform_dict:
        run_cmd = platform_dict['RUN_CMD']
    else:
        run_cmd = ''

    perf_large_dict = dicts.perf_dict

    jobs_dir = os.path.join(data_dir, 'all_jobs')
    paths.mkdir_p(jobs_dir)

    # get file name modifier
    mod_list = paths.file_modifier_list(args)
    mod_arg = ""
    if mod_list:
        mod_arg = " -m " + str.join("-", mod_list)

    timing_exports = set()
    timing_commands = []
    for case in test_dict:
        case_split = str.split(case, " ")
        case_dir = os.path.join(data_dir, str.split(case_split[0], "/")[-1],
                                case_split[-1])
        cism_test_dir = os.path.join(args.cism_dir, 'tests', case_split[0])
        run_script, mod_dict = test_dict[case]

        run_args, ignore_args = paths.run_parser.parse_known_args(
            str.split(run_script, " ") + ['--scale', '0', '-n', '1'])
        case_run_dir = paths.case_run_directory(case_dir, run_args)

        print(" Setting up " + case + " tests")
        test_commands = [
            "cd " + cism_test_dir,
            "export PYTHONPATH=$PYTHONPATH:" + cism_test_dir,
            "./" + run_script + " -q -e " + cism_driver + " -o "
            + case_run_dir + mod_arg + " -s -n 1 --hpc " + run_cmd,
            "exit"
        ]
        subprocess.check_call(str.join(" ; ", test_commands),
                              executable='/bin/bash', shell=True)

        # run performance tests (always do this for HPC systems)
        if mod_dict:
            for mod in mod_dict:
                run_args, ignore_args = paths.run_parser.parse_known_args(
                    str.split(run_script, " ") + mod_dict[mod].split())
                case_run_dir = paths.case_run_directory(case_dir, run_args)

                test_commands = [
                    "cd " + cism_test_dir,
                    "export PYTHONPATH=$PYTHONPATH:" + cism_test_dir,
                    "./" + run_script + " -q -e " + cism_driver + " -o "
                    + case_run_dir + mod_arg + " " + mod_dict[mod]
                    + " -s --hpc " + run_cmd,
                    "exit"
                ]
                subprocess.check_call(str.join(" ; ", test_commands),
                                      executable='/bin/bash', shell=True)

        # get info to setup timing runs.
        if args.timing and mod_dict:
            for rnd in range(10):
                print(" Setting up " + case + " small timing test " + str(rnd))
                if mod_arg:
                    timing_mod = mod_arg + '-t' + str(rnd)
                else:
                    timing_mod = " -m t" + str(rnd)

                run_args, ignore_args = paths.run_parser.parse_known_args(
                    str.split(run_script, " ") + ['--scale', '0', '-n', '1'])
                case_run_dir = paths.case_run_directory(case_dir, run_args)

                timing_exports.add("export PYTHONPATH=$PYTHONPATH:" + cism_test_dir)
                timing_commands.extend([
                    "cd " + cism_test_dir,
                    "./" + run_script + " -q -e " + cism_driver + " -o "
                    + case_run_dir + timing_mod + " -s -n 1 --hpc " + run_cmd
                ])

                for mod in mod_dict:
                    run_args, ignore_args = paths.run_parser.parse_known_args(
                        str.split(run_script, " ") + mod_dict[mod].split())
                    case_run_dir = paths.case_run_directory(case_dir, run_args)

                    timing_commands.extend([
                        "cd " + cism_test_dir,
                        "./" + run_script + " -q -e " + cism_driver + " -o "
                        + case_run_dir + timing_mod + " " + mod_dict[mod]
                        + " -s --hpc " + run_cmd
                    ])

    # -------------------------
    # Setup the small batch job
    # -------------------------
    # Get the default and small perf run files
    small_run_files = paths.recursive_glob(data_dir, "*.run")

    # get the default and small perf run commands
    small_run_commands = []
    for rf in small_run_files:
        with open(rf, 'r') as rfo:
            next(rfo)  # skip shebang
            for command in rfo:
                small_run_commands.append(command)

    ## set all aprun commands to background
    #small_run_commands = [command.replace('\n', ' & \n') if 'aprun' in command
    #                      else command for command in small_run_commands]

    # create the default and small perf job script.
    platform_dict['PBS_N'] = 'small'

    small_job_name = os.path.join(jobs_dir, platform_key + '_job.small')
    create_job(args, small_job_name, platform_dict, small_run_commands)

    # ----------------------------------------
    # setup the large performance run commands
    # ----------------------------------------
    large_timing_commands = []
    for case in perf_large_dict:
        case_split = str.split(case, " ")
        case_dir = os.path.join(data_dir, str.split(case_split[0], "/")[-1],
                                case_split[-1])
        cism_test_dir = os.path.join(args.cism_dir, 'tests', case_split[0])
        run_script, mod_dict = perf_large_dict[case]

        if mod_dict:
            for mod in mod_dict:
                run_args, ignore_args = paths.run_parser.parse_known_args(
                    str.split(run_script, " ") + mod_dict[mod].split())
                case_run_dir = paths.case_run_directory(case_dir, run_args)

                test_commands = [
                    "cd " + cism_test_dir,
                    "export PYTHONPATH=$PYTHONPATH:" + cism_test_dir,
                    "./" + run_script + " -q -e " + cism_driver + " -o "
                    + case_run_dir + mod_arg + " " + mod_dict[mod]
                    + " -s --hpc " + run_cmd,
                    "exit"
                ]
                subprocess.check_call(str.join(" ; ", test_commands),
                                      executable='/bin/bash', shell=True)

        # get info to setup timing runs.
        if args.timing and mod_dict:
            for rnd in range(10):
                print(" Setting up " + case + " large timing test " + str(rnd))
                if mod_arg:
                    timing_mod = mod_arg + '-t' + str(rnd)
                else:
                    timing_mod = " -m t" + str(rnd)

                run_args, ignore_args = paths.run_parser.parse_known_args(
                    str.split(run_script, " ") + ['--scale', '0', '-n', '1'])
                case_run_dir = paths.case_run_directory(case_dir, run_args)

                timing_exports.add("export PYTHONPATH=$PYTHONPATH:" + cism_test_dir)
                large_timing_commands.extend([
                    "cd " + cism_test_dir,
                    "./" + run_script + " -q -e " + cism_driver + " -o "
                    + case_run_dir + timing_mod + " -s -n 1 --hpc " + run_cmd
                ])

                for mod in mod_dict:
                    run_args, ignore_args = paths.run_parser.parse_known_args(
                        str.split(run_script, " ") + mod_dict[mod].split())
                    case_run_dir = paths.case_run_directory(case_dir, run_args)

                    large_timing_commands.extend([
                        "cd " + cism_test_dir,
                        "./" + run_script + " -q -e " + cism_driver + " -o "
                        + case_run_dir + timing_mod + " " + mod_dict[mod]
                        + " -s --hpc " + run_cmd
                    ])

    # -------------------------
    # Setup the large batch job
    # -------------------------
    # Get large perf run files
    all_run_files = paths.recursive_glob(data_dir, "*.run")
    large_run_files = list(set(small_run_files) ^ set(all_run_files))  # get the new run files

    # get the large perf run commands
    large_run_commands = []
    for rf in large_run_files:
        with open(rf, 'r') as rfo:
            next(rfo)  # skip shebang
            for command in rfo:
                large_run_commands.append(command)

    # set all aprun commands to background
    #large_run_commands = [command.replace('\n', ' & \n') if 'aprun' in command
    #                      else command for command in large_run_commands]

    # create the default job script.
    platform_dict['PBS_N'] = 'large'
    platform_dict['PBS_walltime'] = '01:00:00'
    if platform_key.lower() == 'hopper':
        platform_dict['RES_NUM'] = str(11 * 24)
    else:
        platform_dict['RES_NUM'] = '16'

    large_job_name = os.path.join(jobs_dir, platform_key + '_job.large')
    create_job(args, large_job_name, platform_dict, large_run_commands)

    if args.timing:
        # ----------------------
        # setup small timing job
        # ----------------------
        timing_exports_all = ['# Setup the environment variables \n']
        timing_exports_all.extend(timing_exports)
        timing_exports_all.append("\n")

        timing_commands.append("exit")
        subprocess.check_call(str.join(" ; ", timing_commands),
                              executable='/bin/bash', shell=True)

        all_run_files = paths.recursive_glob(data_dir, "*.run")
        small_timing_run_files = list(
            set(small_run_files + large_run_files) ^ set(all_run_files)
        )  # get the new run files

        small_timing_jobs = set()
        for rnd in range(10):
            subset_run_files = [f for f in small_timing_run_files
                                if '-t' + str(rnd) in f]

            # get the small timing run commands
            small_timing_run_commands = []
            for rf in subset_run_files:
                with open(rf, 'r') as rfo:
                    next(rfo)  # skip shebang
                    for command in rfo:
                        small_timing_run_commands.append(command)

            # set all aprun commands to background
            #small_timing_run_commands = [command.replace('\n', ' & \n') if 'aprun' in command
            #                             else command for command in small_timing_run_commands]

            # create the default job script.
            platform_dict['PBS_N'] = 'small_timing_' + str(rnd)
            platform_dict['PBS_walltime'] = '1:00:00'
            if platform_key.lower() == 'hopper':
                platform_dict['RES_NUM'] = str(1 * 24)
            else:
                platform_dict['RES_NUM'] = '1'

            small_timing_job_name = os.path.join(
                jobs_dir, platform_key + '_job.small_timing_' + str(rnd))
            create_job(args, small_timing_job_name, platform_dict,
                       small_timing_run_commands)
            small_timing_jobs.add(small_timing_job_name)

        # ----------------------
        # setup large timing job
        # ----------------------
        large_timing_commands.append("exit")
        subprocess.check_call(str.join(" ; ", large_timing_commands),
                              executable='/bin/bash', shell=True)

        all_run_files = paths.recursive_glob(data_dir, "*.run")
        large_timing_run_files = list(
            set(small_run_files + large_run_files + small_timing_run_files)
            ^ set(all_run_files)
        )  # get the new run files

        large_timing_jobs = set()
        for rnd in range(10):
            subset_run_files = [f for f in large_timing_run_files
                                if '-t' + str(rnd) in f]

            # get the large timing run commands
            large_timing_run_commands = []
            for rf in subset_run_files:
                with open(rf, 'r') as rfo:
                    next(rfo)  # skip shebang
                    for command in rfo:
                        large_timing_run_commands.append(command)

            # set all aprun commands to background
            #large_timing_run_commands = [command.replace('\n', ' & \n') if 'aprun' in command
            #                             else command for command in large_timing_run_commands]

            # create the default job script.
            platform_dict['PBS_N'] = 'large_timing_' + str(rnd)
            if platform_key.lower() == 'hopper':
                platform_dict['PBS_walltime'] = '20:00'
                platform_dict['RES_NUM'] = str(11 * 24)
            else:
                platform_dict['PBS_walltime'] = '00:20:00'
                platform_dict['RES_NUM'] = '16'

            large_timing_job_name = os.path.join(
                jobs_dir, platform_key + '_job.large_timing_' + str(rnd))
            create_job(args, large_timing_job_name, platform_dict,
                       large_timing_run_commands)
            large_timing_jobs.add(large_timing_job_name)

    # create a script to submit all batch jobs
    sub_script_script = os.path.join(data_dir, "submit_all_jobs.bash")
    with open(sub_script_script, 'w') as sub_script_file:
        sub_script_file.write('#!/usr/bin/env bash \n \n')
        sub_script_file.write('qsub ' + small_job_name + '\n \n')
        sub_script_file.write('qsub ' + large_job_name + '\n \n')
        if args.timing:
            for sm_jb in small_timing_jobs:
                sub_script_file.write('qsub ' + sm_jb + '\n \n')
            for lg_jb in large_timing_jobs:
                sub_script_file.write('qsub ' + lg_jb + '\n \n')

    os.chmod(sub_script_script, 0o755)  # uses an octal number!

    if args.timing:
        # create a script to clean out the timing directory.
        clean_script = os.path.join(data_dir, "clean_timing.bash")
        with open(clean_script, 'w') as clean_file:
            clean_file.write('#!/usr/bin/env bash \n \n')
            clean_file.write("cd " + data_dir + " \n")
            clean_file.write('find ./ -iname "*-t[0-9]*" -not -iname "*.cism_timing*" -type f -exec rm -f {} \\; \n')
            clean_file.write(" \n")

        os.chmod(clean_script, 0o755)  # uses an octal number!

    # -----
    # DONE!
    # -----
    print("\n Created batch job scripts:")
    print(" " + small_job_name)
    if args.timing:
        for sm_jb in small_timing_jobs:
            print(" " + sm_jb)
    print("\n " + large_job_name)
    if args.timing:
        for lg_jb in large_timing_jobs:
            print(" " + lg_jb)

    print("\n Submit all jobs with this script:")
    print(" " + sub_script_script)

    if args.timing:
        print("\n Created script to clean out timing directory:")
        print(" " + clean_script)
        print("\n Run this script after ALL jobs finish to remove every unneeded file in the timing directories.")
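
# For reference, hpc() above relies on only a few keys in each dicts.hpc_dict
# entry: an optional 'RUN_CMD' (a launcher prefix forwarded to the run scripts
# via --hpc), plus 'PBS_N', 'PBS_walltime', and 'RES_NUM', which it overwrites
# before every create_job() call. A minimal sketch of such an entry follows;
# the platform name and values here are illustrative assumptions, not taken
# from dicts.py:
#
#     hpc_dict['titan'] = {
#         'RUN_CMD': 'aprun',          # becomes: ... --hpc aprun
#         'PBS_N': 'small',            # job name; reset per job by hpc()
#         'PBS_walltime': '01:00:00',  # reset for the large/timing jobs
#         'RES_NUM': '16',             # node/core request; reset per job
#     }
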
def personal(args, cism_driver, data_dir, test_dict):
    """
    Run commands for personal computers (PCs).
    """
    test_run = {}
    for case in test_dict:
        case_dir = str.split(case, " ")[0]
        case_data_dir = os.path.normpath(data_dir + os.sep + case_dir)
        run_script, mod_dict = test_dict[case]

        mod_list = paths.file_modifier_list(args)
        mod_arg = ""
        if mod_list:
            mod_arg = " -m " + str.join("-", mod_list)

        if args.tmod and mod_dict:
            # for timing, re-run the default cases that also have performance
            # runs, and put their output in a timing subdirectory
            case_data_dir += os.sep + "timing"

        paths.mkdir_p(case_data_dir)

        # run default test
        test_commands = [
            "cd " + os.path.normpath(args.cism_dir + os.sep + "tests" + os.sep + case_dir),
            "./" + run_script + " -q -e " + cism_driver + " -o "
            + case_data_dir + mod_arg + " -n 1",
        ]
        if args.tmod:
            test_commands.extend([
                "cd " + os.path.normpath(case_data_dir),
                'find ./ -not -iname "*.results" -not -iname "*.cism_timing*" -type f -exec rm -f {} \\;',
                "exit",
            ])
        else:
            test_commands.append("exit")

        # print(str.join(" ; ", test_commands))
        if not args.tmod or (args.tmod and mod_dict):
            print(" Spawning " + case + " test default...")
            test_run[case] = subprocess.Popen(str.join(" ; ", test_commands),
                                              executable="/bin/bash", shell=True)

        # run performance tests if specified
        if args.performance and mod_dict:
            for mod in mod_dict:
                print(" Spawning " + case + " test " + mod + "...")
                test_commands = [
                    "cd " + os.path.normpath(args.cism_dir + os.sep + "tests" + os.sep + case_dir) + " ",
                    "./" + run_script + " -q -e " + cism_driver + " -o "
                    + case_data_dir + mod_arg + " " + mod_dict[mod],
                ]
                if args.tmod:
                    test_commands.extend([
                        "cd " + os.path.normpath(case_data_dir),
                        'find ./ -not -iname "*.results" -not -iname "*.cism_timing*" -type f -exec rm -f {} \\;',
                        "exit",
                    ])
                else:
                    test_commands.append("exit")

                # print(str.join(" ; ", test_commands))
                test_run[case + " " + mod] = subprocess.Popen(
                    str.join(" ; ", test_commands),
                    executable="/bin/bash", shell=True)

    print("\n All tests spawned.\n")
    print(" Waiting for processes to finish.\n")
    waiting = True
    waited = 0
    while waiting:
        time.sleep(args.sleep)
        waited += args.sleep
        print("\n Total wait time: " + str(waited) + " seconds.")
        print(" Checking processes:")

        running = 0
        for pros in test_run:
            if test_run[pros].poll() is None:
                print(" Still waiting on " + pros)
                running += 1

        if running == 0:
            waiting = False
            print(" Processes finished.\n")
        else:
            print(" Will check again in " + str(args.sleep) + " seconds...")
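
# Both hpc() and personal() unpack each test_dict entry as
# (run_script, mod_dict): the key names the test (its first token is a path
# under CISM's tests/ directory), run_script is the test's run script, and
# mod_dict maps a performance-modifier label to extra run-script arguments
# appended to the command line. A minimal sketch, with hypothetical test
# names, scripts, and arguments:
#
#     test_dict = {
#         'dome': ('runDome.py', {'p1': '--scale 1 -n 4'}),  # has perf runs
#         'shelf shelf-circular': ('runShelf.py', {}),       # default run only
#     }
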
def main():
    global args  # used to modify timing file names
    args.tmod = None

    # setup the needed paths
    args = paths.make_absolute(args)
    paths.mkdir_p(args.build_dir)
    cism_driver = os.path.join(args.build_dir, 'cism_driver', 'cism_driver')

    # always run performance tests on HPC systems.
    if args.platform.lower().split("-")[0] in dicts.hpc_dict:
        isHPC = True
        args.performance = True
    else:
        isHPC = False

    # always run performance tests if timing runs are selected.
    if args.timing:
        args.performance = True

    if not args.skip_build:
        args = paths.cmake(args)

        print("\nPreparing to build CISM")
        print("=======================")
        print("Build options:")
        print(" Platform: " + args.platform)
        print("-----------------------")
        print("cmake directory: " + args.cmake_dir)
        print("cmake file: " + args.cmake_file)

        print("\nBuilding CISM")
        print("=============\n")

        #TODO: turn on args.library option.
        #if args.library and args.library.lower() == 'trilinos':
        #    trilinos_string = "CISM_USE_TRILINOS=ON"
        #else:
        #    trilinos_string = "CISM_USE_TRILINOS=OFF"

        prep_commands = [
            "cd " + args.build_dir,
            #TODO: turn on args.library option.
            #"export " + trilinos_string,
            "source " + os.path.join(args.cmake_dir, args.cmake_file) + " " + args.cism_dir,
            "make -j " + str(args.j),
            "exit"
        ]
        #print(str.join("; ", prep_commands))

        subprocess.check_call(str.join("; ", prep_commands),
                              executable='/bin/bash', shell=True)
        print("\nCISM built!")

    print("\nSetting up regression tests directory")
    print("=====================================\n")
    data_dir = paths.mkdir_test(args, test_dict)

    print(" Copying CMake cache into regression test directory.")
    cache_name = "CMakeCache.txt"
    cache_file = os.path.join(args.build_dir, cache_name)
    cache_new = os.path.join(data_dir, cache_name)
    subprocess.check_call("cp " + cache_file + " " + cache_new, shell=True)

    # check for GPTL if timing or performance is on
    args.GPTLflag = None
    if args.performance:
        with open(cache_new, 'r') as cf:
            for line in cf:
                if 'CISM_USE_GPTL_INSTRUMENTATION' in line:
                    args.GPTLflag = line.strip().split('=')[-1]
                    break

        if args.GPTLflag == "OFF":
            print("\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")
            print("WARNING: CISM was not built with GPTL\n")
            print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")
            print("Performance runs cannot be analyzed without GPTL.")
            print("Either rebuild CISM with GPTL or rerun BATS without the performance or timing option.")
            print("\nExiting...")
            sys.exit(1)
        elif args.GPTLflag != "ON":
            print("\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")
            print("WARNING: Could not determine if CISM")
            print("was built with GPTL or not.\n")
            print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")
            print("Performance runs cannot be analyzed via LIVVkit without GPTL.")
            print("Either rebuild CISM with GPTL or rerun BATS without the performance or timing option.")
            print("\nExiting...")
            sys.exit(1)

    if isHPC:
        print("\nPreparing HPC batch jobs")
        print("========================\n")
        runnit.hpc(args, cism_driver, data_dir, test_dict)

        print("\nDone! You can now submit the job scripts.")
        print("=========================================")
    else:
        print("\nRunning regression tests")
        print("========================\n")
        runnit.personal(args, cism_driver, data_dir, test_dict)

        if args.timing:
            print("\nRe-running regression tests for timing data.")
            print("This is going to take a while. A long while.")
            print("============================================\n")
            for rnd in range(10):
                print("\nTiming round: " + str(rnd + 1) + " of 10")
                args.tmod = 't' + str(rnd)
                runnit.personal(args, cism_driver, data_dir, test_dict)

            # turn the timing modifier off now that we're done.
            args.tmod = None

            # clean unnecessary timing files
            subprocess.check_call(
                'cd ' + data_dir
                + ' ; find ./ -iname "*-t[0-9]*" -not -iname "*.results" -not -iname "*.cism_timing*" -type f -exec rm -f {} \\; \n',
                shell=True)

        print("\nAll regression tests finished.")
        print("==============================")
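
# A conventional entry-point guard; this sketch assumes args and test_dict
# are populated at module scope (e.g., by an argparse setup earlier in the
# file, outside this section).
if __name__ == '__main__':
    main()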