# Setup parallelization
p.setup_parallelization([pspace, ptime])

if verbose:
    pspace.print()
    ptime.print()
    p.parallelization.print()

if len(tsm) > 4:
    s = tsm[4]
    p.load_from_dict(tsm[4])

p.parallelization.max_wallclock_seconds = estimateWallclockTime(p)

p.gen_jobscript_directory('job_benchref_' + p.getUniqueID())

p.reference_job_unique_id = p.job_unique_id

#
# Create job scripts
#
for tsm in ts_methods[1:]:
    p.runtime.timestepping_method = tsm[0]
    p.runtime.timestepping_order = tsm[1]
    p.runtime.timestepping_order2 = tsm[2]

    if len(tsm) > 4:
        s = tsm[4]
        p.runtime.load_from_dict(tsm[4])
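# The setup above passes [pspace, ptime], but only the 'space' dimension is
# constructed in this excerpt. A minimal sketch of how the 'time' dimension
# could be set up, mirroring the 'space' dimension further below; the concrete
# values (e.g. 16 parallel-in-time ranks) are illustrative assumptions:
ptime = JobParallelizationDimOptions('time')
ptime.num_cores_per_rank = 1
ptime.num_threads_per_rank = 1
ptime.num_ranks = 16  # assumed: one MPI rank per coarse time slice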
def jobscript_get_header(jg : JobGeneration):
    """
    These headers typically contain information on e.g. job execution,
    number of compute nodes, etc.

    Returns
    -------
    string
        multiline text for scripts
    """
    job_id = jg.getUniqueID()

    p = jg.parallelization
    time_str = p.get_max_wallclock_seconds_hh_mm_ss()

    # Available queues:
    #  premium (only use this in extreme cases)
    #  regular
    #  economy
    queue = 'economy'

    # Use a higher-priority queue for larger node counts,
    # otherwise the job doesn't seem to be scheduled
    if p.num_nodes >= 32:
        queue = 'premium'
    elif p.num_nodes >= 16:
        queue = 'regular'

    #
    # See https://www.lrz.de/services/compute/linux-cluster/batch_parallel/example_jobs/
    #
    content = """#! /bin/bash
#
## project code
#PBS -A NCIS0002
#PBS -q """+queue+"""
## wall-clock time (hrs:mins:secs)
#PBS -l walltime="""+time_str+"""
## select: number of nodes
## ncpus: number of CPUs per node
## mpiprocs: number of ranks per node
#PBS -l select="""+str(p.num_nodes)+""":ncpus="""+str(p.num_cores_per_node)+""":mpiprocs="""+str(p.num_ranks_per_node)+""":ompthreads="""+str(p.num_threads_per_rank)+"\n"

    # CPU frequency modes:
    #  "default": 2301000
    #  "turbo":   2301000
    #  "rated":   2300000
    #  "slow":    1200000
    if p.force_turbo_off:
        content += "#PBS -l select=cpufreq=2300000\n"

    content += """#
#PBS -N """+job_id[0:100]+"""
#PBS -o """+jg.p_job_stdout_filepath+"""
#PBS -e """+jg.p_job_stderr_filepath+"""

#source /etc/profile.d/modules.sh
#module load openmpi
"""+("module load mkl" if jg.compile.mkl == True or jg.compile.mkl == 'enable' else "")+"""

"""+p_gen_script_info(jg)+"""

echo
echo "hostname"
hostname
echo

echo
echo "lscpu -e"
lscpu -e
echo

echo
echo "CPU Frequencies (uniquely reduced):"
cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_cur_freq | sort -u
echo
"""

    if jg.compile.threading != 'off':
        content += """
export OMP_NUM_THREADS="""+str(p.num_threads_per_rank)+"""
export OMP_DISPLAY_ENV=VERBOSE
"""

    if p.core_oversubscription:
        raise Exception("Not supported with this script!")
    else:
        if p.core_affinity is not None:
            content += "\necho \"Affinity: "+str(p.core_affinity)+"\"\n"

            if p.core_affinity == 'compact':
                content += "source $MULE_ROOT/platforms/bin/setup_omp_places.sh nooversubscription close\n"
                #content += "\nexport OMP_PROC_BIND=close\n"
            elif p.core_affinity == 'scatter':
                raise Exception("Affinity '"+str(p.core_affinity)+"' not supported")
                # Unreachable after the raise above, kept commented out for reference:
                #content += "\nexport OMP_PROC_BIND=spread\n"
            else:
                raise Exception("Affinity '"+str(p.core_affinity)+"' not supported")

    content += "\n"

    return content
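# A minimal usage sketch for jobscript_get_header(), assuming the mule
# JobGeneration API as used elsewhere in these scripts; the option values
# below are illustrative assumptions, not a prescribed configuration:
jg = JobGeneration()

pspace = JobParallelizationDimOptions('space')
pspace.num_cores_per_rank = 1
pspace.num_threads_per_rank = 4
pspace.num_ranks = 16
jg.setup_parallelization([pspace])
jg.parallelization.max_wallclock_seconds = 30*60  # becomes the PBS walltime

# The returned string is the PBS preamble prepended to each generated job
# script (project code, queue selection, node/core layout, module loads).
print(jobscript_get_header(jg))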
def p_gen_script_info(jg : JobGeneration):
    return """#
# Generating function: """+_whoami(2)+"""
# Platform: """+get_platform_id()+"""
# Job id: """+jg.getUniqueID()+"""
#
"""
pspace = JobParallelizationDimOptions('space')
pspace.num_cores_per_rank = 1
pspace.num_threads_per_rank = params_pspace_num_cores_per_rank[-1]
pspace.num_ranks = 1

# Setup parallelization
p.setup_parallelization([pspace])

if verbose:
    pspace.print()
    p.parallelization.print()

p.parallelization.max_wallclock_seconds = estimateWallclockTime(p)

p.gen_jobscript_directory('job_benchref_' + p.getUniqueID())

# Use this as a reference job
p.reference_job_unique_id = p.job_unique_id

for tsm in ts_methods[1:]:
    p.runtime.timestepping_method = tsm[0]
    p.runtime.timestepping_order = tsm[1]
    p.runtime.timestepping_order2 = tsm[2]

    if len(tsm) > 4:
        s = tsm[4]
        p.runtime.load_from_dict(tsm[4])

    tsm_name = tsm[0]
    if 'ln_erk' in tsm_name:
basename = jg.runtime.benchmark_name

more_samples = True
if more_samples:
    alpha_min = 1.0
    alpha_max = 41.0
    alpha_samples = 160
else:
    alpha_min = 1.0
    alpha_max = 20.0
    alpha_samples = 19

n_list = [5, 3, 7]
m_list = [4, 1, 3]

experiment = mexp.modes_TC2(n_list, m_list, alpha_min, alpha_max, alpha_samples)
exp_filename = "mode_setup_n" + '-'.join(map(str, n_list)) + "_m" + '-'.join(map(str, m_list)) + ".pckl"
codes = experiment.codes
experiment.save_file(exp_filename)

# Set up mode initializations
for mode_code in codes:
    jg.runtime.benchmark_name = basename + "_" + mode_code
    jg.gen_jobscript_directory('job_bench_' + jg.getUniqueID())

# Write compile script
jg.write_compilecommands("./compile_platform_" + jg.platforms.platform_id + ".sh")
jg.setup_parallelization([pspace, ptime])

if verbose:
    pspace.print()
    ptime.print()
    jg.parallelization.print()

if len(tsm) > 4:
    s = tsm[4]
    jg.load_from_dict(tsm[4])

jg.reference_job = True
jg.parallelization.max_wallclock_seconds = estimateWallclockTime(jg)
jg.gen_jobscript_directory('job_benchref_' + jg.getUniqueID())
jg.reference_job = False

jg.reference_job_unique_id = jg.job_unique_id

#
# Create job scripts
#
for tsm in ts_methods[1:]:
    jg.runtime.timestepping_method = tsm[0]
    jg.runtime.timestepping_order = tsm[1]
    jg.runtime.timestepping_order2 = tsm[2]

    if len(tsm) > 4:
        s = tsm[4]
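# The loops over ts_methods[1:] above index each entry as tsm[0] = method
# name, tsm[1] = timestepping_order, tsm[2] = timestepping_order2, plus an
# optional tsm[4] parameter dictionary consumed via load_from_dict(), with
# ts_methods[0] providing the reference method. A sketch of what such a list
# could look like; apart from 'ln_erk' (used above), the method names and
# the extra parameter dictionary are illustrative assumptions:
ts_methods = [
    ['ln_erk',       4, 4, 0],  # index 0: reference method
    ['ln_erk',       2, 2, 0],
    ['l_irk_n_erk',  2, 2, 0],
    ['l_rexi_n_erk', 2, 2, 0, {'rexi_method': 'terry'}],  # assumed extra runtime parameters
]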