def submit_to_queue(self, sub_job: Submission_job) -> int:
    """
    Run the given job directly on the local machine.

    The command is optionally prefixed with an ``OMP_NUM_THREADS`` export and,
    if ``sub_job.sumbit_from_file`` is set, written to an executable bash
    script that is run instead of the raw command string.

    Parameters
    ----------
    sub_job : Submission_job
        the job to be executed. ``sub_job.command`` is stripped in place.

    Returns
    -------
    int
        0 if the command was executed, -1 if submission is switched off.

    Raises
    ------
    ChildProcessError
        if the execution of the command fails.
    """
    orig_dir = os.getcwd()
    try:
        if isinstance(sub_job.submit_from_dir, str) and os.path.isdir(sub_job.submit_from_dir):
            os.chdir(sub_job.submit_from_dir)
            command_file_path = sub_job.submit_from_dir + "/job_" + str(sub_job.jobName) + ".sh"
        else:
            command_file_path = "./job_" + str(sub_job.jobName) + ".sh"

        sub_job.command = sub_job.command.strip()  # remove trailing linebreaks

        if self._nomp >= 1:
            command = "export OMP_NUM_THREADS=" + str(self._nomp) + ";\n " + sub_job.command + ""
        else:
            command = sub_job.command

        if sub_job.sumbit_from_file:
            # One command per line in the script: "&& " chains are split up.
            with open(command_file_path, "w") as command_file:
                command_file.write("#!/bin/bash\n")
                command_file.write(command.replace("&& ", ";\n") + ";\n")
            command = command_file_path
            bash.execute("chmod +x " + command_file_path, env=self.environment)

        # `command` is a plain string here, so it is printed as is
        # (joining it with " " would space out every single character).
        if self.verbose:
            print("Submission Command: \t", command)

        if not self.submission:
            print("Did not submit: ", command)
            return -1

        process = None
        try:
            process = bash.execute(command=command, catch_STD=True, env=self.environment)
            std_out_buff = map(str, process.stdout.readlines())
            std_out = "\t" + "\n\t".join(std_out_buff)

            if self.verbose:
                print("STDOUT: \n\t" + std_out + "\nEND\n")
        except ChildProcessError as err:
            # `process` is only bound if execute() itself returned.
            if process is not None:
                print(process)
            raise ChildProcessError("command failed: \n" + str(command)) from err
        return 0
    finally:
        # Always go back to where we started, also if nothing was submitted
        # or the command failed.
        if os.path.exists(orig_dir):
            os.chdir(orig_dir)
def chain_submission(
    simSystem: Gromos_System,
    out_dir_path: str,
    out_prefix: str,
    chain_job_repetitions: int,
    worker_script: str,
    job_submission_system: _SubmissionSystem,
    jobname: str,
    run_analysis_script_every_x_runs: int = 0,
    in_analysis_script_path: str = "",
    start_run_index: int = 1,
    prefix_command: str = "",
    previous_job_ID: int = None,
    work_dir: str = None,
    initialize_first_run: bool = True,
    reinitialize_every_run: bool = False,
    verbose: bool = False,
    verbose_lvl: int = 1,
) -> Tuple[int, str, Gromos_System]:
    """
    Submit a chain of simulation steps to the queuing system and do the
    file management.

    Parameters
    ----------
    simSystem : Gromos_System
        simulation system
    out_dir_path : str
        out directory path
    out_prefix : str
        out prefix for simulation files
    chain_job_repetitions : int
        index of the last run of the chain (runs go from start_run_index up
        to and including this value)
    worker_script : str
        worker script that is executed by each scheduled job.
    job_submission_system : _SubmissionSystem
        submission system used to submit the jobs
    jobname : str
        name of the simulation job
    run_analysis_script_every_x_runs : int, optional
        run the analysis script in between every x runs, by default 0 (never)
    in_analysis_script_path : str, optional
        command that is submitted as in-between analysis job, by default ""
    start_run_index : int, optional
        start index of the job chain, by default 1
    prefix_command : str, optional
        bash commands that are put in front of the first submitted run,
        by default ""
    previous_job_ID : int, optional
        ID of the previous job, to be chained to, by default None
    work_dir : str, optional
        directory passed to the worker as -work_dir; not passed if None,
        by default None
    initialize_first_run : bool, optional
        passed to the worker as -initialize_first_run, by default True
    reinitialize_every_run : bool, optional
        passed to the worker as -reinitialize_every_run, by default False
    verbose : bool, optional
        print progress information, by default False
    verbose_lvl : int, optional
        amount of progress information, by default 1

    Returns
    -------
    Tuple[int, str, Gromos_System]
        the last job ID, the last job name and the simulation system, whose
        cnf points to the output cnf of the last run. If no run falls into
        the requested range, the job name is the unmodified `jobname`.

    Raises
    ------
    ValueError
        if the submission of a simulation job fails.
    """
    if verbose:
        print("\nChainSubmission - " + out_prefix + "\n" + "=" * 30 + "\n")
    if (verbose) and verbose_lvl >= 2:
        print("start_run_index " + str(start_run_index))
    if (verbose) and verbose_lvl >= 2:
        print("job reptitions " + str(chain_job_repetitions))

    # An instance is never identical to its class, so the former identity
    # test was true for every submission-system instance, LOCAL ones included.
    is_local = job_submission_system is LOCAL or isinstance(job_submission_system, LOCAL)
    if not is_local:
        simSystem._future_promise = True

    ana_id = None
    # NOTE(review): self-assignment; only meaningful if job_duration is a
    # property whose setter has side effects - confirm, otherwise remove.
    job_submission_system.job_duration = job_submission_system.job_duration

    # Defined up front, so the return value is valid for an empty run range.
    tmp_jobname = jobname

    for runID in range(start_run_index, chain_job_repetitions + 1):
        if verbose:
            print("\n submit  " + jobname + "_" + str(runID) + "\n" + spacer3)

        tmp_outprefix = out_prefix + "_" + str(runID)
        tmp_jobname = jobname + "_" + str(runID)
        tmp_outdir = out_dir_path + "/" + tmp_outprefix
        tmp_out_cnf = tmp_outdir + "/" + tmp_outprefix + ".cnf"

        # Checks if run should be skipped!
        do_skip, previous_job_ID = do_skip_job(
            tmp_out_cnf=tmp_out_cnf,
            simSystem=simSystem,
            tmp_jobname=tmp_jobname,
            job_submission_system=job_submission_system,
            previous_job=previous_job_ID,
            verbose=verbose,
        )

        if not do_skip:
            bash.make_folder(tmp_outdir)

            # build COMMANDS:
            if len(prefix_command) > 1:
                prefix_command += " && "

            # The arguments of the python worker are written into a bash
            # array, which keeps the generated job script readable.
            md_args = "md_args=(\n"
            md_args += "-out_dir " + tmp_outdir + "\n"
            md_args += "-in_cnf_path " + simSystem.cnf.path + "\n"
            md_args += "-in_imd_path " + simSystem.imd.path + "\n"
            md_args += "-in_top_path " + simSystem.top.path + "\n"
            md_args += "-runID " + str(runID) + "\n"

            # OPTIONAL ARGS
            if simSystem.disres is not None:
                md_args += "-in_disres_path " + simSystem.disres.path + "\n"
            if simSystem.ptp is not None:
                md_args += "-in_perttopo_path " + simSystem.ptp.path + "\n"
            if simSystem.refpos is not None:
                md_args += "-in_refpos_path " + simSystem.refpos.path + "\n"
            if simSystem.qmmm is not None:
                md_args += "-in_qmmm_path " + simSystem.qmmm.path + " "
            if simSystem.posres is not None:
                md_args += "-in_posres_path " + simSystem.posres.path + "\n"

            md_args += "-nmpi " + str(job_submission_system.nmpi) + "\n"
            md_args += "-nomp " + str(job_submission_system.nomp) + "\n"
            md_args += "-initialize_first_run " + str(initialize_first_run) + "\n"
            md_args += "-reinitialize_every_run " + str(reinitialize_every_run) + "\n"
            md_args += "-gromosXX_bin_dir " + str(simSystem.gromosXX.bin) + "\n"
            md_args += "-gromosXX_check_binary_paths " + str(simSystem.gromosXX._check_binary_paths) + "\n"

            if work_dir is not None:
                md_args += "-work_dir " + str(work_dir) + "\n"

            # Only request the trajectory types the imd actually writes.
            if hasattr(simSystem.imd, "WRITETRAJ"):
                if simSystem.imd.WRITETRAJ.NTWX > 0:
                    md_args += "-out_trc " + str(True) + "\n"
                if simSystem.imd.WRITETRAJ.NTWE > 0:
                    md_args += "-out_tre " + str(True) + "\n"
                if simSystem.imd.WRITETRAJ.NTWV > 0:
                    md_args += "-out_trv " + str(True) + "\n"
                if simSystem.imd.WRITETRAJ.NTWF > 0:
                    md_args += "-out_trf " + str(True) + "\n"
                if simSystem.imd.WRITETRAJ.NTWG > 0:
                    md_args += "-out_trg " + str(True) + "\n"

            md_args += "-zip_trajectories " + str(job_submission_system.zip_trajectories) + "\n"

            md_args += ")\n"  # closing the bash array which stores all arguments.

            # MAIN commands
            md_script_command = prefix_command + "\n\n" + md_args + "\n"
            md_script_command += "python3 " + worker_script + '  "${md_args[@]}" \n'

            if verbose:
                print("PREVIOUS ID: ", previous_job_ID)
                if verbose_lvl >= 2:
                    print("COMMAND: ", md_script_command)

            # SCHEDULE THE COMMANDS
            try:
                if verbose:
                    print("\tSIMULATION")

                os.chdir(tmp_outdir)
                sub_job = Submission_job(
                    command=md_script_command,
                    jobName=tmp_jobname,
                    submit_from_dir=tmp_outdir,
                    queue_after_jobID=previous_job_ID,
                    outLog=tmp_outdir + "/" + out_prefix + "_md.out",
                    errLog=tmp_outdir + "/" + out_prefix + "_md.err",
                    sumbit_from_file=True,
                )
                previous_job_ID = job_submission_system.submit_to_queue(sub_job)
                if verbose:
                    print("SIMULATION ID: ", previous_job_ID)
            except ValueError as err:  # job already in the queue
                # args may hold non-string objects, hence the str mapping.
                raise ValueError(
                    "ERROR during submission of main job " + str(tmp_jobname) + ":\n" + "\n".join(map(str, err.args))
                ) from err

            # OPTIONAL schedule - analysis inbetween.
            if (
                runID > 1
                and run_analysis_script_every_x_runs != 0
                and runID % run_analysis_script_every_x_runs == 0
                and runID < chain_job_repetitions
            ):
                if (verbose) and verbose_lvl >= 2:
                    print("\tINBETWEEN ANALYSIS")
                sub_job = Submission_job(
                    command=in_analysis_script_path,
                    jobName=jobname + "_intermediate_ana_run_" + str(runID),
                    outLog=tmp_outdir + "/" + out_prefix + "_inbetweenAna.out",
                    errLog=tmp_outdir + "/" + out_prefix + "_inbetweenAna.err",
                    queue_after_jobID=previous_job_ID,
                )
                try:
                    ana_id = job_submission_system.submit_to_queue(sub_job)
                    if (verbose) and verbose_lvl >= 2:
                        print("\n")
                except ValueError as err:  # job already in the queue
                    # A failing in-between analysis does not stop the chain.
                    print("ERROR during submission of analysis command of " + sub_job.jobName + ":\n")
                    print("\n".join(map(str, err.args)))
        else:
            if (verbose) and verbose_lvl >= 2:
                print("Did not submit!")

        if (verbose) and verbose_lvl >= 2:
            print("\n")
        if (verbose) and verbose_lvl >= 2:
            print("job_postprocess ")
        # The prefix command is only used for the first run of the chain.
        prefix_command = ""

        # Resulting cnf is provided to use it in further approaches.
        simSystem.cnf = Cnf(tmp_out_cnf, _future_file=True)

    if ana_id is not None:
        previous_job_ID = ana_id

    return previous_job_ID, tmp_jobname, simSystem
def submit_to_queue(self, sub_job: Submission_job) -> int:
    """
    Submit the given job to the LSF queue via ``bsub``.

    Parameters
    ----------
    sub_job : Submission_job
        the job to be submitted. ``sub_job.command`` is stripped in place and
        ``sub_job.jobID`` is set to the returned job ID.

    Returns
    -------
    int
        the ID of the submitted job, the ID of an already queued job with the
        same name (if double submission is blocked), or -1 if nothing was
        submitted.

    Raises
    ------
    ChildProcessError
        if the submission fails or no job ID can be read from the bsub output.
    """
    # QUEUE checking to not double submit
    if self._block_double_submission and self._submission:
        if self.verbose:
            print("check queue")
        ids = list(self.search_queue_for_jobname(sub_job.jobName).index)

        if len(ids) > 0:
            if self.verbose:
                print(
                    "\tSKIP - FOUND JOB: \t\t"
                    + "\n\t\t".join(map(str, ids))
                    + "\n\t\t with jobname: "
                    + sub_job.jobName
                )
            return ids[0]

    orig_dir = os.getcwd()
    try:
        if isinstance(sub_job.submit_from_dir, str) and os.path.isdir(sub_job.submit_from_dir):
            os.chdir(sub_job.submit_from_dir)
            command_file_path = sub_job.submit_from_dir + "/job_" + str(sub_job.jobName) + ".sh"
        else:
            command_file_path = "./job_" + str(sub_job.jobName) + ".sh"

        # generate submission_string:
        submission_string = "bsub "
        submission_string += " -J" + sub_job.jobName + " "
        submission_string += " -W " + str(self._job_duration) + " "

        if sub_job.post_execution_command is not None:
            submission_string += '-Ep "' + sub_job.post_execution_command + '" '

        # Without any log file given, the output goes to <jobName>.out
        if not isinstance(sub_job.outLog, str) and not isinstance(sub_job.errLog, str):
            outLog = sub_job.jobName + ".out"
            submission_string += " -o " + outLog
        elif isinstance(sub_job.outLog, str):
            submission_string += " -o " + sub_job.outLog

        if isinstance(sub_job.errLog, str):
            submission_string += " -e " + sub_job.errLog

        nCPU = self._nmpi * self._nomp
        submission_string += " -n " + str(nCPU) + " "

        # TODO: add GPU support

        if isinstance(self._max_storage, int):
            submission_string += " -R rusage[mem=" + str(self._max_storage) + "] "

        # Only chain to a real predecessor: 0 / "0" means "no previous job".
        # (The former `!= 0 or != "0"` test was true for every value.)
        if isinstance(sub_job.queue_after_jobID, (int, str)) and sub_job.queue_after_jobID not in (0, "0"):
            submission_string += ' -w "' + self._chain_prefix + "(" + str(sub_job.queue_after_jobID) + ')" '

        if self._begin_mail:
            submission_string += " -B "
        if self._end_mail:
            submission_string += " -N "

        sub_job.command = sub_job.command.strip()  # remove trailing line breaks

        if self._nomp >= 1:
            command = "export OMP_NUM_THREADS=" + str(self._nomp) + ";\n " + sub_job.command + " "
        else:
            command = "\n " + sub_job.command + ""

        if sub_job.sumbit_from_file:
            if self.verbose:
                print("writing tmp-submission-file to: ", command_file_path)
            with open(command_file_path, "w") as command_file:
                command_file.write("#!/bin/bash\n")
                command_file.write(command + ";\n")
            command = command_file_path

            bash.execute("chmod +x " + command_file_path, env=self._environment)

        # finalize string: bsub options as single tokens, the command last
        submission_string = submission_string.split() + [command]

        if self.verbose:
            print("Submission Command: \t", " ".join(submission_string))

        if self._submission and not self._dummy:
            try:
                out_process = bash.execute(command=submission_string, catch_STD=True, env=self._environment)
                std_out = "\n".join(map(str, out_process.stdout.readlines()))

                # The job ID is read from between the first "<" and ">".
                id_start = std_out.find("<")
                id_end = std_out.find(">")
                job_id_text = std_out[id_start + 1 : id_end].strip()
                if id_start < 0 or id_end <= id_start or job_id_text == "":
                    raise ValueError("Did not get at job ID!")
                job_id = int(job_id_text)
                if self.verbose:
                    print("process returned id: " + str(job_id))
            except Exception as e:
                raise ChildProcessError(
                    "could not submit this command: \n" + str(submission_string) + "\n\n" + str(e)
                ) from e
        else:
            job_id = -1
    finally:
        # Go back to the starting directory, also if the submission failed.
        os.chdir(orig_dir)

    sub_job.jobID = job_id
    return job_id
def submit_jobAarray_to_queue(self, sub_job: Submission_job) -> int:
    """
    Submit a job array to the LSF queue via ``bsub``.

    The array covers the indices ``sub_job.start_job`` to ``sub_job.end_job``.
    The number of array jobs allowed to run at the same time is
    ``sub_job.jobLim``; if that is None, ``end_job - start_job`` is used.

    Parameters
    ----------
    sub_job : Submission_job
        the job to be submitted. ``sub_job.jobID`` is set to the job ID.

    Returns
    -------
    int
        the ID of the submitted job array, the ID of an already queued job
        with the same name (if double submission is blocked), or -1 if
        nothing was submitted.

    Raises
    ------
    ChildProcessError
        if the submission fails or no job ID can be read from the bsub output.
    """
    # QUEUE checking to not double submit
    if self._submission and self._block_double_submission:
        if self.verbose:
            print("check queue")
        # Same lookup as in submit_to_queue: the job IDs are the index of
        # the search result.
        ids = list(self.search_queue_for_jobname(sub_job.jobName).index)

        if len(ids) > 0:
            if self.verbose:
                print(
                    "\tSKIP - FOUND JOB: \t\t"
                    + "\n\t\t".join(map(str, ids))
                    + "\n\t\t with jobname: "
                    + sub_job.jobName
                )
            return ids[0]

    # generate submission_string:
    submission_string = ""
    if isinstance(sub_job.submit_from_dir, str) and os.path.isdir(sub_job.submit_from_dir):
        submission_string += "cd " + sub_job.submit_from_dir + " && "

    # An explicitly given limit must be used (it used to be left undefined).
    if sub_job.jobLim is None:
        jobLim = sub_job.end_job - sub_job.start_job
    else:
        jobLim = sub_job.jobLim

    jobName = str(sub_job.jobName) + "[" + str(sub_job.start_job) + "-" + str(sub_job.end_job) + "]%" + str(jobLim)

    submission_string += 'bsub -J " ' + jobName + ' " -W "' + str(self._job_duration) + '" '

    if isinstance(sub_job.jobGroup, str):
        submission_string += " -g " + sub_job.jobGroup + " "

    # Without any log file given, the output goes to <jobName>.out
    if not isinstance(sub_job.outLog, str) and not isinstance(sub_job.errLog, str):
        outLog = jobName + ".out"
        submission_string += " -oo " + outLog
    elif isinstance(sub_job.outLog, str):
        submission_string += " -oo " + sub_job.outLog

    if isinstance(sub_job.errLog, str):
        submission_string += " -eo " + sub_job.errLog

    nCPU = self._nmpi * self._nomp
    submission_string += " -n " + str(nCPU) + " "

    if isinstance(self._max_storage, int):
        submission_string += ' -R "rusage[mem=' + str(self._max_storage) + ']" '

    if isinstance(sub_job.queue_after_jobID, (int, str)):
        # The dependency expression needs both its opening and closing quote.
        submission_string += ' -w "' + self._chain_prefix + "(" + str(sub_job.queue_after_jobID) + ')" '

    if self._begin_mail:
        submission_string += " -B "
    if self._end_mail:
        submission_string += " -N "

    if self._nomp > 1:
        command = " export OMP_NUM_THREADS=" + str(self._nomp) + " && " + sub_job.command + " "
    else:
        command = " " + sub_job.command + " "

    # finalize string: bsub options as single tokens, the command last
    submission_string = submission_string.split() + [command]

    if self.verbose:
        print("Submission Command: \t", " ".join(submission_string))

    if self._submission and not self._dummy:
        try:
            # Read the captured stdout of the process, as submit_to_queue does.
            out_process = bash.execute(command=submission_string, catch_STD=True, env=self._environment)
            std_out = "\n".join(map(str, out_process.stdout.readlines()))

            # The job ID is read from between the first "<" and ">".
            id_start = std_out.find("<")
            id_end = std_out.find(">")
            job_id = str(std_out[id_start + 1 : id_end]).strip()
            if self.verbose:
                print("process returned id: " + str(job_id))
            if id_start < 0 or id_end <= id_start or not job_id.isdigit():
                raise ValueError("Did not get at job ID!")
        except Exception as e:
            raise ChildProcessError(
                "could not submit this command: \n" + " ".join(submission_string) + "\n\n" + str(e)
            ) from e
    else:
        job_id = -1

    sub_job.jobID = job_id
    return int(job_id)
def do(
    in_simSystem: Gromos_System,
    out_dir_path: str,
    simulation_run_num: int,
    equilibration_run_num: int = 0,
    initialize_first_run=False,
    reinitialize_every_run=False,
    analysis_script_path: str = None,
    submission_system: _SubmissionSystem = LSF(),
    previous_job_ID: int = None,
    _no_double_submit_check: bool = False,
    _work_dir: str = None,
    verbose: bool = True,
    verbose_lvl: int = 1,
) -> int:
    """
    Schedule the simulations starting from the given gromos system.

    First the (optional) equilibration runs are chained, then the simulation
    runs and finally the (optional) analysis script is submitted.

    Parameters
    ----------
    in_simSystem : Gromos_System
        system that should be submitted with the provided imd file
    out_dir_path : str
        out directory path for the simulation
    simulation_run_num : int
        number of simulation runs
    equilibration_run_num : int, optional
        number of equilibration runs, by default 0
    initialize_first_run : bool, optional
        forwarded to chain_submission, by default False
    reinitialize_every_run : bool, optional
        DEPRECATED! forwarded to chain_submission, by default False
    analysis_script_path : str, optional
        path to the analysis script that is submitted after the simulation
        runs, by default None
    submission_system : _SubmissionSystem, optional
        system to be used to submit the jobs, by default LSF()
    previous_job_ID : int, optional
        previous job ID, by default None
    _no_double_submit_check : bool, optional
        assigned to submission_system.block_double_submission, by default False
    _work_dir : str, optional
        directory to write out the tmp files, by default None
    verbose : bool, optional
        print progress information, by default True
    verbose_lvl : int, optional
        amount of progress information, by default 1

    Returns
    -------
    int
        the last job id that was submitted.

    Raises
    ------
    IOError
        if an error happens in the preparation of the simulation or in the
        submission
    """
    # NOTE(review): the default LSF() instance is created once at definition
    # time and shared between calls; it is modified in the next line.
    # NOTE(review): by name, _no_double_submit_check=True should switch the
    # queue check OFF, but it is assigned unnegated to block_double_submission
    # - confirm the intended polarity before changing it.
    submission_system.block_double_submission = _no_double_submit_check
    job_verb = bool(verbose and verbose_lvl > 2)

    # The string "None" is treated like a missing workdir.
    has_work_dir = _work_dir is not None and _work_dir != "None"

    # prepare
    try:
        if verbose:
            print("Script: ", __file__)

        if verbose and verbose_lvl > 2:
            print(spacer + "Simulation PREPERATION\n" + spacer + "\n")

        # Outdir
        bash.make_folder(out_dir_path)  # final output_folder

        # workdir:
        if has_work_dir:
            if verbose and verbose_lvl > 2:
                print("\t -> Generating given workdir: " + _work_dir)
            bash.make_folder(_work_dir, "-p")
            os.chdir(_work_dir)
        else:
            if verbose and verbose_lvl > 2:
                print("\t -> Using on node workdir")

        # sim vars logs
        out_prefix = in_simSystem.name
        worker_script = workerScript.__file__

        # CHECK PATH DEPENDENCIES - all Files present?
        # needed variables
        check_path_dependencies_paths = [
            worker_script,
            out_dir_path,
        ]

        # variable paths
        # A workdir that was not created above ("None") must not be required
        # to exist either.
        if has_work_dir and _work_dir != "out_dir":
            check_path_dependencies_paths.append(_work_dir)

        # Files that are only promised by a previous job can not be checked.
        if not in_simSystem.top._future_file:
            check_path_dependencies_paths.append(in_simSystem.top.path)
        if not in_simSystem.cnf._future_file:
            check_path_dependencies_paths.append(in_simSystem.cnf.path)
        if not in_simSystem.imd._future_file:
            check_path_dependencies_paths.append(in_simSystem.imd.path)
        if in_simSystem.ptp is not None:
            check_path_dependencies_paths.append(in_simSystem.ptp.path)
        if in_simSystem.disres is not None:
            check_path_dependencies_paths.append(in_simSystem.disres.path)
        if in_simSystem.posres is not None:
            check_path_dependencies_paths.append(in_simSystem.posres.path)
        if in_simSystem.refpos is not None:
            check_path_dependencies_paths.append(in_simSystem.refpos.path)
        if in_simSystem.qmmm is not None:
            check_path_dependencies_paths.append(in_simSystem.qmmm.path)

        bash.check_path_dependencies(check_path_dependencies_paths, verbose=job_verb)

    except Exception as err:
        print("#####################################################################################")
        print("\t\tERROR in Preperations")
        print("#####################################################################################")

        traceback.print_exception(*sys.exc_info())

        raise IOError("ERROR in Preperations to submission!") from err

    # RUN Job
    try:
        if verbose:
            print("\n" + spacer + "Simulation Setup:\n" + spacer)
        if verbose:
            print("steps_per_run: ", in_simSystem.imd.STEP.NSTLIM)
        if verbose:
            print("equis: ", equilibration_run_num)
        if verbose:
            print("simulation runs: ", simulation_run_num)

        # Submission
        # EQ
        eq_job_id = None
        if equilibration_run_num > 0:  # EQUILIBRATION
            eq_job_id, tmp_jobname, in_simSystem = chain_submission(
                simSystem=in_simSystem,
                out_dir_path=out_dir_path,
                out_prefix="eq_" + out_prefix,
                jobname=in_simSystem.name + "_eq",
                chain_job_repetitions=equilibration_run_num,
                worker_script=worker_script,
                job_submission_system=submission_system,
                start_run_index=1,
                prefix_command="",
                previous_job_ID=previous_job_ID,
                work_dir=_work_dir,
                initialize_first_run=initialize_first_run,
                reinitialize_every_run=reinitialize_every_run,
                verbose=job_verb,
                # forwarded, so that the level dependent output is reachable
                verbose_lvl=verbose_lvl,
            )

        # MD - queued after the last equilibration job, if there was one.
        previous_job_ID = previous_job_ID if (eq_job_id is None) else eq_job_id
        previous_job_ID, tmp_jobname, in_simSystem = chain_submission(
            simSystem=in_simSystem,
            out_dir_path=out_dir_path,
            out_prefix=out_prefix,
            jobname=in_simSystem.name,
            chain_job_repetitions=equilibration_run_num + simulation_run_num,
            start_run_index=equilibration_run_num + 1,
            worker_script=worker_script,
            job_submission_system=submission_system,
            prefix_command="",
            previous_job_ID=previous_job_ID,
            work_dir=_work_dir,
            initialize_first_run=initialize_first_run,
            reinitialize_every_run=reinitialize_every_run,
            verbose=job_verb,
            verbose_lvl=verbose_lvl,
        )

        ana_previous_job_ID = previous_job_ID
        if analysis_script_path is not None:
            tmp_jobname = in_simSystem.name + "_ana"

            ana_log = os.path.dirname(analysis_script_path) + "/ana_out.log"
            if verbose:
                print(spacer + "\n submit final analysis part \n")
            if verbose:
                print(ana_log)
            if verbose:
                print(analysis_script_path)

            sub_job = Submission_job(
                command="python3 " + analysis_script_path,
                jobName=tmp_jobname,
                outLog=ana_log,
                queue_after_jobID=previous_job_ID,
            )
            ana_previous_job_ID = submission_system.submit_to_queue(sub_job)

            # Report the ID of the analysis job, not of the last simulation.
            if verbose:
                print("ANA jobID: " + str(ana_previous_job_ID))

    except Exception as err:
        print("#####################################################################################")
        print("\t\tERROR in Submission")
        print("#####################################################################################")

        traceback.print_exception(*sys.exc_info())
        raise IOError("ERROR in SUBMISSION!") from err

    return ana_previous_job_ID
def test_submit_jobAarray_to_queue1_10(self):
    """Submit an echo job as job array with the indices 1 to 10."""
    array_job = Submission_job(
        jobName="test_job",
        command='echo " WUHAHAHA"',
        start_job=1,
        end_job=10,
    )
    submission_system = self.file_class(
        verbose=self.verbose,
        submission=self.submission,
    )
    submission_system.submit_jobAarray_to_queue(sub_job=array_job)
def test_submit(self):
    """Submit a single echo job with the submission system under test."""
    echo_job = Submission_job(
        jobName="test_job",
        command='echo " WUHAHAHA"',
    )
    submission_system = self.file_class(
        verbose=self.verbose,
        submission=self.submission,
    )
    submission_system.submit_to_queue(sub_job=echo_job)