def _writeJobScript(statement, job_memory, job_name, shellfile):
    '''write a bash job script for ``statement`` and return its path.

    The generated script

    1. appends diagnostics to ``shellfile`` (shell variables, loaded
       environment modules, hostname and ``/proc/meminfo``), every
       line prefixed with ``job_name``,
    2. restricts virtual memory with ``ulimit -v`` and
    3. runs the statement as expanded by ``expandStatement``.

    Arguments
    ---------
    statement
        Statement to run; passed on to ``expandStatement``.
    job_memory
        Human readable memory size, converted with
        ``IOTools.human2bytes``. The literal ``"unlimited"`` skips the
        ``ulimit`` line, as ``build_job_script`` does.
    job_name
        Prefix for every diagnostic line written to ``shellfile``.
    shellfile
        Path of the shell log the diagnostics are appended to.

    Returns
    -------
    Path of the script, obtained from ``getTempFilename`` with
    ``dir=PARAMS["workingdir"]``.
    '''
    # The statement itself is deliberately not echoed into the shell
    # log: quoting it reliably proved problematic.
    #
    # ``module list`` reports on stderr, hence the 2>&1 before the
    # pipe. errexit is switched off around that one command and
    # switched back on afterwards.
    header = '''#!/bin/bash -e \n
echo "%(job_name)s : START -> ${0}" >> %(shellfile)s
set | sed 's/^/%(job_name)s : /' &>> %(shellfile)s
set +o errexit
module list 2>&1 | sed 's/^/%(job_name)s: /' &>> %(shellfile)s
set -o errexit
hostname | sed 's/^/%(job_name)s: /' &>> %(shellfile)s
cat /proc/meminfo | sed 's/^/%(job_name)s: /' &>> %(shellfile)s
echo "%(job_name)s : END -> ${0}" >> %(shellfile)s
''' % {"job_name": job_name, "shellfile": shellfile}

    parts = [header]

    # restrict virtual memory
    # Note that there are resources in SGE which could do this directly
    # such as v_hmem.
    # Note that limiting resident set sizes (RSS) with ulimit is not
    # possible in newer kernels.
    if job_memory != "unlimited":
        # bash's ``ulimit -v`` takes its value in units of 1024 bytes,
        # so the byte count has to be converted; passing bytes would
        # make the limit 1024 times larger than requested.
        memory_kb = int(math.ceil(IOTools.human2bytes(job_memory) / 1024.0))
        parts.append("ulimit -v %i\n" % memory_kb)

    # NOTE(review): ``ignore_pipe_errors`` is neither a parameter nor a
    # local of this function; it is resolved from an enclosing or
    # module scope - confirm that it is defined there.
    parts.append(
        expandStatement(statement, ignore_pipe_errors=ignore_pipe_errors))
    parts.append("\n")

    job_path = getTempFilename(dir=PARAMS["workingdir"])
    with open(job_path, "w") as script_file:
        script_file.write("".join(parts))

    return job_path
def __init__(self, **kwargs):
    '''set up resources, names and log targets from keyword options.

    Keys read from ``kwargs`` (all optional):

    job_threads
        Number of threads, default 1.
    job_memory
        Memory per thread as a human readable size. Defaults to
        ``PARAMS["cluster"]["memory_default"]`` or ``"4G"``.
    job_total_memory
        Memory for all threads together; ``job_memory`` is derived
        from it by dividing by ``job_threads``. Mutually exclusive
        with ``job_memory``.
    ignore_pipe_errors, ignore_errors
        Flags, default ``False``.
    job_name, task_name
        Labels, defaulting to ``"unknow_job_name"`` and
        ``"unknown_task_name"``.
    outfile, outfiles
        Output file name(s); their directories are collected in
        ``self.output_directories``.
    shell_logfile
        Shell log file, default ``"shell.log"``. A relative path is
        resolved against ``PARAMS["workingdir"]``; a false value
        disables the shell log.

    The complete ``kwargs`` dictionary is kept as ``self.options``.

    Raises
    ------
    ValueError
        If both ``job_memory`` and ``job_total_memory`` are given.
    '''
    self.logger = get_logger()
    self.job_threads = kwargs.get("job_threads", 1)

    if "job_memory" in kwargs and "job_total_memory" in kwargs:
        raise ValueError(
            "both job_memory and job_total_memory have been given")

    if "job_total_memory" in kwargs:
        self.job_total_memory = kwargs['job_total_memory']
        self.job_memory = IOTools.bytes2human(
            IOTools.human2bytes(self.job_total_memory) / self.job_threads)
    else:
        if 'job_memory' in kwargs:
            self.job_memory = kwargs['job_memory']
        else:
            self.job_memory = PARAMS["cluster"].get("memory_default", "4G")

        if self.job_memory == "unlimited":
            # nothing to scale
            self.job_total_memory = self.job_memory
        elif self.job_threads == 1 or not isinstance(self.job_memory, str):
            # a single thread leaves the value untouched and plain
            # numbers scale arithmetically
            self.job_total_memory = self.job_memory * self.job_threads
        else:
            # A size string has to be scaled through its byte value:
            # multiplying the string itself would repeat it
            # ("4G" * 2 == "4G4G").
            self.job_total_memory = IOTools.bytes2human(
                IOTools.human2bytes(self.job_memory) * self.job_threads)

    self.ignore_pipe_errors = kwargs.get('ignore_pipe_errors', False)
    self.ignore_errors = kwargs.get('ignore_errors', False)

    self.job_name = kwargs.get("job_name", "unknow_job_name")
    self.task_name = kwargs.get("task_name", "unknown_task_name")

    # deduce output directory/directories, requires somewhat
    # consistent naming in the calling function.
    outfiles = []
    if "outfile" in kwargs:
        outfiles.append(kwargs["outfile"])
    if "outfiles" in kwargs:
        outfiles.extend(kwargs["outfiles"])
    self.output_directories = {os.path.dirname(x) for x in outfiles}

    self.options = kwargs
    self.workingdir = PARAMS["workingdir"]

    self.shellfile = kwargs.get("shell_logfile", "shell.log")
    if self.shellfile and not os.path.isabs(self.shellfile):
        # keep the name the caller asked for instead of silently
        # replacing it with "shell.log"
        self.shellfile = os.path.join(self.workingdir, self.shellfile)
def build_job_script(self, statement):
    '''build an executable bash job script from statement.

    The script changes into ``self.workingdir``, creates the output
    directories and a scratch directory exported as ``TMPDIR``,
    installs an ``EXIT`` trap that runs all clean-up functions,
    optionally restricts memory with ``ulimit``, optionally appends
    diagnostics to ``self.shellfile`` and finally runs the statement
    as expanded by ``self.expand_statement``.

    Arguments
    ---------
    statement
        Statement to run; passed on to ``self.expand_statement``.

    Returns
    -------
    (statement, job_path)
        The statement exactly as passed in and the absolute path of
        the script that was written.
    '''
    tmpfilename = get_temp_filename(dir=self.workingdir, clear=True) + ".sh"
    tmpdir = get_temp_dir(clear=True)

    expanded_statement, cleanup_funcs = self.expand_statement(statement)

    # The script is assembled in memory and written in one go, so a
    # failure while building it does not leave a partial file behind.
    script = ["#!/bin/bash -eu\n"]
    if not self.ignore_pipe_errors:
        script.append("set -o pipefail\n")

    script.append("\ncd {}\n".format(self.workingdir))
    if self.output_directories is not None:
        script.extend(
            "\nmkdir -p {}\n".format(outdir)
            for outdir in self.output_directories if outdir)

    # create and set system scratch dir for temporary files
    script.append("umask 002\n")
    script.append("mkdir -p {}\n".format(tmpdir))
    script.append("export TMPDIR={}\n".format(tmpdir))

    cleanup_funcs.append(
        ("clean_temp", "{{ rm -rf {}; }}".format(tmpdir)))
    # output times whenever script exits, preserving
    # return status
    cleanup_funcs.append(
        ("info", "{ echo 'benchmark'; hostname; times; }"))

    for cleanup_func, cleanup_code in cleanup_funcs:
        script.append("\n{}() {}\n".format(cleanup_func, cleanup_code))

    script.append("\nclean_all() {{ {}; }}\n".format(
        "; ".join(name for name, _ in cleanup_funcs)))
    script.append("\ntrap clean_all EXIT\n\n")

    if self.job_memory != "unlimited":
        # restrict virtual memory
        # Note that there are resources in SGE which could do this directly
        # such as v_hmem.
        # Note that limiting resident set sizes (RSS) with ulimit is not
        # possible in newer kernels.
        # -v and -m accept memory in kb
        requested_memory_kb = max(
            1000,
            int(math.ceil(
                IOTools.human2bytes(self.job_memory) / 1024 *
                self.job_threads)))
        # errexit is suspended because setting limits often fails for
        # lack of permissions
        script.append("set +e\n")
        script.append(
            "ulimit -v {} > /dev/null \n".format(requested_memory_kb))
        script.append(
            "ulimit -m {} > /dev/null \n".format(requested_memory_kb))
        # NOTE(review): without a value this only prints the current
        # hard limit (discarded here); "ulimit -v N" above already
        # sets both the soft and the hard limit.
        script.append("ulimit -H -v > /dev/null \n")
        script.append("set -e\n")

    if self.shellfile:
        # make sure path exists that we want to write to
        script.append("mkdir -p $(dirname \"{}\")\n".format(
            self.shellfile))

        # output low-level debugging information to a shell log file.
        # The statement itself is deliberately not echoed: quoting it
        # reliably proved problematic.
        script.append('echo "%s : START -> %s" >> %s\n' %
                      (self.job_name, tmpfilename, self.shellfile))
        script.append("set | sed 's/^/%s : /' &>> %s\n" %
                      (self.job_name, self.shellfile))
        script.append("pwd | sed 's/^/%s : /' &>> %s\n" %
                      (self.job_name, self.shellfile))
        script.append("hostname | sed 's/^/%s: /' &>> %s\n" %
                      (self.job_name, self.shellfile))
        script.append("cat /proc/meminfo | sed 's/^/%s: /' &>> %s\n" %
                      (self.job_name, self.shellfile))
        script.append('echo "%s : END -> %s" >> %s\n' %
                      (self.job_name, tmpfilename, self.shellfile))
        script.append("ulimit | sed 's/^/%s: /' &>> %s\n" %
                      (self.job_name, self.shellfile))

    script.append(expanded_statement)
    script.append("\n\n")

    with open(tmpfilename, "w") as tmpfile:
        tmpfile.write("".join(script))

    # make executable
    os.chmod(tmpfilename, stat.S_IRWXG | stat.S_IRWXU)

    return statement, os.path.abspath(tmpfilename)