Example #1
0
    def _writeJobScript(statement, job_memory, job_name, shellfile):
        '''write a bash job script for *statement* and return its path.

        The generated script appends debugging information (shell
        variables, loaded modules, hostname and ``/proc/meminfo``) to
        *shellfile*, each line prefixed with *job_name*, then restricts
        virtual memory and finally runs the expanded statement.

        Arguments
        ---------
        statement : string
            Statement to run; it is passed through ``expandStatement``
            together with ``ignore_pipe_errors``, which is taken from
            the enclosing scope.
        job_memory : string
            Memory limit in a format accepted by
            ``IOTools.human2bytes`` (presumably something like "4G").
        job_name : string
            Prefix for every line written to the shell log.
        shellfile : string
            File that the debugging output is appended to.

        Returns
        -------
        job_path : string
            Name of the script, obtained from ``getTempFilename`` in
            ``PARAMS["workingdir"]``.
        '''
        # Logging the statement itself to the shell log is disabled
        # because of problems with shell quoting.

        # `module list` outputs to stderr, so stderr and stdout are
        # merged. errexit is switched off around it, presumably so
        # that a failing `module` command does not abort the job.
        script = '''#!/bin/bash -e \n
                    echo "%(job_name)s : START -> ${0}" >> %(shellfile)s
                    set | sed 's/^/%(job_name)s : /' &>> %(shellfile)s
                    set +o errexit
                    module list 2>&1 | sed 's/^/%(job_name)s: /' &>> %(shellfile)s
                    set -o errexit
                    hostname | sed 's/^/%(job_name)s: /' &>> %(shellfile)s
                    cat /proc/meminfo | sed 's/^/%(job_name)s: /' &>> %(shellfile)s
                    echo "%(job_name)s : END -> ${0}" >> %(shellfile)s
                 ''' % {"job_name": job_name, "shellfile": shellfile}

        # restrict virtual memory
        # Note that there are resources in SGE which could do this directly
        # such as v_hmem.
        # Note that limiting resident set sizes (RSS) with ulimit is not
        # possible in newer kernels.
        # bash's `ulimit -v` expects the value in units of 1024 bytes,
        # so convert from bytes, rounding up.
        memory_kb = (int(IOTools.human2bytes(job_memory)) + 1023) // 1024
        script += "ulimit -v %i\n" % memory_kb
        script += expandStatement(statement,
                                  ignore_pipe_errors=ignore_pipe_errors)
        script += "\n"

        job_path = getTempFilename(dir=PARAMS["workingdir"])

        with open(job_path, "w") as script_file:
            script_file.write(script)

        return job_path
Example #2
0
    def __init__(self, **kwargs):
        '''set up job resources, names and logging from keyword options.

        Recognised keys (all optional):

        job_threads
            Number of threads, defaults to 1.
        job_memory
            Memory per thread. If neither memory key is given, the
            value is read from ``PARAMS["cluster"]["memory_default"]``
            with a fallback of "4G".
        job_total_memory
            Memory for the whole job; the per-thread memory is derived
            by dividing by ``job_threads``.
        ignore_pipe_errors, ignore_errors
            Flags, both default to False.
        job_name, task_name
            Names stored on the instance.
        outfile, outfiles
            Output file name(s); their directory names are collected
            in ``self.output_directories``.
        shell_logfile
            Shell log file, defaults to "shell.log". A path that does
            not start with ``os.sep`` is placed below
            ``PARAMS["workingdir"]``; a false value disables it.

        All keyword arguments are kept in ``self.options``.

        Raises
        ------
        ValueError
            If both ``job_memory`` and ``job_total_memory`` are given.
        '''

        self.logger = get_logger()

        self.job_threads = kwargs.get("job_threads", 1)

        if "job_memory" in kwargs and "job_total_memory" in kwargs:
            raise ValueError(
                "both job_memory and job_total_memory have been given")

        if "job_total_memory" in kwargs:
            self.job_total_memory = kwargs['job_total_memory']
            if self.job_total_memory == "unlimited":
                # "unlimited" is a sentinel, not a size that can be
                # divided between threads.
                self.job_memory = self.job_total_memory
            else:
                self.job_memory = IOTools.bytes2human(
                    IOTools.human2bytes(self.job_total_memory) /
                    self.job_threads)
        else:
            if 'job_memory' in kwargs:
                self.job_memory = kwargs['job_memory']
            else:
                self.job_memory = PARAMS["cluster"].get(
                    "memory_default", "4G")

            if self.job_memory == "unlimited":
                self.job_total_memory = self.job_memory
            else:
                # job_memory is a human-readable string such as "4G";
                # multiplying the string itself would repeat it
                # ("4G4G"), so scale the byte count instead.
                self.job_total_memory = IOTools.bytes2human(
                    IOTools.human2bytes(self.job_memory) *
                    self.job_threads)

        self.ignore_pipe_errors = kwargs.get('ignore_pipe_errors', False)
        self.ignore_errors = kwargs.get('ignore_errors', False)

        self.job_name = kwargs.get("job_name", "unknow_job_name")
        self.task_name = kwargs.get("task_name", "unknown_task_name")

        # deduce output directory/directories, requires somewhat
        # consistent naming in the calling function.
        outfiles = []
        if "outfile" in kwargs:
            outfiles.append(kwargs["outfile"])
        if "outfiles" in kwargs:
            outfiles.extend(kwargs["outfiles"])

        self.output_directories = set(
            os.path.dirname(x) for x in outfiles)

        self.options = kwargs

        self.workingdir = PARAMS["workingdir"]

        self.shellfile = kwargs.get("shell_logfile", "shell.log")
        if self.shellfile:
            if not self.shellfile.startswith(os.sep):
                # keep the requested file name instead of always
                # falling back to "shell.log"
                self.shellfile = os.path.join(self.workingdir,
                                              self.shellfile)
Example #3
0
    def build_job_script(self, statement):
        '''write an executable bash script that runs *statement*.

        The script changes into ``self.workingdir``, creates the
        output directories, sets up a scratch directory exported as
        ``TMPDIR``, installs an EXIT trap that runs all cleanup
        functions, applies memory limits unless ``self.job_memory`` is
        "unlimited", optionally appends debugging information to
        ``self.shellfile`` and finally runs the expanded statement.

        returns (statement, job_path), where *statement* is the
        argument as passed in and *job_path* is the absolute path of
        the script.
        '''
        script_name = get_temp_filename(
            dir=self.workingdir, clear=True) + ".sh"
        scratch_dir = get_temp_dir(clear=True)

        expanded_statement, cleanup_funcs = self.expand_statement(statement)

        with open(script_name, "w") as script:
            emit = script.write

            emit("#!/bin/bash -eu\n")
            if not self.ignore_pipe_errors:
                emit("set -o pipefail\n")

            # read/write/execute for owner and group
            os.chmod(script_name, stat.S_IRWXU | stat.S_IRWXG)

            emit("\ncd {}\n".format(self.workingdir))
            if self.output_directories is not None:
                # empty directory names are skipped
                for outdir in filter(None, self.output_directories):
                    emit("\nmkdir -p {}\n".format(outdir))

            # scratch directory for temporary files
            emit("umask 002\n")
            emit("mkdir -p {}\n".format(scratch_dir))
            emit("export TMPDIR={}\n".format(scratch_dir))

            # the scratch directory is removed and timing information
            # printed whenever the script exits
            cleanup_funcs.append(
                ("clean_temp", "{{ rm -rf {}; }}".format(scratch_dir)))
            cleanup_funcs.append(
                ("info", "{ echo 'benchmark'; hostname; times; }"))
            for func_name, func_body in cleanup_funcs:
                emit("\n{}() {}\n".format(func_name, func_body))

            call_all = "; ".join(entry[0] for entry in cleanup_funcs)
            emit("\nclean_all() {{ {}; }}\n".format(call_all))
            emit("\ntrap clean_all EXIT\n\n")

            if self.job_memory != "unlimited":
                # ulimit -v and -m take their value in kb; request at
                # least 1000 kb.
                memory_kb = (IOTools.human2bytes(self.job_memory) / 1024 *
                             self.job_threads)
                requested_memory_kb = max(1000, int(math.ceil(memory_kb)))

                # errexit is switched off while setting limits, as
                # this often fails for lack of permissions
                emit("set +e\n")
                for flag in ("-v", "-m"):
                    emit("ulimit {} {} > /dev/null \n".format(
                        flag, requested_memory_kb))
                # NOTE(review): the original comment says "set as hard
                # limit", but no value is passed here - confirm intent.
                emit("ulimit -H -v > /dev/null \n")
                emit("set -e\n")

            if self.shellfile:
                # the directory of the shell log has to exist
                emit("mkdir -p $(dirname \"{}\")\n".format(
                    self.shellfile))

                # low-level debugging information for the shell log;
                # logging the statement itself is disabled because of
                # problems with quoting.
                marker_args = (self.job_name, script_name, self.shellfile)
                prefix_args = (self.job_name, self.shellfile)
                log_lines = (
                    ('echo "%s : START -> %s" >> %s\n', marker_args),
                    ("set | sed 's/^/%s : /' &>> %s\n", prefix_args),
                    ("pwd | sed 's/^/%s : /' &>> %s\n", prefix_args),
                    ("hostname | sed 's/^/%s: /' &>> %s\n", prefix_args),
                    ("cat /proc/meminfo | sed 's/^/%s: /' &>> %s\n",
                     prefix_args),
                    ('echo "%s : END -> %s" >> %s\n', marker_args),
                    ("ulimit | sed 's/^/%s: /' &>> %s\n", prefix_args),
                )
                for template, args in log_lines:
                    emit(template % args)

            emit(expanded_statement)
            emit("\n\n")

        return statement, os.path.abspath(script_name)