Esempio n. 1
0
    def _get_joblist_command(self, jobs=None, user=None):
        """
        The command to report full information on existing jobs.

        TODO: in the case of job arrays, decide what to do (i.e., if we want
              to pass the -t options to list each subjob).
        """
        from aiida.common.exceptions import FeatureNotAvailable

        command = 'ps o pid,stat,user,time'

        if jobs:
            if isinstance(jobs, basestring):
                command += ' h {}'.format(escape_for_bash(jobs))
            else:
                try:
                    command += ' {}'.format(' '.join(
                        escape_for_bash(j) for j in jobs))
                except TypeError:
                    raise TypeError(
                        "If provided, the 'jobs' variable must be a string or a list of strings"
                    )

        if user and not jobs:
            if user != '$USER':
                user = escape_for_bash(user)
            command += ' -U {} -u {} h'.format(user, user)

        return command
Esempio n. 2
0
    def _get_joblist_command(self, jobs=None, user=None):
        """
        Build the ``qstat -f`` command line reporting full information on
        existing jobs.

        :param jobs: a single job id (string) or an iterable of job ids.
        :param user: a user name, appended as ``-u<user>``.
        :return: the command, as a single space-joined string.
        :raise FeatureNotAvailable: if both ``jobs`` and ``user`` are given.
        :raise TypeError: if ``jobs`` is not a string and a TypeError is raised
            while iterating over it and escaping its items.

        TODO: in the case of job arrays, decide what to do (i.e., if we want
              to pass the -t options to list each subjob).
        """
        from aiida.common.exceptions import FeatureNotAvailable

        # Filtering by user and by job id at the same time is rejected upfront
        if jobs and user:
            raise FeatureNotAvailable("Cannot query by user and job(s) in PBS")

        pieces = ['qstat', '-f']

        if user:
            pieces.append('-u{}'.format(user))

        if jobs:
            if isinstance(jobs, basestring):
                job_args = '{}'.format(escape_for_bash(jobs))
            else:
                try:
                    job_args = '{}'.format(
                        ' '.join(escape_for_bash(job) for job in jobs))
                except TypeError:
                    raise TypeError(
                        "If provided, the 'jobs' variable must be a string or an iterable of strings")
            pieces.append(job_args)

        full_command = ' '.join(pieces)
        self.logger.debug("qstat command: {}".format(full_command))
        return full_command
Esempio n. 3
0
    def _exec_command_internal(self, command):
        """
        Executes the specified command in bash login shell.
        
        Before the command is executed, changes directory to the current
        working directory as returned by self.getcwd().

        For executing commands and waiting for them to finish, use
        exec_command_wait.
        Otherwise, to end the process, use the proc.wait() method.

        :param  command: the command to execute. The command is assumed to be
            already escaped using :py:func:`aiida.common.utils.escape_for_bash`.
        
        :return: a tuple with (stdin, stdout, stderr, proc),
            where stdin, stdout and stderr behave as file-like objects,
            proc is the process object as returned by the
            subprocess.Popen() class.
        """
        from aiida.common.utils import escape_for_bash

        # Note: The outer shell will eat one level of escaping, while
        # 'bash -l -c ...' will eat another. Thus, we need to escape again.
        command = 'bash -l -c ' + escape_for_bash(command)

        proc = subprocess.Popen(command,
                                shell=True,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                cwd=self.getcwd())
        return proc.stdin, proc.stdout, proc.stderr, proc
Esempio n. 4
0
    def _pg_execute_sh(self, command, user='******', **kwargs):
        '''
        executes a postgres command line as another system user in a subprocess.

        The command is run as ``sudo su <user> -c "psql <options> -tc <command>"``.

        :param command: A psql command line as a str
        :param user: Name of a system user with postgres permissions
        :param kwargs: connection details to forward to psql, signature as in psycopg2.connect.
            ``database``, ``host`` and ``port`` are turned into the ``-d``, ``-h`` and ``-p``
            options of psql, ``password`` is discarded, and whatever is left is passed
            to ``check_output``.
        :return: if the output of the subprocess is a ``str``, the list of its non-empty
            lines; otherwise the output as returned by ``check_output``.
        '''
        try:
            import subprocess32 as sp
        except ImportError:
            import subprocess as sp
        from aiida.common.utils import escape_for_bash

        # Each option is collected separately and joined with spaces later on:
        # plain string concatenation would glue them together ('-d db-h host').
        option_list = []
        database = kwargs.pop('database', None)
        if database:
            option_list.append('-d {}'.format(database))
        # The password is never passed on the command line
        kwargs.pop('password', None)
        host = kwargs.pop('host', None)
        if host:
            option_list.append('-h {}'.format(host))
        port = kwargs.pop('port', None)
        if port:
            option_list.append('-p {}'.format(port))
        options = ' '.join(option_list)

        result = sp.check_output([
            'sudo', 'su', user, '-c', 'psql {options} -tc {}'.format(
                escape_for_bash(command), options=options)
        ], **kwargs)
        if isinstance(result, str):
            result = result.strip().split('\n')
            result = [i for i in result if i]
        return result
Esempio n. 5
0
    def _get_detailed_jobinfo_command(self, jobid):
        """
        Build the ``tracejob -v`` command used to obtain detailed information
        on a job, even after the job has finished.

        The output text is just retrieved, and returned for logging purposes.

        :param jobid: the job id; it is escaped before being added to the
            command line.
        :return: the command, as a string.
        """
        escaped_jobid = escape_for_bash(jobid)
        return "tracejob -v {}".format(escaped_jobid)
Esempio n. 6
0
def computer_config_show(computer, user, current, as_option_string):
    """Show the current or default configuration for COMPUTER."""
    import tabulate
    from aiida.common.utils import escape_for_bash

    transport_cls = computer.get_transport_class()
    configure_cmd = transport_cli.create_configure_cmd(
        computer.get_transport_type())

    # Keep only the click options of the configure command whose name is a
    # valid authentication parameter of the transport class.
    click_options = [
        param for param in configure_cmd.params
        if isinstance(param, click.core.Option)
    ]
    option_list = []
    for option in click_options:
        if option.name in transport_cls.get_valid_auth_params():
            option_list.append(option)

    # Either the stored configuration, or the default of each option
    if current:
        config = get_computer_configuration(computer, user)
    else:
        config = {}
        for option in option_list:
            config[option.name] = transport_cli.transport_option_default(
                option.name, computer)

    if not as_option_string:
        rows = [('* ' + name, config[name])
                for name in transport_cls.get_valid_auth_params()]
        echo.echo(tabulate.tabulate(rows, tablefmt='plain'))
        return

    fragments = []
    for option in option_list:
        t_opt = transport_cls.auth_options[option.name]
        current_value = config.get(option.name)

        # Unset (falsy) values are skipped, but an explicit False is kept
        if not current_value and current_value is not False:
            continue

        long_name = option.opts[-1]
        if t_opt.get('switch'):
            if current_value:
                fragment = long_name
            else:
                fragment = '--no-{}'.format(option.name.replace('_', '-'))
        elif t_opt.get('is_flag'):
            default_value = transport_cli.transport_option_default(
                option.name, computer)
            fragment = long_name if current_value == default_value else ''
        else:
            fragment = '{}={}'.format(long_name, config[option.name])
        fragments.append(fragment)

    echo.echo(escape_for_bash(' '.join(fragments)))
0
    def submit_from_script(self, working_directory, submit_script):
        """
        Change to the working directory and submit the given submit script.

        Typically, this function does not need to be modified by the plugins.

        :param working_directory: the directory the transport changes to
            before submitting.
        :param submit_script: the submit script; it is escaped before being
            passed to ``_get_submit_command``.
        :return: whatever ``_parse_submit_output`` returns for the return
            value, stdout and stderr of the submit command (a string with the
            JobID in a valid format to be used for querying).
        """
        transport = self.transport
        transport.chdir(working_directory)

        escaped_script = escape_for_bash(submit_script)
        submit_command = self._get_submit_command(escaped_script)

        retval, stdout, stderr = transport.exec_command_wait(submit_command)
        return self._parse_submit_output(retval, stdout, stderr)
Esempio n. 8
0
    def _get_joblist_command(self, jobs=None, user=None):
        """
        The command to report full information on existing jobs.

        TODO: in the case of job arrays, decide what to do (i.e., if we want
              to pass the -t options to list each subjob).
        """
        command = 'ps -o pid,stat,user,time'

        if jobs:
            if isinstance(jobs, basestring):
                command += ' {}'.format(escape_for_bash(jobs))
            else:
                try:
                    command += ' {}'.format(' '.join(
                        escape_for_bash(j) for j in jobs))
                except TypeError:
                    raise TypeError(
                        "If provided, the 'jobs' variable must be a string or a list of strings"
                    )

        command += '| tail -n +2'  # -header, do not use 'h'

        return command
Esempio n. 9
0
def _pg_execute_sh(command, user='******', **kwargs):
    """
    executes a postgres command line as another system user in a subprocess.

    The command is run as ``sudo -S [-n] su <user> -c "psql <options> -tc <command>"``.

    :param command: A psql command line as a str
    :param user: Name of a system user with postgres permissions
    :param kwargs: connection details to forward to psql, signature as in psycopg2.connect.
        ``database``, ``host`` and ``port`` are turned into the ``-d``, ``-h`` and ``-p``
        options of psql, ``password`` is discarded, and whatever is left is passed
        to ``subprocess.check_output``.
    :return: if the output of the subprocess is a ``str``, the list of its non-empty
        lines; otherwise the output as returned by ``check_output``.

    To stop `sudo` from asking for a password and fail if one is required,
    pass `non_interactive=True` as a kwarg (the spelling `noninteractive=True`
    is accepted as well).
    """
    from aiida.common.utils import escape_for_bash

    # Each option is collected separately and joined with spaces later on:
    # plain string concatenation would glue them together ('-d db-h host').
    option_list = []
    database = kwargs.pop('database', None)
    if database:
        option_list.append('-d {}'.format(database))
    # The password is never passed on the command line
    kwargs.pop('password', None)
    host = kwargs.pop('host', None)
    if host:
        option_list.append('-h {}'.format(host))
    port = kwargs.pop('port', None)
    if port:
        option_list.append('-p {}'.format(port))
    options = ' '.join(option_list)

    # Both spellings are always popped, so that neither of them is ever
    # forwarded to check_output (which would reject the unknown keyword).
    non_interactive = kwargs.pop('non_interactive', None)
    noninteractive = kwargs.pop('noninteractive', None)

    # Build command line
    sudo_cmd = ['sudo', '-S']
    if non_interactive or noninteractive:
        sudo_cmd += ['-n']
    su_cmd = ['su', user, '-c']
    psql_cmd = [
        'psql {options} -tc {}'.format(escape_for_bash(command),
                                       options=options)
    ]
    sudo_su_psql = sudo_cmd + su_cmd + psql_cmd
    result = subprocess.check_output(sudo_su_psql,
                                     preexec_fn=os.setsid,
                                     **kwargs)

    if isinstance(result, str):
        result = result.strip().split('\n')
        result = [i for i in result if i]
    return result
Esempio n. 10
0
 def _get_detailed_jobinfo_command(self, jobid):
     """
     Build the ``qacct -j`` command line for the given job id.

     :param jobid: the job id; it is escaped before being added to the
         command line.
     :return: the command, as a string.
     """
     return "qacct -j {}".format(escape_for_bash(jobid))
Esempio n. 11
0
    def _get_run_line(self, codes_info, codes_run_mode):
        """
        Build the line(s) of the script that execute the given codes.

        :parameter codes_info: a list of aiida.common.datastructures.CodeInfo
          objects. The following attributes of each of them are used:

            cmdline_params: a list with the executable and the command line
              arguments; each item is escaped.
            stdin_name: the filename to be used as stdin, or a false value if
              no stdin redirection is required.
            stdout_name: the filename to be used to store the standard output,
              or a false value if no stdout redirection is required.
            stderr_name: the filename to be used to store the standard error,
              or a false value if no stderr redirection is required.
            join_files: if True, stderr is redirected to stdout; the value of
              stderr_name is ignored.

        :parameter codes_run_mode: how to launch the multiple codes, one of
          the values of aiida.common.datastructures.code_run_modes. With
          PARALLEL each run line is sent to the background and a final 'wait'
          is added; with SERIAL the run lines simply follow each other.
        :raise NotImplementedError: for any other run mode.

        Each run line has the following format:
        [executable] [args] {[ < stdin ]} {[ > stdout ]} {[2>&1 | 2> stderr]}
        """
        from aiida.common.datastructures import code_run_modes

        run_lines = []

        for info in codes_info:
            executable_part = " ".join(
                [escape_for_bash(param) for param in info.cmdline_params])

            if info.stdin_name:
                stdin_part = "< {}".format(escape_for_bash(info.stdin_name))
            else:
                stdin_part = ""

            if info.stdout_name:
                stdout_part = "> {}".format(escape_for_bash(info.stdout_name))
            else:
                stdout_part = ""

            if info.join_files:
                stderr_part = "2>&1"
            elif info.stderr_name:
                stderr_part = "2> {}".format(escape_for_bash(info.stderr_name))
            else:
                stderr_part = ""

            run_lines.append("{} {} {} {}".format(
                executable_part, stdin_part, stdout_part, stderr_part))

        self.logger.debug('_get_run_line output: {}'.format(run_lines))

        if codes_run_mode == code_run_modes.PARALLEL:
            # Every code runs in the background; 'wait' blocks until all end
            return " &\n\n".join(run_lines + ['wait\n'])
        if codes_run_mode == code_run_modes.SERIAL:
            return "\n\n".join(run_lines)
        raise NotImplementedError('Unrecognized code run mode')
Esempio n. 12
0
    def _get_submit_script_header(self, job_tmpl):
        """
        Return the submit script header, using the parameters from the
        job_tmpl.

        Args:
           job_tmpl: an JobTemplate instance with relevant parameters set.

        TODO: truncate the title if too long
        """
        import re
        import string

        empty_line = ""

        lines = []
        if job_tmpl.submit_as_hold:
            lines.append("#PBS -h")

        if job_tmpl.rerunnable:
            lines.append("#PBS -r y")
        else:
            lines.append("#PBS -r n")

        if job_tmpl.email:
            # If not specified, but email events are set, PBSPro
            # sends the mail to the job owner by default
            lines.append('#PBS -M {}'.format(job_tmpl.email))

        email_events = ""
        if job_tmpl.email_on_started:
            email_events += "b"
        if job_tmpl.email_on_terminated:
            email_events += "ea"
        if email_events:
            lines.append("#PBS -m {}".format(email_events))
            if not job_tmpl.email:
                self.logger.info(
                    "Email triggers provided to PBSPro script for job,"
                    "but no email field set; will send emails to "
                    "the job owner as set in the scheduler")
        else:
            lines.append("#PBS -m n")

        if job_tmpl.job_name:
            # From qsub man page:
            # string, up to 15 characters in length.  It must
            # consist of an  alphabetic  or  numeric  character
            # followed  by printable, non-white-space characters.
            # Default:  if a script is used to submit the job, the job's name
            # is the name of the script.  If no script  is  used,  the  job's
            # name is "STDIN".
            #
            # I leave only letters, numbers, dots, dashes and underscores
            # Note: I don't compile the regexp, I am going to use it only once
            job_title = re.sub(r'[^a-zA-Z0-9_.-]+', '', job_tmpl.job_name)

            # prepend a 'j' (for 'job') before the string if the string
            # is now empty or does not start with a valid charachter
            if not job_title or (
                        job_title[0] not in string.letters + string.digits):
                job_title = 'j' + job_title

            # Truncate to the first 15 characters
            # Nothing is done if the string is shorter.
            job_title = job_title[:15]

            lines.append("#PBS -N {}".format(job_title))

        if job_tmpl.import_sys_environment:
            lines.append("#PBS -V")

        if job_tmpl.sched_output_path:
            lines.append("#PBS -o {}".format(job_tmpl.sched_output_path))

        if job_tmpl.sched_join_files:
            # from qsub man page:
            # 'oe': Standard error and standard output are merged  into
            #       standard output
            # 'eo': Standard error and standard output are merged  into
            #       standard error
            # 'n' : Standard error and standard output are not merged (default)
            lines.append("#PBS -j oe")
            if job_tmpl.sched_error_path:
                self.logger.info(
                    "sched_join_files is True, but sched_error_path is set in "
                    "PBSPro script; ignoring sched_error_path")
        else:
            if job_tmpl.sched_error_path:
                lines.append("#PBS -e {}".format(job_tmpl.sched_error_path))

        if job_tmpl.queue_name:
            lines.append("#PBS -q {}".format(job_tmpl.queue_name))

        if job_tmpl.priority:
            # Priority of the job.  Format: host-dependent integer.  Default:
            # zero.   Range:  [-1024,  +1023] inclusive.  Sets job's Priority
            # attribute to priority.
            # TODO: Here I expect that priority is passed in the correct PBSPro
            # format. To fix.
            lines.append("#PBS -p {}".format(job_tmpl.priority))

        if not job_tmpl.job_resource:
            raise ValueError("Job resources (as the num_machines) are required "
                             "for the PBSPro scheduler plugin")

        resource_lines = self._get_resource_lines(
            num_machines=job_tmpl.job_resource.num_machines,
            num_mpiprocs_per_machine=job_tmpl.job_resource.num_mpiprocs_per_machine,
            num_cores_per_machine=job_tmpl.job_resource.num_cores_per_machine,
            max_memory_kb=job_tmpl.max_memory_kb,
            max_wallclock_seconds=job_tmpl.max_wallclock_seconds)

        lines += resource_lines

        if job_tmpl.custom_scheduler_commands:
            lines.append(job_tmpl.custom_scheduler_commands)

        # Job environment variables are to be set on one single line.
        # This is a tough job due to the escaping of commas, etc.
        # moreover, I am having issues making it work.
        # Therefore, I assume that this is bash and export variables by
        # and.

        if job_tmpl.job_environment:
            lines.append(empty_line)
            lines.append("# ENVIRONMENT VARIABLES BEGIN ###")
            if not isinstance(job_tmpl.job_environment, dict):
                raise ValueError("If you provide job_environment, it must be "
                                 "a dictionary")
            for k, v in job_tmpl.job_environment.iteritems():
                lines.append("export {}={}".format(
                    k.strip(),
                    escape_for_bash(v)))
            lines.append("# ENVIRONMENT VARIABLES  END  ###")
            lines.append(empty_line)

        # Required to change directory to the working directory, that is
        # the one from which the job was submitted
        lines.append('cd "$PBS_O_WORKDIR"')
        lines.append(empty_line)

        return "\n".join(lines)
Esempio n. 13
0
    def _get_submit_script_header(self, job_tmpl):
        """
        Return the submit script header, using the parameters from the
        job_tmpl. See the following manual
        https://www-01.ibm.com/support/knowledgecenter/SSETD4_9.1.2/lsf_command_ref/bsub.1.dita?lang=en
        for more details about the possible options to bsub, in particular for
        the parallel environment definition (with the -m option).
        
        :param job_tmpl: an JobTemplate instance with relevant parameters set.
        """
        import string, re

        empty_line = ""

        lines = []
        if job_tmpl.submit_as_hold:
            lines.append("#BSUB -H")

        if job_tmpl.rerunnable:
            lines.append("#BSUB -r")
        else:
            lines.append("#BSUB -rn")

        if job_tmpl.email:
            # If not specified, but email events are set, SLURM
            # sends the mail to the job owner by default
            lines.append('#BSUB -u {}'.format(job_tmpl.email))

        if job_tmpl.email_on_started:
            lines.append("#BSUB -B")
        if job_tmpl.email_on_terminated:
            lines.append("#BSUB -N")

        if job_tmpl.job_name:
            # The man page specifies only a limitation
            # on the job name to 4094 characters.
            # To be safe, I remove unwanted characters, and I
            # trim it to length 128.

            # I leave only letters, numbers, dots, dashes and underscores
            # Note: I don't compile the regexp, I am going to use it only once
            job_title = re.sub(r'[^a-zA-Z0-9_.-]+', '', job_tmpl.job_name)

            # prepend a 'j' (for 'job') before the string if the string
            # is now empty or does not start with a valid character
            if not job_title or (job_title[0]
                                 not in string.letters + string.digits):
                job_title = 'j' + job_title

            # Truncate to the first 128 characters
            # Nothing is done if the string is shorter.
            job_title = job_title[:128]
            lines.append('#BSUB -J "{}"'.format(job_title))

        if not job_tmpl.import_sys_environment:
            self.logger.warning("LSF scheduler cannot ignore "
                                "the user environment")

        if job_tmpl.sched_output_path:
            lines.append("#BSUB -o {}".format(job_tmpl.sched_output_path))

        sched_error_path = getattr(job_tmpl, 'sched_error_path', None)
        if job_tmpl.sched_join_files:
            sched_error_path = "{}_".format(job_tmpl.sched_output_path)
            self.logger.warning("LSF scheduler does not support joining "
                                "the standard output and standard error "
                                "files; std error file assigned instead "
                                "to the file {}".format(sched_error_path))

        if sched_error_path:
            lines.append("#BSUB -e {}".format(job_tmpl.sched_error_path))

        if job_tmpl.queue_name:
            lines.append("#BSUB -q {}".format(job_tmpl.queue_name))

        if job_tmpl.priority:
            # Specifies user-assigned job priority that orders all jobs
            # (from all users) in a queue. Valid values for priority
            # are any integers between 1 and MAX_USER_PRIORITY
            # (configured in lsb.params, displayed by "bparams -l").
            # Jobs are scheduled based first on their queue priority first, then
            # job priority, and lastly in first-come first-served order.
            lines.append("#BSUB -sp {}".format(job_tmpl.priority))

        if not job_tmpl.job_resource:
            raise ValueError("Job resources (as the tot_num_mpiprocs) are "
                             "required for the LSF scheduler plugin")

        lines.append("#BSUB -n {}".format(
            job_tmpl.job_resource.get_tot_num_mpiprocs()))
        # Note:  make sure that PARALLEL_SCHED_BY_SLOT=Y is NOT
        # defined in lsb.params (you can check with the output of bparams -l).
        # Note: the -n option of bsub can also contain a maximum number of
        # procs to be used
        if job_tmpl.job_resource.parallel_env:
            lines.append('#BSUB -m "{}"'.format(
                job_tmpl.job_resource.parallel_env))

        if job_tmpl.max_wallclock_seconds is not None:
            # ABS_RUNLIMIT=Y should be set, in lsb.params (check with bparams -l)
            try:
                tot_secs = int(job_tmpl.max_wallclock_seconds)
                if tot_secs <= 0:
                    raise ValueError
            except ValueError:
                raise ValueError(
                    "max_wallclock_seconds must be "
                    "a positive integer (in seconds)! It is instead '{}'"
                    "".format((job_tmpl.max_wallclock_seconds)))
            hours = tot_secs // 3600
            # The double negation results in the ceiling rather than the floor
            # of the division
            minutes = -(-(tot_secs % 3600) // 60)
            lines.append("#BSUB -W {:02d}:{:02d}".format(hours, minutes))

        # TODO: check if this is the memory per node
        if job_tmpl.max_memory_kb:
            try:
                virtualMemoryKb = int(job_tmpl.max_memory_kb)
                if virtualMemoryKb <= 0:
                    raise ValueError
            except ValueError:
                raise ValueError(
                    "max_memory_kb must be "
                    "a positive integer (in kB)! It is instead '{}'"
                    "".format((job_tmpl.MaxMemoryKb)))
            # The -M option sets a per-process (soft) memory limit for all the
            # processes that belong to this job
            lines.append("#BSUB -M {}".format(virtualMemoryKb))

        if job_tmpl.custom_scheduler_commands:
            lines.append(job_tmpl.custom_scheduler_commands)

        # Job environment variables are to be set on one single line.
        # This is a tough job due to the escaping of commas, etc.
        # moreover, I am having issues making it work.
        # Therefore, I assume that this is bash and export variables by
        # hand.
        if job_tmpl.job_environment:
            lines.append(empty_line)
            lines.append("# ENVIRONMENT VARIABLES BEGIN ###")
            if not isinstance(job_tmpl.job_environment, dict):
                raise ValueError("If you provide job_environment, it must be "
                                 "a dictionary")
            for k, v in job_tmpl.job_environment.iteritems():
                lines.append("export {}={}".format(k.strip(),
                                                   escape_for_bash(v)))
            lines.append("# ENVIRONMENT VARIABLES END  ###")
            lines.append(empty_line)

        lines.append(empty_line)

        # The following seems to be the only way to copy the input files
        # to the node where the computation are actually launched (the
        # -f option of bsub that does not always work...)
        # TODO: implement the case when LSB_OUTDIR is not properly defined...
        # (need to add the line "#BSUB -outdir PATH_TO_REMOTE_DIRECTORY")
        # IMPORTANT! the -z is needed, because if LSB_OUTDIR is not defined,
        # you would do 'cp -R /* .' basically copying ALL FILES in your
        # computer (including mounted partitions) in the current dir!!
        lines.append("""
if [ ! -z "$LSB_OUTDIR" ]
then 
  cp -R "$LSB_OUTDIR"/* .
fi
""")

        return "\n".join(lines)
Esempio n. 14
0
    def _get_submit_script_header(self, job_tmpl):
        """
        Return the submit script header, using the parameters from the
        job_tmpl.

        Args:
           job_tmpl: an JobTemplate instance with relevant parameters set.
        """

        lines = []
        empty_line = ""

        # Redirecting script output on the correct files
        # Should be one of the first commands
        if job_tmpl.sched_output_path:
            lines.append("exec > {}".format(job_tmpl.sched_output_path))

        if job_tmpl.sched_join_files:
            # TODO: manual says:
            # By  default both standard output and standard error are directed
            # to a file of the name "slurm-%j.out", where the "%j" is replaced
            # with  the  job  allocation  number.
            # See that this automatic redirection works also if
            # I specify a different --output file
            if job_tmpl.sched_error_path:
                self.logger.info(
                    "sched_join_files is True, but sched_error_path is set; "
                    " ignoring sched_error_path")
        else:
            if job_tmpl.sched_error_path:
                lines.append("exec 2> {}".format(job_tmpl.sched_error_path))
            else:
                # To avoid automatic join of files
                lines.append("exec 2>&1")

        if job_tmpl.max_memory_kb:
            try:
                virtual_memory_kb = int(job_tmpl.max_memory_kb)
                if virtual_memory_kb <= 0:
                    raise ValueError
            except ValueError:
                raise ValueError(
                    "max_memory_kb must be "
                    "a positive integer (in kB)! It is instead '{}'"
                    "".format((job_tmpl.MaxMemoryKb)))
            lines.append("ulimit -v {}".format(virtualMemoryKb))
        if not job_tmpl.import_sys_environment:
            lines.append("env --ignore-environment \\")

        if job_tmpl.custom_scheduler_commands:
            lines.append(job_tmpl.custom_scheduler_commands)

        # Job environment variables are to be set on one single line.
        # This is a tough job due to the escaping of commas, etc.
        # moreover, I am having issues making it work.
        # Therefore, I assume that this is bash and export variables by
        # and.

        if job_tmpl.job_environment:
            lines.append(empty_line)
            lines.append("# ENVIRONMENT VARIABLES BEGIN ###")
            if not isinstance(job_tmpl.job_environment, dict):
                raise ValueError("If you provide job_environment, it must be "
                                 "a dictionary")
            for key, value in job_tmpl.job_environment.iteritems():
                lines.append("export {}={}".format(key.strip(),
                                                   escape_for_bash(value)))
            lines.append("# ENVIRONMENT VARIABLES  END  ###")
            lines.append(empty_line)

        lines.append(empty_line)

        ## The following code is not working as there's an empty line
        ## inserted between the header and the actual command.
        # if job_tmpl.max_wallclock_seconds is not None:
        #     try:
        #         tot_secs = int(job_tmpl.max_wallclock_seconds)
        #         if tot_secs <= 0:
        #             raise ValueError
        #     except ValueError:
        #         raise ValueError(
        #             "max_wallclock_seconds must be "
        #             "a positive integer (in seconds)! It is instead '{}'"
        #             "".format((job_tmpl.max_wallclock_seconds)))
        #     lines.append("timeout {} \\".format(tot_secs))

        return "\n".join(lines)
Esempio n. 15
0
    def _get_submit_script_header(self, job_tmpl):
        """
        Return the submit script header, using the parameters from the
        job_tmpl.

        Args:
           job_tmpl: an JobTemplate instance with relevant parameters set.
        """
        import re
        import string

        lines = []
        empty_line = ""

        if job_tmpl.max_memory_kb:
            try:
                virtualMemoryKb = int(job_tmpl.max_memory_kb)
                if virtualMemoryKb <= 0:
                    raise ValueError
            except ValueError:
                raise ValueError(
                    "max_memory_kb must be "
                    "a positive integer (in kB)! It is instead '{}'"
                    "".format((job_tmpl.MaxMemoryKb)))
            lines.append("ulimit -v {}", virtualMemoryKb)
        if not job_tmpl.import_sys_environment:
            lines.append("env --ignore-environment \\")

        if job_tmpl.custom_scheduler_commands:
            lines.append(job_tmpl.custom_scheduler_commands)

        # Job environment variables are to be set on one single line.
        # This is a tough job due to the escaping of commas, etc.
        # moreover, I am having issues making it work.
        # Therefore, I assume that this is bash and export variables by
        # and.

        if job_tmpl.job_environment:
            lines.append(empty_line)
            lines.append("# ENVIRONMENT VARIABLES BEGIN ###")
            if not isinstance(job_tmpl.job_environment, dict):
                raise ValueError("If you provide job_environment, it must be "
                                 "a dictionary")
            for k, v in job_tmpl.job_environment.iteritems():
                lines.append("export {}={}".format(k.strip(),
                                                   escape_for_bash(v)))
            lines.append("# ENVIRONMENT VARIABLES  END  ###")
            lines.append(empty_line)

        lines.append(empty_line)

        ## The following code is not working as there's an empty line
        ## inserted between the header and the actual command.
        # if job_tmpl.max_wallclock_seconds is not None:
        #     try:
        #         tot_secs = int(job_tmpl.max_wallclock_seconds)
        #         if tot_secs <= 0:
        #             raise ValueError
        #     except ValueError:
        #         raise ValueError(
        #             "max_wallclock_seconds must be "
        #             "a positive integer (in seconds)! It is instead '{}'"
        #             "".format((job_tmpl.max_wallclock_seconds)))
        #     lines.append("timeout {} \\".format(tot_secs))

        return "\n".join(lines)
Esempio n. 16
0
    def _get_submit_script_header(self, job_tmpl):
        """
        Return the submit script header, using the parameters from the
        job_tmpl.

        Args:
           job_tmpl: an JobTemplate instance with relevant parameters set.

        TODO: truncate the title if too long
        """
        import re
        import string

        empty_line = ""

        lines = []

        # SGE provides flags for wd and cwd
        if job_tmpl.working_directory:
            lines.append('#$ -wd {}'.format(job_tmpl.working_directory))
        else:
            lines.append('#$ -cwd')

        # Enforce bash shell
        lines.append('#$ -S /bin/bash')

        if job_tmpl.submit_as_hold:
            #if isinstance(job_tmpl.submit_as_hold, str):
            lines.append('#$ -h {}'.format(job_tmpl.submit_as_hold))

        if job_tmpl.rerunnable:
            #if isinstance(job_tmpl.rerunnable, str):
            lines.append('#$ -r {}'.format(job_tmpl.rerunnable))

        if job_tmpl.email:
            # If not specified, but email events are set, PBSPro
            # sends the mail to the job owner by default
            lines.append('#$ -M {}'.format(job_tmpl.email))

        email_events = ""
        if job_tmpl.email_on_started:
            email_events += "b"
        if job_tmpl.email_on_terminated:
            email_events += "ea"
        if email_events:
            lines.append("#$ -m {}".format(email_events))
            if not job_tmpl.email:
                self.logger.info(
                    "Email triggers provided to SGE script for job,"
                    "but no email field set; will send emails to "
                    "the job owner as set in the scheduler")
        else:
            lines.append("#$ -m n")

        #From the qsub man page:
        #"The name may be any arbitrary alphanumeric ASCII string, but
        # may  not contain  "\n", "\t", "\r", "/", ":", "@", "\", "*",
        # or "?"."
        if job_tmpl.job_name:
            job_title = re.sub(r'[^a-zA-Z0-9_.-]+', '', job_tmpl.job_name)

            # prepend a 'j' (for 'job') before the string if the string
            # is now empty or does not start with a valid character
            # (the first symbol cannot be digit, at least in some versions
            #  of the scheduler)
            if not job_title or (job_title[0] not in string.letters):
                job_title = 'j' + job_title

            lines.append('#$ -N {}'.format(job_tmpl.job_name))

        if job_tmpl.import_sys_environment:
            lines.append("#$ -V")

        if job_tmpl.sched_output_path:
            lines.append("#$ -o {}".format(job_tmpl.sched_output_path))

        if job_tmpl.sched_join_files:
            # from qsub man page:
            # 'y': Standard error and standard output are merged  into
            #       standard output
            # 'n' : Standard error and standard output are not merged (default)
            lines.append("#$ -j y")
            if job_tmpl.sched_error_path:
                self.logger.info(
                    "sched_join_files is True, but sched_error_path is set in "
                    "SGE script; ignoring sched_error_path")
        else:
            if job_tmpl.sched_error_path:
                lines.append("#$ -e {}".format(job_tmpl.sched_error_path))

        if job_tmpl.queue_name:
            lines.append("#$ -q {}".format(job_tmpl.queue_name))

        if job_tmpl.priority:
            # Priority of the job.  Format: host-dependent integer.  Default:
            # zero.   Range:  [-1023,  +1024].  Sets job's Priority
            # attribute to priority.
            lines.append("#$ -p {}".format(job_tmpl.priority))

        if not job_tmpl.job_resource:
            raise ValueError(
                "Job resources (as the tot_num_mpiprocs) are required "
                "for the SGE scheduler plugin")
        #Setting up the parallel environment
        lines.append('#$ -pe {} {}'.\
                     format(str(job_tmpl.job_resource.parallel_env),\
                            int(job_tmpl.job_resource.tot_num_mpiprocs)))

        if job_tmpl.max_wallclock_seconds is not None:
            try:
                tot_secs = int(job_tmpl.max_wallclock_seconds)
                if tot_secs <= 0:
                    raise ValueError
            except ValueError:
                raise ValueError(
                    "max_wallclock_seconds must be "
                    "a positive integer (in seconds)! It is instead '{}'"
                    "".format((job_tmpl.max_wallclock_seconds)))
            hours = tot_secs // 3600
            tot_minutes = tot_secs % 3600
            minutes = tot_minutes // 60
            seconds = tot_minutes % 60
            lines.append("#$ -l h_rt={:02d}:{:02d}:{:02d}".format(
                hours, minutes, seconds))

        if job_tmpl.custom_scheduler_commands:
            lines.append(job_tmpl.custom_scheduler_commands)

        #TAKEN FROM PBSPRO:
        # Job environment variables are to be set on one single line.
        # This is a tough job due to the escaping of commas, etc.
        # moreover, I am having issues making it work.
        # Therefore, I assume that this is bash and export variables by
        # and.
        if job_tmpl.job_environment:
            lines.append(empty_line)
            lines.append("# ENVIRONMENT VARIABLES BEGIN ###")
            if not isinstance(job_tmpl.job_environment, dict):
                raise ValueError("If you provide job_environment, it must be "
                                 "a dictionary")
            for k, v in job_tmpl.job_environment.iteritems():
                lines.append("export {}={}".format(k.strip(),
                                                   escape_for_bash(v)))
            lines.append("# ENVIRONMENT VARIABLES  END  ###")
            lines.append(empty_line)

        return "\n".join(lines)
Esempio n. 17
0
    def _get_submit_script_header(self, job_tmpl):
        """
        Return the submit script header, using the parameters from the
        job_tmpl.

        Args:
           job_tmpl: an JobTemplate instance with relevant parameters set.

        TODO: truncate the title if too long
        """
        import string

        empty_line = ""

        lines = []
        if job_tmpl.submit_as_hold:
            lines.append("#SBATCH -H")

        if job_tmpl.rerunnable:
            lines.append("#SBATCH --requeue")
        else:
            lines.append("#SBATCH --no-requeue")

        if job_tmpl.email:
            # If not specified, but email events are set, SLURM
            # sends the mail to the job owner by default
            lines.append('#SBATCH --mail-user={}'.format(job_tmpl.email))

        if job_tmpl.email_on_started:
            lines.append("#SBATCH --mail-type=BEGIN")
        if job_tmpl.email_on_terminated:
            lines.append("#SBATCH --mail-type=FAIL")
            lines.append("#SBATCH --mail-type=END")

        if job_tmpl.job_name:
            # The man page does not specify any specific limitation
            # on the job name.
            # Just to be sure, I remove unwanted characters, and I
            # trim it to length 128

            # I leave only letters, numbers, dots, dashes and underscores
            # Note: I don't compile the regexp, I am going to use it only once
            job_title = re.sub(r'[^a-zA-Z0-9_.-]+', '', job_tmpl.job_name)

            # prepend a 'j' (for 'job') before the string if the string
            # is now empty or does not start with a valid charachter
            if not job_title or (job_title[0]
                                 not in string.letters + string.digits):
                job_title = 'j' + job_title

            # Truncate to the first 128 characters
            # Nothing is done if the string is shorter.
            job_title = job_title[:128]

            lines.append('#SBATCH --job-name="{}"'.format(job_title))

        if job_tmpl.import_sys_environment:
            lines.append("#SBATCH --get-user-env")

        if job_tmpl.sched_output_path:
            lines.append("#SBATCH --output={}".format(
                job_tmpl.sched_output_path))

        if job_tmpl.sched_join_files:
            # TODO: manual says:
            #By  default both standard output and standard error are directed
            #to a file of the name "slurm-%j.out", where the "%j" is replaced
            #with  the  job  allocation  number.
            # See that this automatic redirection works also if
            # I specify a different --output file
            if job_tmpl.sched_error_path:
                self.logger.info(
                    "sched_join_files is True, but sched_error_path is set in "
                    "SLURM script; ignoring sched_error_path")
        else:
            if job_tmpl.sched_error_path:
                lines.append("#SBATCH --error={}".format(
                    job_tmpl.sched_error_path))
            else:
                # To avoid automatic join of files
                lines.append("#SBATCH --error=slurm-%j.err")

        if job_tmpl.queue_name:
            lines.append("#SBATCH --partition={}".format(job_tmpl.queue_name))

        if job_tmpl.priority:
            #  Run the job with an adjusted scheduling priority  within  SLURM.
            #  With no adjustment value the scheduling priority is decreased by
            #  100. The adjustment range is from -10000 (highest  priority)  to
            #  10000  (lowest  priority).
            lines.append("#SBATCH --nice={}".format(job_tmpl.priority))

        if not job_tmpl.job_resource:
            raise ValueError(
                "Job resources (as the num_machines) are required "
                "for the SLURM scheduler plugin")

        lines.append("#SBATCH --nodes={}".format(
            job_tmpl.job_resource.num_machines))
        if job_tmpl.job_resource.num_mpiprocs_per_machine:
            lines.append("#SBATCH --ntasks-per-node={}".format(
                job_tmpl.job_resource.num_mpiprocs_per_machine))

        if job_tmpl.job_resource.num_cores_per_mpiproc:
            lines.append("#SBATCH --cpus-per-task={}".format(
                job_tmpl.job_resource.num_cores_per_mpiproc))

        if job_tmpl.max_wallclock_seconds is not None:
            try:
                tot_secs = int(job_tmpl.max_wallclock_seconds)
                if tot_secs <= 0:
                    raise ValueError
            except ValueError:
                raise ValueError(
                    "max_wallclock_seconds must be "
                    "a positive integer (in seconds)! It is instead '{}'"
                    "".format((job_tmpl.max_wallclock_seconds)))
            days = tot_secs // 86400
            tot_hours = tot_secs % 86400
            hours = tot_hours // 3600
            tot_minutes = tot_hours % 3600
            minutes = tot_minutes // 60
            seconds = tot_minutes % 60
            if days == 0:
                lines.append("#SBATCH --time={:02d}:{:02d}:{:02d}".format(
                    hours, minutes, seconds))
            else:
                lines.append("#SBATCH --time={:d}-{:02d}:{:02d}:{:02d}".format(
                    days, hours, minutes, seconds))

        # It is the memory per node, not per cpu!
        if job_tmpl.max_memory_kb:
            try:
                virtualMemoryKb = int(job_tmpl.max_memory_kb)
                if virtualMemoryKb <= 0:
                    raise ValueError
            except ValueError:
                raise ValueError(
                    "max_memory_kb must be "
                    "a positive integer (in kB)! It is instead '{}'"
                    "".format((job_tmpl.MaxMemoryKb)))
            # --mem: Specify the real memory required per node in MegaBytes.
            # --mem and  --mem-per-cpu  are  mutually exclusive.
            lines.append("#SBATCH --mem={}".format(virtualMemoryKb // 1024))

        if job_tmpl.custom_scheduler_commands:
            lines.append(job_tmpl.custom_scheduler_commands)

        # Job environment variables are to be set on one single line.
        # This is a tough job due to the escaping of commas, etc.
        # moreover, I am having issues making it work.
        # Therefore, I assume that this is bash and export variables by
        # and.

        if job_tmpl.job_environment:
            lines.append(empty_line)
            lines.append("# ENVIRONMENT VARIABLES BEGIN ###")
            if not isinstance(job_tmpl.job_environment, dict):
                raise ValueError("If you provide job_environment, it must be "
                                 "a dictionary")
            for k, v in job_tmpl.job_environment.iteritems():
                lines.append("export {}={}".format(k.strip(),
                                                   escape_for_bash(v)))
            lines.append("# ENVIRONMENT VARIABLES  END  ###")
            lines.append(empty_line)

        lines.append(empty_line)

        return "\n".join(lines)